Migrate project

2022-09-07 17:17:11 +08:00
parent 12ea86b2fb
commit 5d4d02d679
29 changed files with 11431 additions and 43 deletions

loki/batch.go (new file, 107 lines)

@@ -0,0 +1,107 @@
package loki
import (
"time"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
json "github.com/json-iterator/go"
"github.com/lixh00/loki-client-go/pkg/logproto"
)
// batch holds pending log streams waiting to be sent to Loki. It is used
// to reduce the number of push requests to Loki by aggregating multiple log
// streams and entries in a single batch request. In case of multi-tenant
// Promtail, log streams for each tenant are stored in a dedicated batch.
type batch struct {
streams map[string]*logproto.Stream
bytes int
createdAt time.Time
}
func newBatch(entries ...entry) *batch {
b := &batch{
streams: map[string]*logproto.Stream{},
bytes: 0,
createdAt: time.Now(),
}
// Add entries to the batch
for _, entry := range entries {
b.add(entry)
}
return b
}
// add an entry to the batch
func (b *batch) add(entry entry) {
b.bytes += len(entry.Line)
// Append the entry to an already existing stream (if any)
labels := entry.labels.String()
if stream, ok := b.streams[labels]; ok {
stream.Entries = append(stream.Entries, entry.Entry)
return
}
// Add the entry as a new stream
b.streams[labels] = &logproto.Stream{
Labels: labels,
Entries: []logproto.Entry{entry.Entry},
}
}
// sizeBytes returns the current batch size in bytes
func (b *batch) sizeBytes() int {
return b.bytes
}
// sizeBytesAfter returns the size the batch would have after the input
// entry is added to it
func (b *batch) sizeBytesAfter(entry entry) int {
return b.bytes + len(entry.Line)
}
// age returns the time elapsed since the batch was created
func (b *batch) age() time.Duration {
return time.Since(b.createdAt)
}
// encode encodes the batch as a snappy-compressed push request and returns
// the encoded bytes and the number of encoded entries
func (b *batch) encode() ([]byte, int, error) {
req, entriesCount := b.createPushRequest()
buf, err := proto.Marshal(req)
if err != nil {
return nil, 0, err
}
buf = snappy.Encode(nil, buf)
return buf, entriesCount, nil
}
// encodeJSON encodes the batch as a JSON push request and returns
// the encoded bytes and the number of encoded entries
func (b *batch) encodeJSON() ([]byte, int, error) {
req, entriesCount := b.createPushRequest()
buf, err := json.Marshal(req)
if err != nil {
return nil, 0, err
}
return buf, entriesCount, nil
}
// createPushRequest builds the push request and returns it together with the number of entries
func (b *batch) createPushRequest() (*logproto.PushRequest, int) {
req := logproto.PushRequest{
Streams: make([]logproto.Stream, 0, len(b.streams)),
}
entriesCount := 0
for _, stream := range b.streams {
req.Streams = append(req.Streams, *stream)
entriesCount += len(stream.Entries)
}
return &req, entriesCount
}

loki/batch_test.go (new file, 139 lines)

@@ -0,0 +1,139 @@
package loki
import (
"fmt"
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/lixh00/loki-client-go/pkg/logproto"
)
func TestBatch_add(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputEntries []entry
expectedSizeBytes int
}{
"empty batch": {
inputEntries: []entry{},
expectedSizeBytes: 0,
},
"single stream with single log entry": {
inputEntries: []entry{
{"tenant", model.LabelSet{}, logEntries[0].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line),
},
"single stream with multiple log entries": {
inputEntries: []entry{
{"tenant", model.LabelSet{}, logEntries[0].Entry},
{"tenant", model.LabelSet{}, logEntries[1].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line) + len(logEntries[1].Entry.Line),
},
"multiple streams with multiple log entries": {
inputEntries: []entry{
{"tenant", model.LabelSet{"type": "a"}, logEntries[0].Entry},
{"tenant", model.LabelSet{"type": "a"}, logEntries[1].Entry},
{"tenant", model.LabelSet{"type": "b"}, logEntries[2].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line) + len(logEntries[1].Entry.Line) + len(logEntries[2].Entry.Line),
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
b := newBatch()
for _, entry := range testData.inputEntries {
b.add(entry)
}
assert.Equal(t, testData.expectedSizeBytes, b.sizeBytes())
})
}
}
func TestBatch_encode(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputBatch *batch
expectedEntriesCount int
}{
"empty batch": {
inputBatch: newBatch(),
expectedEntriesCount: 0,
},
"single stream with single log entry": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{}, logEntries[0].Entry},
),
expectedEntriesCount: 1,
},
"single stream with multiple log entries": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{}, logEntries[0].Entry},
entry{"tenant", model.LabelSet{}, logEntries[1].Entry},
),
expectedEntriesCount: 2,
},
"multiple streams with multiple log entries": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{"type": "a"}, logEntries[0].Entry},
entry{"tenant", model.LabelSet{"type": "a"}, logEntries[1].Entry},
entry{"tenant", model.LabelSet{"type": "b"}, logEntries[2].Entry},
),
expectedEntriesCount: 3,
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
t.Parallel()
_, entriesCount, err := testData.inputBatch.encode()
require.NoError(t, err)
assert.Equal(t, testData.expectedEntriesCount, entriesCount)
})
}
}
func TestHashCollisions(t *testing.T) {
b := newBatch()
ls1 := model.LabelSet{"app": "l", "uniq0": "0", "uniq1": "1"}
ls2 := model.LabelSet{"app": "m", "uniq0": "1", "uniq1": "1"}
require.False(t, ls1.Equal(ls2))
require.Equal(t, ls1.FastFingerprint(), ls2.FastFingerprint())
const entriesPerLabel = 10
for i := 0; i < entriesPerLabel; i++ {
b.add(entry{labels: ls1, Entry: logproto.Entry{Timestamp: time.Now(), Line: fmt.Sprintf("line %d", i)}})
b.add(entry{labels: ls2, Entry: logproto.Entry{Timestamp: time.Now(), Line: fmt.Sprintf("line %d", i)}})
}
// make sure that colliding labels are stored properly as independent streams
req, entries := b.createPushRequest()
assert.Len(t, req.Streams, 2)
assert.Equal(t, 2*entriesPerLabel, entries)
if req.Streams[0].Labels == ls1.String() {
assert.Equal(t, ls1.String(), req.Streams[0].Labels)
assert.Equal(t, ls2.String(), req.Streams[1].Labels)
} else {
assert.Equal(t, ls2.String(), req.Streams[0].Labels)
assert.Equal(t, ls1.String(), req.Streams[1].Labels)
}
}
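A compact hedged sketch of the grouping behavior, written as an extra test that could sit in this file; it relies only on the imports already present, and the "job" label and the log lines are placeholders, not part of this commit:

// Hypothetical additional test (illustration only): entries sharing a label
// set collapse into one stream, while bytes still counts every line.
func TestBatchGroupingSketch(t *testing.T) {
	b := newBatch(
		entry{labels: model.LabelSet{"job": "demo"}, Entry: logproto.Entry{Timestamp: time.Now(), Line: "hello"}},
		entry{labels: model.LabelSet{"job": "demo"}, Entry: logproto.Entry{Timestamp: time.Now(), Line: "world"}},
	)
	assert.Len(t, b.streams, 1) // a single stream keyed by `{job="demo"}`
	assert.Equal(t, len("hello")+len("world"), b.sizeBytes())
	buf, entriesCount, err := b.encode() // snappy-compressed protobuf push request
	require.NoError(t, err)
	assert.Equal(t, 2, entriesCount)
	assert.NotEmpty(t, buf)
}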

loki/client.go (new file, 403 lines)

@@ -0,0 +1,403 @@
package loki
import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
"os"
"strconv"
"sync"
"time"
"github.com/lixh00/loki-client-go/pkg/backoff"
"github.com/prometheus/prometheus/promql/parser"
"github.com/lixh00/loki-client-go/pkg/metric"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
"github.com/lixh00/loki-client-go/pkg/helpers"
"github.com/lixh00/loki-client-go/pkg/logproto"
)
const (
protoContentType = "application/x-protobuf"
JSONContentType = "application/json"
maxErrMsgLen = 1024
// Label reserved to override the tenant ID while processing
// pipeline stages
ReservedLabelTenantID = "__tenant_id__"
LatencyLabel = "filename"
HostLabel = "host"
)
var (
encodedBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "encoded_bytes_total",
Help: "Number of bytes encoded and ready to send.",
}, []string{HostLabel})
sentBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "sent_bytes_total",
Help: "Number of bytes sent.",
}, []string{HostLabel})
droppedBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "dropped_bytes_total",
Help: "Number of bytes dropped because failed to be sent to the ingester after all retries.",
}, []string{HostLabel})
sentEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "sent_entries_total",
Help: "Number of log entries sent to the ingester.",
}, []string{HostLabel})
droppedEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "dropped_entries_total",
Help: "Number of log entries dropped because failed to be sent to the ingester after all retries.",
}, []string{HostLabel})
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "promtail",
Name: "request_duration_seconds",
Help: "Duration of send requests.",
}, []string{"status_code", HostLabel})
batchRetries = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "batch_retries_total",
Help: "Number of times batches has had to be retried.",
}, []string{HostLabel})
streamLag *metric.Gauges
countersWithHost = []*prometheus.CounterVec{
encodedBytes, sentBytes, droppedBytes, sentEntries, droppedEntries,
}
UserAgent = fmt.Sprintf("promtail/%s", version.Version)
)
func init() {
prometheus.MustRegister(encodedBytes)
prometheus.MustRegister(sentBytes)
prometheus.MustRegister(droppedBytes)
prometheus.MustRegister(sentEntries)
prometheus.MustRegister(droppedEntries)
prometheus.MustRegister(requestDuration)
prometheus.MustRegister(batchRetries)
var err error
streamLag, err = metric.NewGauges("promtail_stream_lag_seconds",
"Difference between current time and last batch timestamp for successful sends",
metric.GaugeConfig{Action: "set"},
int64(1*time.Minute.Seconds()), // This strips out files which update slowly and reduces noise in this metric.
)
if err != nil {
panic(err)
}
prometheus.MustRegister(streamLag)
}
// Client for pushing logs in snappy-compressed protos over HTTP.
type Client struct {
logger log.Logger
cfg Config
client *http.Client
quit chan struct{}
once sync.Once
entries chan entry
wg sync.WaitGroup
externalLabels model.LabelSet
}
type entry struct {
tenantID string
labels model.LabelSet
logproto.Entry
}
// New makes a new Client from config
func New(cfg Config) (*Client, error) {
logger := level.NewFilter(log.NewLogfmtLogger(os.Stdout), level.AllowWarn())
return NewWithLogger(cfg, logger)
}
// NewWithDefault creates a new client with default configuration.
func NewWithDefault(url string) (*Client, error) {
cfg, err := NewDefaultConfig(url)
if err != nil {
return nil, err
}
return New(cfg)
}
// NewWithLogger makes a new Client from a logger and a config
func NewWithLogger(cfg Config, logger log.Logger) (*Client, error) {
if cfg.URL.URL == nil {
return nil, errors.New("client needs target URL")
}
c := &Client{
logger: log.With(logger, "component", "client", "host", cfg.URL.Host),
cfg: cfg,
quit: make(chan struct{}),
entries: make(chan entry),
externalLabels: cfg.ExternalLabels.LabelSet,
}
err := cfg.Client.Validate()
if err != nil {
return nil, err
}
c.client, err = config.NewClientFromConfig(cfg.Client, "promtail", config.WithKeepAlivesDisabled(), config.WithHTTP2Disabled())
if err != nil {
return nil, err
}
c.client.Timeout = cfg.Timeout
// Initialize counters to 0 so the metrics are exported before they are
// first incremented, avoiding gaps in the series.
for _, counter := range countersWithHost {
counter.WithLabelValues(c.cfg.URL.Host).Add(0)
}
c.wg.Add(1)
go c.run()
return c, nil
}
func (c *Client) run() {
batches := map[string]*batch{}
// Given the client handles multiple batches (1 per tenant) and each batch
// can be created at a different point in time, we check for batches whose
// max wait time has been reached 10 times per BatchWait period, so that the
// maximum extra delay before sending a batch is 10% of the max waiting time.
// The check interval is floored at 10ms to avoid overly frequent checks when
// BatchWait is very low.
minWaitCheckFrequency := 10 * time.Millisecond
maxWaitCheckFrequency := c.cfg.BatchWait / 10
if maxWaitCheckFrequency < minWaitCheckFrequency {
maxWaitCheckFrequency = minWaitCheckFrequency
}
maxWaitCheck := time.NewTicker(maxWaitCheckFrequency)
defer func() {
// Send all pending batches
for tenantID, batch := range batches {
c.sendBatch(tenantID, batch)
}
c.wg.Done()
}()
for {
select {
case <-c.quit:
return
case e := <-c.entries:
batch, ok := batches[e.tenantID]
// If the batch doesn't exist yet, we create a new one with the entry
if !ok {
batches[e.tenantID] = newBatch(e)
break
}
// If adding the entry to the batch will increase the size over the max
// size allowed, we do send the current batch and then create a new one
if batch.sizeBytesAfter(e) > c.cfg.BatchSize {
c.sendBatch(e.tenantID, batch)
batches[e.tenantID] = newBatch(e)
break
}
// The max size of the batch isn't reached, so we can add the entry
batch.add(e)
case <-maxWaitCheck.C:
// Send all batches whose max wait time has been reached
for tenantID, batch := range batches {
if batch.age() < c.cfg.BatchWait {
continue
}
c.sendBatch(tenantID, batch)
delete(batches, tenantID)
}
}
}
}
func (c *Client) sendBatch(tenantID string, batch *batch) {
var (
err error
buf []byte
entriesCount int
)
if c.cfg.EncodeJson {
buf, entriesCount, err = batch.encodeJSON()
} else {
buf, entriesCount, err = batch.encode()
}
if err != nil {
level.Error(c.logger).Log("msg", "error encoding batch", "error", err)
return
}
bufBytes := float64(len(buf))
encodedBytes.WithLabelValues(c.cfg.URL.Host).Add(bufBytes)
ctx := context.Background()
backoff := backoff.New(ctx, c.cfg.BackoffConfig)
var status int
for backoff.Ongoing() {
start := time.Now()
status, err = c.send(ctx, tenantID, buf)
requestDuration.WithLabelValues(strconv.Itoa(status), c.cfg.URL.Host).Observe(time.Since(start).Seconds())
if err == nil {
sentBytes.WithLabelValues(c.cfg.URL.Host).Add(bufBytes)
sentEntries.WithLabelValues(c.cfg.URL.Host).Add(float64(entriesCount))
for _, s := range batch.streams {
lbls, err := parser.ParseMetric(s.Labels)
if err != nil {
// is this possible?
level.Warn(c.logger).Log("msg", "error converting stream label string to label.Labels, cannot update lagging metric", "error", err)
return
}
var lblSet model.LabelSet
for i := range lbls {
if lbls[i].Name == LatencyLabel {
lblSet = model.LabelSet{
model.LabelName(HostLabel): model.LabelValue(c.cfg.URL.Host),
model.LabelName(LatencyLabel): model.LabelValue(lbls[i].Value),
}
}
}
if lblSet != nil {
streamLag.With(lblSet).Set(time.Since(s.Entries[len(s.Entries)-1].Timestamp).Seconds())
}
}
return
}
// Only retry 429s, 500s and connection-level errors.
if status > 0 && status != 429 && status/100 != 5 {
break
}
level.Warn(c.logger).Log("msg", "error sending batch, will retry", "status", status, "error", err)
batchRetries.WithLabelValues(c.cfg.URL.Host).Inc()
backoff.Wait()
}
if err != nil {
level.Error(c.logger).Log("msg", "final error sending batch", "status", status, "error", err)
droppedBytes.WithLabelValues(c.cfg.URL.Host).Add(bufBytes)
droppedEntries.WithLabelValues(c.cfg.URL.Host).Add(float64(entriesCount))
}
}
func (c *Client) send(ctx context.Context, tenantID string, buf []byte) (int, error) {
ctx, cancel := context.WithTimeout(ctx, c.cfg.Timeout)
defer cancel()
req, err := http.NewRequest("POST", c.cfg.URL.String(), bytes.NewReader(buf))
if err != nil {
return -1, err
}
req = req.WithContext(ctx)
req.Header.Set("Content-Type", protoContentType)
if c.cfg.EncodeJson {
req.Header.Set("Content-Type", JSONContentType)
}
req.Header.Set("User-Agent", UserAgent)
// If the tenant ID is not empty, promtail is running in multi-tenant mode,
// so we should send it to Loki
if tenantID != "" {
req.Header.Set("X-Scope-OrgID", tenantID)
}
resp, err := c.client.Do(req)
if err != nil {
return -1, err
}
defer helpers.LogError(c.logger, "closing response body", resp.Body.Close)
if resp.StatusCode/100 != 2 {
scanner := bufio.NewScanner(io.LimitReader(resp.Body, maxErrMsgLen))
line := ""
if scanner.Scan() {
line = scanner.Text()
}
err = fmt.Errorf("server returned HTTP status %s (%d): %s", resp.Status, resp.StatusCode, line)
}
return resp.StatusCode, err
}
func (c *Client) getTenantID(labels model.LabelSet) string {
// Check if it has been overridden while processing the pipeline stages
if value, ok := labels[ReservedLabelTenantID]; ok {
return string(value)
}
// Check if it has been specified in the config
if c.cfg.TenantID != "" {
return c.cfg.TenantID
}
// Defaults to an empty string, which means the X-Scope-OrgID header
// will not be sent
return ""
}
// Stop the client.
func (c *Client) Stop() {
c.once.Do(func() { close(c.quit) })
c.wg.Wait()
}
// Handle implements EntryHandler; it adds a new line to the next batch; sending is asynchronous.
func (c *Client) Handle(ls model.LabelSet, t time.Time, s string) error {
if len(c.externalLabels) > 0 {
ls = c.externalLabels.Merge(ls)
}
// Get the tenant ID in case it has been overridden while processing
// the pipeline stages, then remove the special label
tenantID := c.getTenantID(ls)
if _, ok := ls[ReservedLabelTenantID]; ok {
// Clone the label set to not manipulate the input one
ls = ls.Clone()
delete(ls, ReservedLabelTenantID)
}
c.entries <- entry{tenantID, ls, logproto.Entry{
Timestamp: t,
Line: s,
}}
return nil
}
func (c *Client) UnregisterLatencyMetric(labels model.LabelSet) {
labels[HostLabel] = model.LabelValue(c.cfg.URL.Host)
streamLag.Delete(labels)
}
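For reference, a minimal consumer-side sketch of the public API added in this file; the import path is inferred from the package layout, and the endpoint URL and label values are placeholders, so treat it as an illustration rather than the project's documented usage:

package main

import (
	"log"
	"time"

	"github.com/lixh00/loki-client-go/loki"
	"github.com/prometheus/common/model"
)

func main() {
	// NewWithDefault builds a default Config and starts the background run() loop.
	client, err := loki.NewWithDefault("http://localhost:3100/loki/api/v1/push")
	if err != nil {
		log.Fatal(err)
	}
	// Stop closes the quit channel and waits for the pending batch to be flushed.
	defer client.Stop()

	// Handle enqueues one line; batching, retries and the optional
	// X-Scope-OrgID header are handled asynchronously by the client.
	labels := model.LabelSet{"job": "demo"}
	if err := client.Handle(labels, time.Now(), "hello from loki-client-go"); err != nil {
		log.Fatal(err)
	}
}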

loki/client_test.go (new file, 356 lines)

@@ -0,0 +1,356 @@
package loki
import (
"io/ioutil"
"math"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/go-kit/kit/log"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/lixh00/loki-client-go/pkg/backoff"
"github.com/lixh00/loki-client-go/pkg/httputil"
"github.com/lixh00/loki-client-go/pkg/labelutil"
"github.com/lixh00/loki-client-go/pkg/urlutil"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/lixh00/loki-client-go/pkg/logproto"
)
var logEntries = []entry{
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(1, 0).UTC(), Line: "line1"}},
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(2, 0).UTC(), Line: "line2"}},
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(3, 0).UTC(), Line: "line3"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-1"}, Entry: logproto.Entry{Timestamp: time.Unix(4, 0).UTC(), Line: "line4"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-1"}, Entry: logproto.Entry{Timestamp: time.Unix(5, 0).UTC(), Line: "line5"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-2"}, Entry: logproto.Entry{Timestamp: time.Unix(6, 0).UTC(), Line: "line6"}},
}
type receivedReq struct {
tenantID string
pushReq logproto.PushRequest
}
func TestClient_Handle(t *testing.T) {
tests := map[string]struct {
clientBatchSize int
clientBatchWait time.Duration
clientMaxRetries int
clientTenantID string
serverResponseStatus int
inputEntries []entry
inputDelay time.Duration
expectedReqs []receivedReq
expectedMetrics string
}{
"batch log entries together until the batch size is reached": {
clientBatchSize: 10,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1], logEntries[2]},
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[2].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 3.0
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 0
`,
},
"batch log entries together until the batch wait time is reached": {
clientBatchSize: 10,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1]},
inputDelay: 110 * time.Millisecond,
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[1].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 2.0
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 0
`,
},
"retry send a batch up to backoff's max retries in case the server responds with a 5xx": {
clientBatchSize: 10,
clientBatchWait: 10 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 500,
inputEntries: []entry{logEntries[0]},
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 1.0
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 0
`,
},
"do not retry send a batch in case the server responds with a 4xx": {
clientBatchSize: 10,
clientBatchWait: 10 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 400,
inputEntries: []entry{logEntries[0]},
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 1.0
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 0
`,
},
"do retry sending a batch in case the server responds with a 429": {
clientBatchSize: 10,
clientBatchWait: 10 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 429,
inputEntries: []entry{logEntries[0]},
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 1.0
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 0
`,
},
"batch log entries together honoring the client tenant ID": {
clientBatchSize: 100,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
clientTenantID: "tenant-default",
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1]},
expectedReqs: []receivedReq{
{
tenantID: "tenant-default",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 2.0
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 0
`,
},
"batch log entries together honoring the tenant ID overridden while processing the pipeline stages": {
clientBatchSize: 100,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
clientTenantID: "tenant-default",
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[3], logEntries[4], logEntries[5]},
expectedReqs: []receivedReq{
{
tenantID: "tenant-default",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "tenant-1",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[3].Entry, logEntries[4].Entry}}}},
},
{
tenantID: "tenant-2",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[5].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 4.0
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 0
`,
},
}
for testName, testData := range tests {
t.Run(testName, func(t *testing.T) {
// Reset metrics
sentEntries.Reset()
droppedEntries.Reset()
// Create a buffered channel where we enqueue received requests
receivedReqsChan := make(chan receivedReq, 10)
// Start a local HTTP server
server := httptest.NewServer(createServerHandler(receivedReqsChan, testData.serverResponseStatus))
require.NotNil(t, server)
defer server.Close()
// Get the URL at which the local test server is listening
serverURL := urlutil.URLValue{}
err := serverURL.Set(server.URL)
require.NoError(t, err)
// Instantiate the client
cfg := Config{
URL: serverURL,
BatchWait: testData.clientBatchWait,
BatchSize: testData.clientBatchSize,
Client: config.HTTPClientConfig{},
BackoffConfig: backoff.BackoffConfig{MinBackoff: 1 * time.Millisecond, MaxBackoff: 2 * time.Millisecond, MaxRetries: testData.clientMaxRetries},
ExternalLabels: labelutil.LabelSet{},
Timeout: 1 * time.Second,
TenantID: testData.clientTenantID,
}
c, err := NewWithLogger(cfg, log.NewNopLogger())
require.NoError(t, err)
// Send all the input log entries
for i, logEntry := range testData.inputEntries {
err = c.Handle(logEntry.labels, logEntry.Timestamp, logEntry.Line)
require.NoError(t, err)
if testData.inputDelay > 0 && i < len(testData.inputEntries)-1 {
time.Sleep(testData.inputDelay)
}
}
// Wait until the expected push requests are received (with a timeout)
deadline := time.Now().Add(1 * time.Second)
for len(receivedReqsChan) < len(testData.expectedReqs) && time.Now().Before(deadline) {
time.Sleep(5 * time.Millisecond)
}
// Stop the client: it waits until the current batch is sent
c.Stop()
close(receivedReqsChan)
// Get all push requests received on the server side
receivedReqs := make([]receivedReq, 0)
for req := range receivedReqsChan {
receivedReqs = append(receivedReqs, req)
}
// Due to implementation details (map iteration order is random) we just check
// that the expected requests match the received requests, without checking
// the exact order, which is not guaranteed in the multi-tenant case
require.ElementsMatch(t, testData.expectedReqs, receivedReqs)
expectedMetrics := strings.Replace(testData.expectedMetrics, "__HOST__", serverURL.Host, -1)
err = testutil.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(expectedMetrics), "promtail_sent_entries_total", "promtail_dropped_entries_total")
assert.NoError(t, err)
})
}
}
func createServerHandler(receivedReqsChan chan receivedReq, status int) http.HandlerFunc {
return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
// Parse the request
var pushReq logproto.PushRequest
if err := httputil.ParseProtoReader(req.Context(), req.Body, int(req.ContentLength), math.MaxInt32, &pushReq, httputil.RawSnappy); err != nil {
rw.WriteHeader(500)
return
}
receivedReqsChan <- receivedReq{
tenantID: req.Header.Get("X-Scope-OrgID"),
pushReq: pushReq,
}
rw.WriteHeader(status)
})
}
type roundTripFunc func(r *http.Request) (*http.Response, error)
func (s roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
return s(r)
}
func TestClient_EncodeJSON(t *testing.T) {
c, err := NewWithDefault("http://loki.com/loki/api/v1/push")
require.NoError(t, err)
c.cfg.EncodeJson = true
c.client.Transport = roundTripFunc(func(r *http.Request) (*http.Response, error) {
require.Equal(t, r.Header.Get("Content-Type"), JSONContentType)
require.Equal(t, r.URL.Path, "/loki/api/v1/push")
b, err := ioutil.ReadAll(r.Body)
require.NoError(t, err)
require.Equal(t, `{"streams":[{"stream":{"foo":"bar"},"values":[["1","11"],["2","22"]]},{"stream":{"foo":"buzz"},"values":[["3","33"],["4","44"]]}]}`, string(b))
return &http.Response{StatusCode: 200, Body: http.NoBody}, nil
})
c.sendBatch("",
newBatch(
entry{labels: model.LabelSet{"foo": "bar"}, Entry: logproto.Entry{Timestamp: time.Unix(0, 1), Line: "11"}},
entry{labels: model.LabelSet{"foo": "bar"}, Entry: logproto.Entry{Timestamp: time.Unix(0, 2), Line: "22"}},
entry{labels: model.LabelSet{"foo": "buzz"}, Entry: logproto.Entry{Timestamp: time.Unix(0, 3), Line: "33"}},
entry{labels: model.LabelSet{"foo": "buzz"}, Entry: logproto.Entry{Timestamp: time.Unix(0, 4), Line: "44"}},
),
)
}

loki/config.go (new file, 109 lines)

@@ -0,0 +1,109 @@
package loki
import (
"flag"
"time"
"github.com/lixh00/loki-client-go/pkg/backoff"
"github.com/lixh00/loki-client-go/pkg/labelutil"
"github.com/lixh00/loki-client-go/pkg/urlutil"
"github.com/prometheus/common/config"
)
// NOTE: the helm charts for promtail and fluent-bit also have defaults for these values; please update them to match if you make changes here.
const (
BatchWait = 1 * time.Second
BatchSize int = 1024 * 1024
MinBackoff = 500 * time.Millisecond
MaxBackoff = 5 * time.Minute
MaxRetries int = 10
Timeout = 10 * time.Second
)
// Config describes configuration for a HTTP pusher client.
type Config struct {
URL urlutil.URLValue
BatchWait time.Duration
BatchSize int
Client config.HTTPClientConfig `yaml:",inline"`
BackoffConfig backoff.BackoffConfig `yaml:"backoff_config"`
// The external labels to add to every log stream when communicating with Loki
ExternalLabels labelutil.LabelSet `yaml:"external_labels,omitempty"`
Timeout time.Duration `yaml:"timeout"`
// The tenant ID to use when pushing logs to Loki (empty string means
// single tenant mode)
TenantID string `yaml:"tenant_id"`
// Use the Loki JSON API instead of the snappy-compressed protobuf.
EncodeJson bool `yaml:"encode_json"`
}
// NewDefaultConfig creates a default configuration for a given target Loki URL.
func NewDefaultConfig(url string) (Config, error) {
var cfg Config
var u urlutil.URLValue
f := &flag.FlagSet{}
cfg.RegisterFlags(f)
if err := f.Parse(nil); err != nil {
return cfg, err
}
if err := u.Set(url); err != nil {
return cfg, err
}
cfg.URL = u
return cfg, nil
}
// RegisterFlagsWithPrefix registers flags, prefixing every flag name with the
// given prefix. If prefix is non-empty, it should end with a period.
func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.Var(&c.URL, prefix+"client.url", "URL of log server")
f.DurationVar(&c.BatchWait, prefix+"client.batch-wait", BatchWait, "Maximum wait period before sending batch.")
f.IntVar(&c.BatchSize, prefix+"client.batch-size-bytes", BatchSize, "Maximum batch size to accrue before sending. ")
// Default backoff schedule: 0.5s, 1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s (4.267m), for a total of 511.5s (8.5m) before logs are lost
f.IntVar(&c.BackoffConfig.MaxRetries, prefix+"client.max-retries", MaxRetries, "Maximum number of retries when sending batches.")
f.DurationVar(&c.BackoffConfig.MinBackoff, prefix+"client.min-backoff", MinBackoff, "Initial backoff time between retries.")
f.DurationVar(&c.BackoffConfig.MaxBackoff, prefix+"client.max-backoff", MaxBackoff, "Maximum backoff time between retries.")
f.DurationVar(&c.Timeout, prefix+"client.timeout", Timeout, "Maximum time to wait for server to respond to a request")
f.Var(&c.ExternalLabels, prefix+"client.external-labels", "list of external labels to add to each log (e.g: --client.external-labels=lb1=v1,lb2=v2)")
f.StringVar(&c.TenantID, prefix+"client.tenant-id", "", "Tenant ID to use when pushing logs to Loki.")
f.BoolVar(&c.EncodeJson, prefix+"client.encode-json", false, "Encode payload in JSON; defaults to snappy-compressed protobuf.")
}
// RegisterFlags registers flags.
func (c *Config) RegisterFlags(flags *flag.FlagSet) {
c.RegisterFlagsWithPrefix("", flags)
}
// UnmarshalYAML implements yaml.Unmarshaler
func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
type raw Config
var cfg raw
if c.URL.URL != nil {
// the value was already set via flags, which apply sane defaults.
cfg = raw(*c)
} else {
// force sane defaults.
cfg = raw{
BackoffConfig: backoff.BackoffConfig{
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
}
}
if err := unmarshal(&cfg); err != nil {
return err
}
*c = Config(cfg)
return nil
}
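A hedged fragment showing how a few of these fields could be overridden programmatically before constructing a client; the field names come from the Config struct above, while the URL and tenant ID are placeholders and the imports match the sketch after client.go:

// Start from the flag defaults, then override selectively.
cfg, err := loki.NewDefaultConfig("http://localhost:3100/loki/api/v1/push")
if err != nil {
	log.Fatal(err)
}
cfg.BatchWait = 2 * time.Second // send a batch once it has been pending ~2s, even if not full
cfg.BatchSize = 512 * 1024      // or as soon as adding an entry would exceed 512 KiB
cfg.TenantID = "tenant-1"       // sent as the X-Scope-OrgID header when non-empty
cfg.EncodeJson = true           // push JSON instead of snappy-compressed protobuf

client, err := loki.New(cfg)
if err != nil {
	log.Fatal(err)
}
defer client.Stop()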

loki/config_test.go (new file, 94 lines)

@@ -0,0 +1,94 @@
package loki
import (
"net/url"
"reflect"
"testing"
"time"
"github.com/lixh00/loki-client-go/pkg/backoff"
"github.com/lixh00/loki-client-go/pkg/urlutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
)
var clientConfig = Config{}
var clientDefaultConfig = `
url: http://localhost:3100/loki/api/v1/push
`
var clientCustomConfig = `
url: http://localhost:3100/loki/api/v1/push
backoff_config:
max_retries: 20
min_period: 5s
max_period: 1m
batchwait: 5s
batchsize: 204800
timeout: 5s
`
func Test_Config(t *testing.T) {
u, err := url.Parse("http://localhost:3100/loki/api/v1/push")
require.NoError(t, err)
tests := []struct {
configValues string
expectedConfig Config
}{
{
clientDefaultConfig,
Config{
URL: urlutil.URLValue{
URL: u,
},
BackoffConfig: backoff.BackoffConfig{
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
},
},
{
clientCustomConfig,
Config{
URL: urlutil.URLValue{
URL: u,
},
BackoffConfig: backoff.BackoffConfig{
MaxBackoff: 1 * time.Minute,
MaxRetries: 20,
MinBackoff: 5 * time.Second,
},
BatchSize: 100 * 2048,
BatchWait: 5 * time.Second,
Timeout: 5 * time.Second,
},
},
}
for _, tc := range tests {
err := yaml.Unmarshal([]byte(tc.configValues), &clientConfig)
require.NoError(t, err)
if !reflect.DeepEqual(tc.expectedConfig, clientConfig) {
t.Errorf("Configs does not match, expected: %v, received: %v", tc.expectedConfig, clientConfig)
}
}
}
func TestDefaultConfig(t *testing.T) {
cfg, err := NewDefaultConfig("http://loki.com")
assert.Nil(t, err)
assert.Equal(t, cfg.BatchSize, BatchSize)
assert.Equal(t, cfg.BatchWait, BatchWait)
assert.Equal(t, cfg.Timeout, Timeout)
assert.Equal(t, cfg.BackoffConfig.MaxBackoff, MaxBackoff)
assert.Equal(t, cfg.BackoffConfig.MinBackoff, MinBackoff)
assert.Equal(t, cfg.BackoffConfig.MaxRetries, MaxRetries)
assert.Equal(t, cfg.URL.URL.String(), "http://loki.com")
}