Migrate project

2022-09-07 17:17:11 +08:00
parent 12ea86b2fb
commit 5d4d02d679
29 changed files with 11431 additions and 43 deletions

loki/batch.go (new file, 107 lines)

@@ -0,0 +1,107 @@
package loki
import (
"time"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
json "github.com/json-iterator/go"
"github.com/lixh00/loki-client-go/pkg/logproto"
)
// batch holds pending log streams waiting to be sent to Loki. It is used
// to reduce the number of push requests to Loki by aggregating multiple log
// streams and entries in a single batch request. In case of multi-tenant
// Promtail, log streams for each tenant are stored in a dedicated batch.
type batch struct {
streams map[string]*logproto.Stream
bytes int
createdAt time.Time
}
func newBatch(entries ...entry) *batch {
b := &batch{
streams: map[string]*logproto.Stream{},
bytes: 0,
createdAt: time.Now(),
}
// Add entries to the batch
for _, entry := range entries {
b.add(entry)
}
return b
}
// add an entry to the batch
func (b *batch) add(entry entry) {
b.bytes += len(entry.Line)
// Append the entry to an already existing stream (if any)
labels := entry.labels.String()
if stream, ok := b.streams[labels]; ok {
stream.Entries = append(stream.Entries, entry.Entry)
return
}
// Add the entry as a new stream
b.streams[labels] = &logproto.Stream{
Labels: labels,
Entries: []logproto.Entry{entry.Entry},
}
}
// sizeBytes returns the current batch size in bytes
func (b *batch) sizeBytes() int {
return b.bytes
}
// sizeBytesAfter returns the size the batch would have after the input
// entry is added to it
func (b *batch) sizeBytesAfter(entry entry) int {
return b.bytes + len(entry.Line)
}
// age returns the time elapsed since the batch was created
func (b *batch) age() time.Duration {
return time.Since(b.createdAt)
}
// encode encodes the batch as a snappy-compressed push request and returns
// the encoded bytes and the number of encoded entries
func (b *batch) encode() ([]byte, int, error) {
req, entriesCount := b.createPushRequest()
buf, err := proto.Marshal(req)
if err != nil {
return nil, 0, err
}
buf = snappy.Encode(nil, buf)
return buf, entriesCount, nil
}
// encodeJSON encodes the batch as a JSON push request and returns
// the encoded bytes and the number of encoded entries
func (b *batch) encodeJSON() ([]byte, int, error) {
req, entriesCount := b.createPushRequest()
buf, err := json.Marshal(req)
if err != nil {
return nil, 0, err
}
return buf, entriesCount, nil
}
// createPushRequest builds the push request and returns it together with the number of entries
func (b *batch) createPushRequest() (*logproto.PushRequest, int) {
req := logproto.PushRequest{
Streams: make([]logproto.Stream, 0, len(b.streams)),
}
entriesCount := 0
for _, stream := range b.streams {
req.Streams = append(req.Streams, *stream)
entriesCount += len(stream.Entries)
}
return &req, entriesCount
}

loki/batch_test.go (new file, 139 lines)

@@ -0,0 +1,139 @@
package loki
import (
"fmt"
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/lixh00/loki-client-go/pkg/logproto"
)
func TestBatch_add(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputEntries []entry
expectedSizeBytes int
}{
"empty batch": {
inputEntries: []entry{},
expectedSizeBytes: 0,
},
"single stream with single log entry": {
inputEntries: []entry{
{"tenant", model.LabelSet{}, logEntries[0].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line),
},
"single stream with multiple log entries": {
inputEntries: []entry{
{"tenant", model.LabelSet{}, logEntries[0].Entry},
{"tenant", model.LabelSet{}, logEntries[1].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line) + len(logEntries[1].Entry.Line),
},
"multiple streams with multiple log entries": {
inputEntries: []entry{
{"tenant", model.LabelSet{"type": "a"}, logEntries[0].Entry},
{"tenant", model.LabelSet{"type": "a"}, logEntries[1].Entry},
{"tenant", model.LabelSet{"type": "b"}, logEntries[2].Entry},
},
expectedSizeBytes: len(logEntries[0].Entry.Line) + len(logEntries[1].Entry.Line) + len(logEntries[2].Entry.Line),
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
b := newBatch()
for _, entry := range testData.inputEntries {
b.add(entry)
}
assert.Equal(t, testData.expectedSizeBytes, b.sizeBytes())
})
}
}
func TestBatch_encode(t *testing.T) {
t.Parallel()
tests := map[string]struct {
inputBatch *batch
expectedEntriesCount int
}{
"empty batch": {
inputBatch: newBatch(),
expectedEntriesCount: 0,
},
"single stream with single log entry": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{}, logEntries[0].Entry},
),
expectedEntriesCount: 1,
},
"single stream with multiple log entries": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{}, logEntries[0].Entry},
entry{"tenant", model.LabelSet{}, logEntries[1].Entry},
),
expectedEntriesCount: 2,
},
"multiple streams with multiple log entries": {
inputBatch: newBatch(
entry{"tenant", model.LabelSet{"type": "a"}, logEntries[0].Entry},
entry{"tenant", model.LabelSet{"type": "a"}, logEntries[1].Entry},
entry{"tenant", model.LabelSet{"type": "b"}, logEntries[2].Entry},
),
expectedEntriesCount: 3,
},
}
for testName, testData := range tests {
testData := testData
t.Run(testName, func(t *testing.T) {
t.Parallel()
_, entriesCount, err := testData.inputBatch.encode()
require.NoError(t, err)
assert.Equal(t, testData.expectedEntriesCount, entriesCount)
})
}
}
func TestHashCollisions(t *testing.T) {
b := newBatch()
ls1 := model.LabelSet{"app": "l", "uniq0": "0", "uniq1": "1"}
ls2 := model.LabelSet{"app": "m", "uniq0": "1", "uniq1": "1"}
require.False(t, ls1.Equal(ls2))
require.Equal(t, ls1.FastFingerprint(), ls2.FastFingerprint())
const entriesPerLabel = 10
for i := 0; i < entriesPerLabel; i++ {
b.add(entry{labels: ls1, Entry: logproto.Entry{Timestamp: time.Now(), Line: fmt.Sprintf("line %d", i)}})
b.add(entry{labels: ls2, Entry: logproto.Entry{Timestamp: time.Now(), Line: fmt.Sprintf("line %d", i)}})
}
// make sure that colliding labels are stored properly as independent streams
req, entries := b.createPushRequest()
assert.Len(t, req.Streams, 2)
assert.Equal(t, 2*entriesPerLabel, entries)
if req.Streams[0].Labels == ls1.String() {
assert.Equal(t, ls1.String(), req.Streams[0].Labels)
assert.Equal(t, ls2.String(), req.Streams[1].Labels)
} else {
assert.Equal(t, ls2.String(), req.Streams[0].Labels)
assert.Equal(t, ls1.String(), req.Streams[1].Labels)
}
}
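A compact hedged sketch of the grouping behavior, written as an extra test that could sit in this file; it relies only on the imports already present, and the "job" label and the log lines are placeholders, not part of this commit:

// Hypothetical additional test (illustration only): entries sharing a label
// set collapse into one stream, while bytes still counts every line.
func TestBatchGroupingSketch(t *testing.T) {
	b := newBatch(
		entry{labels: model.LabelSet{"job": "demo"}, Entry: logproto.Entry{Timestamp: time.Now(), Line: "hello"}},
		entry{labels: model.LabelSet{"job": "demo"}, Entry: logproto.Entry{Timestamp: time.Now(), Line: "world"}},
	)
	assert.Len(t, b.streams, 1) // a single stream keyed by `{job="demo"}`
	assert.Equal(t, len("hello")+len("world"), b.sizeBytes())
	buf, entriesCount, err := b.encode() // snappy-compressed protobuf push request
	require.NoError(t, err)
	assert.Equal(t, 2, entriesCount)
	assert.NotEmpty(t, buf)
}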

loki/client.go (new file, 403 lines)

@@ -0,0 +1,403 @@
package loki
import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
"os"
"strconv"
"sync"
"time"
"github.com/lixh00/loki-client-go/pkg/backoff"
"github.com/prometheus/prometheus/promql/parser"
"github.com/lixh00/loki-client-go/pkg/metric"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
"github.com/lixh00/loki-client-go/pkg/helpers"
"github.com/lixh00/loki-client-go/pkg/logproto"
)
const (
protoContentType = "application/x-protobuf"
JSONContentType = "application/json"
maxErrMsgLen = 1024
// Label reserved to override the tenant ID while processing
// pipeline stages
ReservedLabelTenantID = "__tenant_id__"
LatencyLabel = "filename"
HostLabel = "host"
)
var (
encodedBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "encoded_bytes_total",
Help: "Number of bytes encoded and ready to send.",
}, []string{HostLabel})
sentBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "sent_bytes_total",
Help: "Number of bytes sent.",
}, []string{HostLabel})
droppedBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "dropped_bytes_total",
Help: "Number of bytes dropped because failed to be sent to the ingester after all retries.",
}, []string{HostLabel})
sentEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "sent_entries_total",
Help: "Number of log entries sent to the ingester.",
}, []string{HostLabel})
droppedEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "dropped_entries_total",
Help: "Number of log entries dropped because failed to be sent to the ingester after all retries.",
}, []string{HostLabel})
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "promtail",
Name: "request_duration_seconds",
Help: "Duration of send requests.",
}, []string{"status_code", HostLabel})
batchRetries = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "promtail",
Name: "batch_retries_total",
Help: "Number of times batches has had to be retried.",
}, []string{HostLabel})
streamLag *metric.Gauges
countersWithHost = []*prometheus.CounterVec{
encodedBytes, sentBytes, droppedBytes, sentEntries, droppedEntries,
}
UserAgent = fmt.Sprintf("promtail/%s", version.Version)
)
func init() {
prometheus.MustRegister(encodedBytes)
prometheus.MustRegister(sentBytes)
prometheus.MustRegister(droppedBytes)
prometheus.MustRegister(sentEntries)
prometheus.MustRegister(droppedEntries)
prometheus.MustRegister(requestDuration)
prometheus.MustRegister(batchRetries)
var err error
streamLag, err = metric.NewGauges("promtail_stream_lag_seconds",
"Difference between current time and last batch timestamp for successful sends",
metric.GaugeConfig{Action: "set"},
int64(1*time.Minute.Seconds()), // This strips out files which update slowly and reduces noise in this metric.
)
if err != nil {
panic(err)
}
prometheus.MustRegister(streamLag)
}
// Client for pushing logs in snappy-compressed protos over HTTP.
type Client struct {
logger log.Logger
cfg Config
client *http.Client
quit chan struct{}
once sync.Once
entries chan entry
wg sync.WaitGroup
externalLabels model.LabelSet
}
type entry struct {
tenantID string
labels model.LabelSet
logproto.Entry
}
// New makes a new Client from config
func New(cfg Config) (*Client, error) {
logger := level.NewFilter(log.NewLogfmtLogger(os.Stdout), level.AllowWarn())
return NewWithLogger(cfg, logger)
}
// NewWithDefault creates a new client with default configuration.
func NewWithDefault(url string) (*Client, error) {
cfg, err := NewDefaultConfig(url)
if err != nil {
return nil, err
}
return New(cfg)
}
// NewWithLogger makes a new Client from a logger and a config
func NewWithLogger(cfg Config, logger log.Logger) (*Client, error) {
if cfg.URL.URL == nil {
return nil, errors.New("client needs target URL")
}
c := &Client{
logger: log.With(logger, "component", "client", "host", cfg.URL.Host),
cfg: cfg,
quit: make(chan struct{}),
entries: make(chan entry),
externalLabels: cfg.ExternalLabels.LabelSet,
}
err := cfg.Client.Validate()
if err != nil {
return nil, err
}
c.client, err = config.NewClientFromConfig(cfg.Client, "promtail", config.WithKeepAlivesDisabled(), config.WithHTTP2Disabled())
if err != nil {
return nil, err
}
c.client.Timeout = cfg.Timeout
// Initialize counters to 0 so the metrics are exported before they are
// first incremented, avoiding gaps in the series.
for _, counter := range countersWithHost {
counter.WithLabelValues(c.cfg.URL.Host).Add(0)
}
c.wg.Add(1)
go c.run()
return c, nil
}
func (c *Client) run() {
batches := map[string]*batch{}
// Given the client handles multiple batches (1 per tenant) and each batch
// can be created at a different point in time, we check for batches whose
// max wait time has been reached 10 times per BatchWait period, so that the
// maximum extra delay before sending a batch is 10% of the max waiting time.
// The check interval is floored at 10ms to avoid overly frequent checks when
// BatchWait is very low.
minWaitCheckFrequency := 10 * time.Millisecond
maxWaitCheckFrequency := c.cfg.BatchWait / 10
if maxWaitCheckFrequency < minWaitCheckFrequency {
maxWaitCheckFrequency = minWaitCheckFrequency
}
maxWaitCheck := time.NewTicker(maxWaitCheckFrequency)
defer func() {
// Send all pending batches
for tenantID, batch := range batches {
c.sendBatch(tenantID, batch)
}
c.wg.Done()
}()
for {
select {
case <-c.quit:
return
case e := <-c.entries:
batch, ok := batches[e.tenantID]
// If the batch doesn't exist yet, we create a new one with the entry
if !ok {
batches[e.tenantID] = newBatch(e)
break
}
// If adding the entry to the batch will increase the size over the max
// size allowed, we do send the current batch and then create a new one
if batch.sizeBytesAfter(e) > c.cfg.BatchSize {
c.sendBatch(e.tenantID, batch)
batches[e.tenantID] = newBatch(e)
break
}
// The max size of the batch isn't reached, so we can add the entry
batch.add(e)
case <-maxWaitCheck.C:
// Send all batches whose max wait time has been reached
for tenantID, batch := range batches {
if batch.age() < c.cfg.BatchWait {
continue
}
c.sendBatch(tenantID, batch)
delete(batches, tenantID)
}
}
}
}
func (c *Client) sendBatch(tenantID string, batch *batch) {
var (
err error
buf []byte
entriesCount int
)
if c.cfg.EncodeJson {
buf, entriesCount, err = batch.encodeJSON()
} else {
buf, entriesCount, err = batch.encode()
}
if err != nil {
level.Error(c.logger).Log("msg", "error encoding batch", "error", err)
return
}
bufBytes := float64(len(buf))
encodedBytes.WithLabelValues(c.cfg.URL.Host).Add(bufBytes)
ctx := context.Background()
backoff := backoff.New(ctx, c.cfg.BackoffConfig)
var status int
for backoff.Ongoing() {
start := time.Now()
status, err = c.send(ctx, tenantID, buf)
requestDuration.WithLabelValues(strconv.Itoa(status), c.cfg.URL.Host).Observe(time.Since(start).Seconds())
if err == nil {
sentBytes.WithLabelValues(c.cfg.URL.Host).Add(bufBytes)
sentEntries.WithLabelValues(c.cfg.URL.Host).Add(float64(entriesCount))
for _, s := range batch.streams {
lbls, err := parser.ParseMetric(s.Labels)
if err != nil {
// is this possible?
level.Warn(c.logger).Log("msg", "error converting stream label string to label.Labels, cannot update lagging metric", "error", err)
return
}
var lblSet model.LabelSet
for i := range lbls {
if lbls[i].Name == LatencyLabel {
lblSet = model.LabelSet{
model.LabelName(HostLabel): model.LabelValue(c.cfg.URL.Host),
model.LabelName(LatencyLabel): model.LabelValue(lbls[i].Value),
}
}
}
if lblSet != nil {
streamLag.With(lblSet).Set(time.Since(s.Entries[len(s.Entries)-1].Timestamp).Seconds())
}
}
return
}
// Only retry 429s, 500s and connection-level errors.
if status > 0 && status != 429 && status/100 != 5 {
break
}
level.Warn(c.logger).Log("msg", "error sending batch, will retry", "status", status, "error", err)
batchRetries.WithLabelValues(c.cfg.URL.Host).Inc()
backoff.Wait()
}
if err != nil {
level.Error(c.logger).Log("msg", "final error sending batch", "status", status, "error", err)
droppedBytes.WithLabelValues(c.cfg.URL.Host).Add(bufBytes)
droppedEntries.WithLabelValues(c.cfg.URL.Host).Add(float64(entriesCount))
}
}
func (c *Client) send(ctx context.Context, tenantID string, buf []byte) (int, error) {
ctx, cancel := context.WithTimeout(ctx, c.cfg.Timeout)
defer cancel()
req, err := http.NewRequest("POST", c.cfg.URL.String(), bytes.NewReader(buf))
if err != nil {
return -1, err
}
req = req.WithContext(ctx)
req.Header.Set("Content-Type", protoContentType)
if c.cfg.EncodeJson {
req.Header.Set("Content-Type", JSONContentType)
}
req.Header.Set("User-Agent", UserAgent)
// If the tenant ID is not empty, promtail is running in multi-tenant mode,
// so we should send it to Loki
if tenantID != "" {
req.Header.Set("X-Scope-OrgID", tenantID)
}
resp, err := c.client.Do(req)
if err != nil {
return -1, err
}
defer helpers.LogError(c.logger, "closing response body", resp.Body.Close)
if resp.StatusCode/100 != 2 {
scanner := bufio.NewScanner(io.LimitReader(resp.Body, maxErrMsgLen))
line := ""
if scanner.Scan() {
line = scanner.Text()
}
err = fmt.Errorf("server returned HTTP status %s (%d): %s", resp.Status, resp.StatusCode, line)
}
return resp.StatusCode, err
}
func (c *Client) getTenantID(labels model.LabelSet) string {
// Check if it has been overridden while processing the pipeline stages
if value, ok := labels[ReservedLabelTenantID]; ok {
return string(value)
}
// Check if it has been specified in the config
if c.cfg.TenantID != "" {
return c.cfg.TenantID
}
// Defaults to an empty string, which means the X-Scope-OrgID header
// will not be sent
return ""
}
// Stop the client.
func (c *Client) Stop() {
c.once.Do(func() { close(c.quit) })
c.wg.Wait()
}
// Handle implements EntryHandler; it adds a new line to the next batch; sending is asynchronous.
func (c *Client) Handle(ls model.LabelSet, t time.Time, s string) error {
if len(c.externalLabels) > 0 {
ls = c.externalLabels.Merge(ls)
}
// Get the tenant ID in case it has been overridden while processing
// the pipeline stages, then remove the special label
tenantID := c.getTenantID(ls)
if _, ok := ls[ReservedLabelTenantID]; ok {
// Clone the label set to not manipulate the input one
ls = ls.Clone()
delete(ls, ReservedLabelTenantID)
}
c.entries <- entry{tenantID, ls, logproto.Entry{
Timestamp: t,
Line: s,
}}
return nil
}
func (c *Client) UnregisterLatencyMetric(labels model.LabelSet) {
labels[HostLabel] = model.LabelValue(c.cfg.URL.Host)
streamLag.Delete(labels)
}
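For reference, a minimal consumer-side sketch of the public API added in this file; the import path is inferred from the package layout, and the endpoint URL and label values are placeholders, so treat it as an illustration rather than the project's documented usage:

package main

import (
	"log"
	"time"

	"github.com/lixh00/loki-client-go/loki"
	"github.com/prometheus/common/model"
)

func main() {
	// NewWithDefault builds a default Config and starts the background run() loop.
	client, err := loki.NewWithDefault("http://localhost:3100/loki/api/v1/push")
	if err != nil {
		log.Fatal(err)
	}
	// Stop closes the quit channel and waits for the pending batch to be flushed.
	defer client.Stop()

	// Handle enqueues one line; batching, retries and the optional
	// X-Scope-OrgID header are handled asynchronously by the client.
	labels := model.LabelSet{"job": "demo"}
	if err := client.Handle(labels, time.Now(), "hello from loki-client-go"); err != nil {
		log.Fatal(err)
	}
}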

loki/client_test.go (new file, 356 lines)

@@ -0,0 +1,356 @@
package loki
import (
"io/ioutil"
"math"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/go-kit/kit/log"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/lixh00/loki-client-go/pkg/backoff"
"github.com/lixh00/loki-client-go/pkg/httputil"
"github.com/lixh00/loki-client-go/pkg/labelutil"
"github.com/lixh00/loki-client-go/pkg/urlutil"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/lixh00/loki-client-go/pkg/logproto"
)
var logEntries = []entry{
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(1, 0).UTC(), Line: "line1"}},
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(2, 0).UTC(), Line: "line2"}},
{labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Unix(3, 0).UTC(), Line: "line3"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-1"}, Entry: logproto.Entry{Timestamp: time.Unix(4, 0).UTC(), Line: "line4"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-1"}, Entry: logproto.Entry{Timestamp: time.Unix(5, 0).UTC(), Line: "line5"}},
{labels: model.LabelSet{"__tenant_id__": "tenant-2"}, Entry: logproto.Entry{Timestamp: time.Unix(6, 0).UTC(), Line: "line6"}},
}
type receivedReq struct {
tenantID string
pushReq logproto.PushRequest
}
func TestClient_Handle(t *testing.T) {
tests := map[string]struct {
clientBatchSize int
clientBatchWait time.Duration
clientMaxRetries int
clientTenantID string
serverResponseStatus int
inputEntries []entry
inputDelay time.Duration
expectedReqs []receivedReq
expectedMetrics string
}{
"batch log entries together until the batch size is reached": {
clientBatchSize: 10,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1], logEntries[2]},
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[2].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 3.0
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 0
`,
},
"batch log entries together until the batch wait time is reached": {
clientBatchSize: 10,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1]},
inputDelay: 110 * time.Millisecond,
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[1].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 2.0
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 0
`,
},
"retry send a batch up to backoff's max retries in case the server responds with a 5xx": {
clientBatchSize: 10,
clientBatchWait: 10 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 500,
inputEntries: []entry{logEntries[0]},
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 1.0
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 0
`,
},
"do not retry send a batch in case the server responds with a 4xx": {
clientBatchSize: 10,
clientBatchWait: 10 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 400,
inputEntries: []entry{logEntries[0]},
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 1.0
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 0
`,
},
"do retry sending a batch in case the server responds with a 429": {
clientBatchSize: 10,
clientBatchWait: 10 * time.Millisecond,
clientMaxRetries: 3,
serverResponseStatus: 429,
inputEntries: []entry{logEntries[0]},
expectedReqs: []receivedReq{
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 1.0
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 0
`,
},
"batch log entries together honoring the client tenant ID": {
clientBatchSize: 100,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
clientTenantID: "tenant-default",
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[1]},
expectedReqs: []receivedReq{
{
tenantID: "tenant-default",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry, logEntries[1].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 2.0
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 0
`,
},
"batch log entries together honoring the tenant ID overridden while processing the pipeline stages": {
clientBatchSize: 100,
clientBatchWait: 100 * time.Millisecond,
clientMaxRetries: 3,
clientTenantID: "tenant-default",
serverResponseStatus: 200,
inputEntries: []entry{logEntries[0], logEntries[3], logEntries[4], logEntries[5]},
expectedReqs: []receivedReq{
{
tenantID: "tenant-default",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[0].Entry}}}},
},
{
tenantID: "tenant-1",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[3].Entry, logEntries[4].Entry}}}},
},
{
tenantID: "tenant-2",
pushReq: logproto.PushRequest{Streams: []logproto.Stream{{Labels: "{}", Entries: []logproto.Entry{logEntries[5].Entry}}}},
},
},
expectedMetrics: `
# HELP promtail_sent_entries_total Number of log entries sent to the ingester.
# TYPE promtail_sent_entries_total counter
promtail_sent_entries_total{host="__HOST__"} 4.0
# HELP promtail_dropped_entries_total Number of log entries dropped because failed to be sent to the ingester after all retries.
# TYPE promtail_dropped_entries_total counter
promtail_dropped_entries_total{host="__HOST__"} 0
`,
},
}
for testName, testData := range tests {
t.Run(testName, func(t *testing.T) {
// Reset metrics
sentEntries.Reset()
droppedEntries.Reset()
// Create a buffered channel where we enqueue received requests
receivedReqsChan := make(chan receivedReq, 10)
// Start a local HTTP server
server := httptest.NewServer(createServerHandler(receivedReqsChan, testData.serverResponseStatus))
require.NotNil(t, server)
defer server.Close()
// Get the URL at which the local test server is listening
serverURL := urlutil.URLValue{}
err := serverURL.Set(server.URL)
require.NoError(t, err)
// Instantiate the client
cfg := Config{
URL: serverURL,
BatchWait: testData.clientBatchWait,
BatchSize: testData.clientBatchSize,
Client: config.HTTPClientConfig{},
BackoffConfig: backoff.BackoffConfig{MinBackoff: 1 * time.Millisecond, MaxBackoff: 2 * time.Millisecond, MaxRetries: testData.clientMaxRetries},
ExternalLabels: labelutil.LabelSet{},
Timeout: 1 * time.Second,
TenantID: testData.clientTenantID,
}
c, err := NewWithLogger(cfg, log.NewNopLogger())
require.NoError(t, err)
// Send all the input log entries
for i, logEntry := range testData.inputEntries {
err = c.Handle(logEntry.labels, logEntry.Timestamp, logEntry.Line)
require.NoError(t, err)
if testData.inputDelay > 0 && i < len(testData.inputEntries)-1 {
time.Sleep(testData.inputDelay)
}
}
// Wait until the expected push requests are received (with a timeout)
deadline := time.Now().Add(1 * time.Second)
for len(receivedReqsChan) < len(testData.expectedReqs) && time.Now().Before(deadline) {
time.Sleep(5 * time.Millisecond)
}
// Stop the client: it waits until the current batch is sent
c.Stop()
close(receivedReqsChan)
// Get all push requests received on the server side
receivedReqs := make([]receivedReq, 0)
for req := range receivedReqsChan {
receivedReqs = append(receivedReqs, req)
}
// Due to implementation details (map iteration order is random) we just check
// that the expected requests match the received requests, without checking
// the exact order, which is not guaranteed in the multi-tenant case
require.ElementsMatch(t, testData.expectedReqs, receivedReqs)
expectedMetrics := strings.Replace(testData.expectedMetrics, "__HOST__", serverURL.Host, -1)
err = testutil.GatherAndCompare(prometheus.DefaultGatherer, strings.NewReader(expectedMetrics), "promtail_sent_entries_total", "promtail_dropped_entries_total")
assert.NoError(t, err)
})
}
}
func createServerHandler(receivedReqsChan chan receivedReq, status int) http.HandlerFunc {
return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
// Parse the request
var pushReq logproto.PushRequest
if err := httputil.ParseProtoReader(req.Context(), req.Body, int(req.ContentLength), math.MaxInt32, &pushReq, httputil.RawSnappy); err != nil {
rw.WriteHeader(500)
return
}
receivedReqsChan <- receivedReq{
tenantID: req.Header.Get("X-Scope-OrgID"),
pushReq: pushReq,
}
rw.WriteHeader(status)
})
}
type roundTripFunc func(r *http.Request) (*http.Response, error)
func (s roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
return s(r)
}
func TestClient_EncodeJSON(t *testing.T) {
c, err := NewWithDefault("http://loki.com/loki/api/v1/push")
require.NoError(t, err)
c.cfg.EncodeJson = true
c.client.Transport = roundTripFunc(func(r *http.Request) (*http.Response, error) {
require.Equal(t, r.Header.Get("Content-Type"), JSONContentType)
require.Equal(t, r.URL.Path, "/loki/api/v1/push")
b, err := ioutil.ReadAll(r.Body)
require.NoError(t, err)
require.Equal(t, `{"streams":[{"stream":{"foo":"bar"},"values":[["1","11"],["2","22"]]},{"stream":{"foo":"buzz"},"values":[["3","33"],["4","44"]]}]}`, string(b))
return &http.Response{StatusCode: 200, Body: http.NoBody}, nil
})
c.sendBatch("",
newBatch(
entry{labels: model.LabelSet{"foo": "bar"}, Entry: logproto.Entry{Timestamp: time.Unix(0, 1), Line: "11"}},
entry{labels: model.LabelSet{"foo": "bar"}, Entry: logproto.Entry{Timestamp: time.Unix(0, 2), Line: "22"}},
entry{labels: model.LabelSet{"foo": "buzz"}, Entry: logproto.Entry{Timestamp: time.Unix(0, 3), Line: "33"}},
entry{labels: model.LabelSet{"foo": "buzz"}, Entry: logproto.Entry{Timestamp: time.Unix(0, 4), Line: "44"}},
),
)
}

loki/config.go (new file, 109 lines)

@@ -0,0 +1,109 @@
package loki
import (
"flag"
"time"
"github.com/lixh00/loki-client-go/pkg/backoff"
"github.com/lixh00/loki-client-go/pkg/labelutil"
"github.com/lixh00/loki-client-go/pkg/urlutil"
"github.com/prometheus/common/config"
)
// NOTE: the helm charts for promtail and fluent-bit also have defaults for these values; please update them to match if you make changes here.
const (
BatchWait = 1 * time.Second
BatchSize int = 1024 * 1024
MinBackoff = 500 * time.Millisecond
MaxBackoff = 5 * time.Minute
MaxRetries int = 10
Timeout = 10 * time.Second
)
// Config describes configuration for a HTTP pusher client.
type Config struct {
URL urlutil.URLValue
BatchWait time.Duration
BatchSize int
Client config.HTTPClientConfig `yaml:",inline"`
BackoffConfig backoff.BackoffConfig `yaml:"backoff_config"`
// The external labels to add to every log stream when communicating with Loki
ExternalLabels labelutil.LabelSet `yaml:"external_labels,omitempty"`
Timeout time.Duration `yaml:"timeout"`
// The tenant ID to use when pushing logs to Loki (empty string means
// single tenant mode)
TenantID string `yaml:"tenant_id"`
// Use the Loki JSON API instead of the snappy-compressed protobuf.
EncodeJson bool `yaml:"encode_json"`
}
// NewDefaultConfig creates a default configuration for a given target Loki URL.
func NewDefaultConfig(url string) (Config, error) {
var cfg Config
var u urlutil.URLValue
f := &flag.FlagSet{}
cfg.RegisterFlags(f)
if err := f.Parse(nil); err != nil {
return cfg, err
}
if err := u.Set(url); err != nil {
return cfg, err
}
cfg.URL = u
return cfg, nil
}
// RegisterFlagsWithPrefix registers flags, prefixing every flag name with the
// given prefix. If prefix is non-empty, it should end with a period.
func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.Var(&c.URL, prefix+"client.url", "URL of log server")
f.DurationVar(&c.BatchWait, prefix+"client.batch-wait", BatchWait, "Maximum wait period before sending batch.")
f.IntVar(&c.BatchSize, prefix+"client.batch-size-bytes", BatchSize, "Maximum batch size to accrue before sending. ")
// Default backoff schedule: 0.5s, 1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s (4.267m), for a total of 511.5s (8.5m) before logs are lost
f.IntVar(&c.BackoffConfig.MaxRetries, prefix+"client.max-retries", MaxRetries, "Maximum number of retries when sending batches.")
f.DurationVar(&c.BackoffConfig.MinBackoff, prefix+"client.min-backoff", MinBackoff, "Initial backoff time between retries.")
f.DurationVar(&c.BackoffConfig.MaxBackoff, prefix+"client.max-backoff", MaxBackoff, "Maximum backoff time between retries.")
f.DurationVar(&c.Timeout, prefix+"client.timeout", Timeout, "Maximum time to wait for server to respond to a request")
f.Var(&c.ExternalLabels, prefix+"client.external-labels", "list of external labels to add to each log (e.g: --client.external-labels=lb1=v1,lb2=v2)")
f.StringVar(&c.TenantID, prefix+"client.tenant-id", "", "Tenant ID to use when pushing logs to Loki.")
f.BoolVar(&c.EncodeJson, prefix+"client.encode-json", false, "Encode payload in JSON; defaults to snappy-compressed protobuf.")
}
// RegisterFlags registers flags.
func (c *Config) RegisterFlags(flags *flag.FlagSet) {
c.RegisterFlagsWithPrefix("", flags)
}
// UnmarshalYAML implements yaml.Unmarshaler
func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
type raw Config
var cfg raw
if c.URL.URL != nil {
// the value was already set via flags, which apply sane defaults.
cfg = raw(*c)
} else {
// force sane defaults.
cfg = raw{
BackoffConfig: backoff.BackoffConfig{
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
}
}
if err := unmarshal(&cfg); err != nil {
return err
}
*c = Config(cfg)
return nil
}
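A hedged fragment showing how a few of these fields could be overridden programmatically before constructing a client; the field names come from the Config struct above, while the URL and tenant ID are placeholders and the imports match the sketch after client.go:

// Start from the flag defaults, then override selectively.
cfg, err := loki.NewDefaultConfig("http://localhost:3100/loki/api/v1/push")
if err != nil {
	log.Fatal(err)
}
cfg.BatchWait = 2 * time.Second // send a batch once it has been pending ~2s, even if not full
cfg.BatchSize = 512 * 1024      // or as soon as adding an entry would exceed 512 KiB
cfg.TenantID = "tenant-1"       // sent as the X-Scope-OrgID header when non-empty
cfg.EncodeJson = true           // push JSON instead of snappy-compressed protobuf

client, err := loki.New(cfg)
if err != nil {
	log.Fatal(err)
}
defer client.Stop()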

loki/config_test.go (new file, 94 lines)

@@ -0,0 +1,94 @@
package loki
import (
"net/url"
"reflect"
"testing"
"time"
"github.com/lixh00/loki-client-go/pkg/backoff"
"github.com/lixh00/loki-client-go/pkg/urlutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
)
var clientConfig = Config{}
var clientDefaultConfig = `
url: http://localhost:3100/loki/api/v1/push
`
var clientCustomConfig = `
url: http://localhost:3100/loki/api/v1/push
backoff_config:
max_retries: 20
min_period: 5s
max_period: 1m
batchwait: 5s
batchsize: 204800
timeout: 5s
`
func Test_Config(t *testing.T) {
u, err := url.Parse("http://localhost:3100/loki/api/v1/push")
require.NoError(t, err)
tests := []struct {
configValues string
expectedConfig Config
}{
{
clientDefaultConfig,
Config{
URL: urlutil.URLValue{
URL: u,
},
BackoffConfig: backoff.BackoffConfig{
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
},
},
{
clientCustomConfig,
Config{
URL: urlutil.URLValue{
URL: u,
},
BackoffConfig: backoff.BackoffConfig{
MaxBackoff: 1 * time.Minute,
MaxRetries: 20,
MinBackoff: 5 * time.Second,
},
BatchSize: 100 * 2048,
BatchWait: 5 * time.Second,
Timeout: 5 * time.Second,
},
},
}
for _, tc := range tests {
err := yaml.Unmarshal([]byte(tc.configValues), &clientConfig)
require.NoError(t, err)
if !reflect.DeepEqual(tc.expectedConfig, clientConfig) {
t.Errorf("Configs does not match, expected: %v, received: %v", tc.expectedConfig, clientConfig)
}
}
}
func TestDefaultConfig(t *testing.T) {
cfg, err := NewDefaultConfig("http://loki.com")
assert.Nil(t, err)
assert.Equal(t, cfg.BatchSize, BatchSize)
assert.Equal(t, cfg.BatchWait, BatchWait)
assert.Equal(t, cfg.Timeout, Timeout)
assert.Equal(t, cfg.BackoffConfig.MaxBackoff, MaxBackoff)
assert.Equal(t, cfg.BackoffConfig.MinBackoff, MinBackoff)
assert.Equal(t, cfg.BackoffConfig.MaxRetries, MaxRetries)
assert.Equal(t, cfg.URL.URL.String(), "http://loki.com")
}