mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-12-26 07:29:04 +01:00
Improved dropping of metrics failed to send
This commit is contained in:
parent
580d21d8bb
commit
0ca6d1a794
@ -33,10 +33,12 @@ type InfluxSink struct {
|
|||||||
BatchSize int `json:"batch_size,omitempty"`
|
BatchSize int `json:"batch_size,omitempty"`
|
||||||
// Interval, in which is buffer flushed if it has not been already written (by reaching batch size). Default 1s
|
// Interval, in which is buffer flushed if it has not been already written (by reaching batch size). Default 1s
|
||||||
FlushInterval string `json:"flush_delay,omitempty"`
|
FlushInterval string `json:"flush_delay,omitempty"`
|
||||||
|
RetryInterval string `json:"retry_delay,omitempty"`
|
||||||
}
|
}
|
||||||
batch []*write.Point
|
batch []*write.Point
|
||||||
flushTimer *time.Timer
|
flushTimer *time.Timer
|
||||||
flushDelay time.Duration
|
flushDelay time.Duration
|
||||||
|
retryDelay time.Duration
|
||||||
lock sync.Mutex // Flush() runs in another goroutine, so this lock has to protect the buffer
|
lock sync.Mutex // Flush() runs in another goroutine, so this lock has to protect the buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,6 +94,8 @@ func (s *InfluxSink) connect() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *InfluxSink) Write(m lp.CCMetric) error {
|
func (s *InfluxSink) Write(m lp.CCMetric) error {
|
||||||
|
// Lock access to batch slice
|
||||||
|
s.lock.Lock()
|
||||||
|
|
||||||
if len(s.batch) == 0 && s.flushDelay != 0 {
|
if len(s.batch) == 0 && s.flushDelay != 0 {
|
||||||
// This is the first write since the last flush, start the flushTimer!
|
// This is the first write since the last flush, start the flushTimer!
|
||||||
@ -109,22 +113,34 @@ func (s *InfluxSink) Write(m lp.CCMetric) error {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// batch slice full, dropping oldest metric
|
||||||
|
// e.g. when previous flushes failed and batch slice was not cleared
|
||||||
|
if len(s.batch) == s.config.BatchSize {
|
||||||
|
newSize := len(s.batch) - 1
|
||||||
|
for i := 0; i < newSize; i++ {
|
||||||
|
s.batch[i] = s.batch[i+1]
|
||||||
|
}
|
||||||
|
s.batch[newSize] = nil
|
||||||
|
s.batch = s.batch[:newSize]
|
||||||
|
cclog.ComponentError(s.name, "Batch slice full, dropping oldest metric")
|
||||||
|
}
|
||||||
|
|
||||||
// Append metric to batch slice
|
// Append metric to batch slice
|
||||||
p := m.ToPoint(s.meta_as_tags)
|
p := m.ToPoint(s.meta_as_tags)
|
||||||
s.lock.Lock()
|
|
||||||
s.batch = append(s.batch, p)
|
s.batch = append(s.batch, p)
|
||||||
s.lock.Unlock()
|
|
||||||
|
|
||||||
// Flush synchronously if "flush_delay" is zero
|
// Flush synchronously if "flush_delay" is zero
|
||||||
if s.flushDelay == 0 {
|
// or
|
||||||
return s.Flush()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush if batch size is reached
|
// Flush if batch size is reached
|
||||||
if len(s.batch) == s.config.BatchSize {
|
if s.flushDelay == 0 ||
|
||||||
|
len(s.batch) == s.config.BatchSize {
|
||||||
|
// Unlock access to batch slice
|
||||||
|
s.lock.Unlock()
|
||||||
return s.Flush()
|
return s.Flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unlock access to batch slice
|
||||||
|
s.lock.Unlock()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -142,20 +158,27 @@ func (s *InfluxSink) Flush() error {
|
|||||||
|
|
||||||
// Send metrics from batch slice
|
// Send metrics from batch slice
|
||||||
err := s.writeApi.WritePoint(context.Background(), s.batch...)
|
err := s.writeApi.WritePoint(context.Background(), s.batch...)
|
||||||
|
if err != nil {
|
||||||
|
|
||||||
|
// Setup timer to retry flush
|
||||||
|
time.AfterFunc(
|
||||||
|
s.retryDelay,
|
||||||
|
func() {
|
||||||
|
if err := s.Flush(); err != nil {
|
||||||
|
cclog.ComponentError(s.name, "flush retry failed:", err.Error())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
cclog.ComponentError(s.name, "flush failed:", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Clear batch slice
|
// Clear batch slice
|
||||||
// (when sending the metrics failed, metrics get dropped and are lost)
|
|
||||||
for i := range s.batch {
|
for i := range s.batch {
|
||||||
s.batch[i] = nil
|
s.batch[i] = nil
|
||||||
}
|
}
|
||||||
s.batch = s.batch[:0]
|
s.batch = s.batch[:0]
|
||||||
|
|
||||||
// Report errors for sending metrics
|
|
||||||
if err != nil {
|
|
||||||
cclog.ComponentError(s.name, "flush failed:", err.Error())
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -174,6 +197,7 @@ func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
// Set config default values
|
// Set config default values
|
||||||
s.config.BatchSize = 100
|
s.config.BatchSize = 100
|
||||||
s.config.FlushInterval = "1s"
|
s.config.FlushInterval = "1s"
|
||||||
|
s.config.RetryInterval = "5s"
|
||||||
|
|
||||||
// Read config
|
// Read config
|
||||||
if len(config) > 0 {
|
if len(config) > 0 {
|
||||||
@ -213,6 +237,14 @@ func NewInfluxSink(name string, config json.RawMessage) (Sink, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Configure flush delay duration
|
||||||
|
if len(s.config.RetryInterval) > 0 {
|
||||||
|
t, err := time.ParseDuration(s.config.RetryInterval)
|
||||||
|
if err == nil {
|
||||||
|
s.retryDelay = t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// allocate batch slice
|
// allocate batch slice
|
||||||
s.batch = make([]*write.Point, 0, s.config.BatchSize)
|
s.batch = make([]*write.Point, 0, s.config.BatchSize)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user