Skip to content

Commit 3525c6f

Browse files
authored
[refactor] - s3 metrics (#3760)
* use histogram * use exponential buckets
1 parent c24b4d5 commit 3525c6f

File tree

2 files changed

+26
-22
lines changed

2 files changed

+26
-22
lines changed

pkg/sources/s3/metrics.go

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ import (
1111
type metricsCollector interface {
1212
// Object metrics.
1313

14-
RecordObjectScanned(bucket string)
15-
RecordObjectSkipped(bucket, reason string)
14+
RecordObjectScanned(bucket string, sizeBytes float64)
15+
RecordObjectSkipped(bucket, reason string, sizeBytes float64)
1616
RecordObjectError(bucket string)
1717

1818
// Role metrics.
@@ -22,8 +22,8 @@ type metricsCollector interface {
2222
}
2323

2424
type collector struct {
25-
objectsScanned *prometheus.CounterVec
26-
objectsSkipped *prometheus.CounterVec
25+
objectsScanned *prometheus.HistogramVec
26+
objectsSkipped *prometheus.HistogramVec
2727
objectsErrors *prometheus.CounterVec
2828
rolesScanned *prometheus.GaugeVec
2929
bucketsPerRole *prometheus.GaugeVec
@@ -33,18 +33,22 @@ var metricsInstance metricsCollector
3333

3434
func init() {
3535
metricsInstance = &collector{
36-
objectsScanned: promauto.NewCounterVec(prometheus.CounterOpts{
36+
objectsScanned: promauto.NewHistogramVec(prometheus.HistogramOpts{
3737
Namespace: common.MetricsNamespace,
3838
Subsystem: common.MetricsSubsystem,
39-
Name: "objects_scanned_total",
40-
Help: "Total number of S3 objects successfully scanned",
39+
Name: "objects_scanned_bytes",
40+
Help: "Size distribution of successfully scanned S3 objects in bytes",
41+
// 64B, 512B, 4KB, 32KB, 256KB, 2MB, 16MB, 128MB, 1GB.
42+
Buckets: prometheus.ExponentialBuckets(64, 8, 9),
4143
}, []string{"bucket"}),
4244

43-
objectsSkipped: promauto.NewCounterVec(prometheus.CounterOpts{
45+
objectsSkipped: promauto.NewHistogramVec(prometheus.HistogramOpts{
4446
Namespace: common.MetricsNamespace,
4547
Subsystem: common.MetricsSubsystem,
46-
Name: "objects_skipped_total",
47-
Help: "Total number of S3 objects skipped during scan",
48+
Name: "objects_skipped_bytes",
49+
Help: "Size distribution of skipped S3 objects in bytes",
50+
// 64B, 512B, 4KB, 32KB, 256KB, 2MB, 16MB, 128MB, 1GB.
51+
Buckets: prometheus.ExponentialBuckets(64, 8, 9),
4852
}, []string{"bucket", "reason"}),
4953

5054
objectsErrors: promauto.NewCounterVec(prometheus.CounterOpts{
@@ -70,12 +74,12 @@ func init() {
7074
}
7175
}
7276

73-
func (c *collector) RecordObjectScanned(bucket string) {
74-
c.objectsScanned.WithLabelValues(bucket).Inc()
77+
func (c *collector) RecordObjectScanned(bucket string, sizeBytes float64) {
78+
c.objectsScanned.WithLabelValues(bucket).Observe(sizeBytes)
7579
}
7680

77-
func (c *collector) RecordObjectSkipped(bucket, reason string) {
78-
c.objectsSkipped.WithLabelValues(bucket, reason).Inc()
81+
func (c *collector) RecordObjectSkipped(bucket, reason string, sizeBytes float64) {
82+
c.objectsSkipped.WithLabelValues(bucket, reason).Observe(sizeBytes)
7983
}
8084

8185
func (c *collector) RecordObjectError(bucket string) {

pkg/sources/s3/s3.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ func (s *Source) pageChunker(
432432

433433
for objIdx, obj := range metadata.page.Contents {
434434
if obj == nil {
435-
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "nil_object")
435+
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "nil_object", 0)
436436
if err := s.checkpointer.UpdateObjectCompletion(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil {
437437
ctx.Logger().Error(err, "could not update progress for nil object")
438438
}
@@ -448,7 +448,7 @@ func (s *Source) pageChunker(
448448
// Skip GLACIER and GLACIER_IR objects.
449449
if obj.StorageClass == nil || strings.Contains(*obj.StorageClass, "GLACIER") {
450450
ctx.Logger().V(5).Info("Skipping object in storage class", "storage_class", *obj.StorageClass)
451-
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "storage_class")
451+
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "storage_class", float64(*obj.Size))
452452
if err := s.checkpointer.UpdateObjectCompletion(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil {
453453
ctx.Logger().Error(err, "could not update progress for glacier object")
454454
}
@@ -458,7 +458,7 @@ func (s *Source) pageChunker(
458458
// Ignore large files.
459459
if *obj.Size > s.maxObjectSize {
460460
ctx.Logger().V(5).Info("Skipping %d byte file (over maxObjectSize limit)")
461-
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "size_limit")
461+
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "size_limit", float64(*obj.Size))
462462
if err := s.checkpointer.UpdateObjectCompletion(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil {
463463
ctx.Logger().Error(err, "could not update progress for large file")
464464
}
@@ -468,7 +468,7 @@ func (s *Source) pageChunker(
468468
// Skip empty file.
469469
if *obj.Size == 0 {
470470
ctx.Logger().V(5).Info("Skipping empty file")
471-
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "empty_file")
471+
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "empty_file", 0)
472472
if err := s.checkpointer.UpdateObjectCompletion(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil {
473473
ctx.Logger().Error(err, "could not update progress for empty file")
474474
}
@@ -478,7 +478,7 @@ func (s *Source) pageChunker(
478478
// Skip incompatible extensions.
479479
if common.SkipFile(*obj.Key) {
480480
ctx.Logger().V(5).Info("Skipping file with incompatible extension")
481-
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "incompatible_extension")
481+
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "incompatible_extension", float64(*obj.Size))
482482
if err := s.checkpointer.UpdateObjectCompletion(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil {
483483
ctx.Logger().Error(err, "could not update progress for incompatible file")
484484
}
@@ -493,7 +493,7 @@ func (s *Source) pageChunker(
493493

494494
if strings.HasSuffix(*obj.Key, "/") {
495495
ctx.Logger().V(5).Info("Skipping directory")
496-
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "directory")
496+
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "directory", float64(*obj.Size))
497497
return nil
498498
}
499499

@@ -521,7 +521,7 @@ func (s *Source) pageChunker(
521521
if err != nil {
522522
if strings.Contains(err.Error(), "AccessDenied") {
523523
ctx.Logger().Error(err, "could not get S3 object; access denied")
524-
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "access_denied")
524+
s.metricsCollector.RecordObjectSkipped(metadata.bucket, "access_denied", float64(*obj.Size))
525525
} else {
526526
ctx.Logger().Error(err, "could not get S3 object")
527527
s.metricsCollector.RecordObjectError(metadata.bucket)
@@ -596,7 +596,7 @@ func (s *Source) pageChunker(
596596
if err := s.checkpointer.UpdateObjectCompletion(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil {
597597
ctx.Logger().Error(err, "could not update progress for scanned object")
598598
}
599-
s.metricsCollector.RecordObjectScanned(metadata.bucket)
599+
s.metricsCollector.RecordObjectScanned(metadata.bucket, float64(*obj.Size))
600600

601601
return nil
602602
})

0 commit comments

Comments
 (0)