Skip to content

Commit 76e63d8

Browse files
Merge pull request #9 from mintel/INFRA-43933_add_s3_access_logs_type
INFRA-43933: add S3 access logs as a log type that lambda-promtail can parse
2 parents 583f7f3 + 6dc53a3 commit 76e63d8

File tree

4 files changed

+91
-4
lines changed

4 files changed

+91
-4
lines changed
30.3 MB
Binary file not shown.

tools/lambda-promtail/lambda-promtail/s3.go

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ const (
5050
LB_NLB_TYPE string = "net"
5151
LB_ALB_TYPE string = "app"
5252
WAF_LOG_TYPE string = "WAFLogs"
53+
S3_ACCESS_LOG_TYPE string = "s3accesslogs"
5354
)
5455

5556
var (
@@ -82,6 +83,8 @@ var (
8283
cloudfrontTimestampRegex = regexp.MustCompile(`(?P<timestamp>\d+-\d+-\d+\s\d+:\d+:\d+)`)
8384
wafFilenameRegex = regexp.MustCompile(`AWSLogs\/(?P<account_id>\d+)\/(?P<type>WAFLogs)\/(?P<region>[\w-]+)\/(?P<src>[\w-]+)\/(?P<year>\d+)\/(?P<month>\d+)\/(?P<day>\d+)\/(?P<hour>\d+)\/(?P<minute>\d+)\/\d+\_waflogs\_[\w-]+_[\w-]+_\d+T\d+Z_\w+`)
8485
wafTimestampRegex = regexp.MustCompile(`"timestamp":\s*(?P<timestamp>\d+),`)
86+
s3AccessLogFilenameRegex = regexp.MustCompile(`(?:.*\/)?(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})-(?P<hour>\d{2})-(?P<minute>\d{2})-(?P<second>\d{2})-(?P<file_id>[A-F0-9]{16})$`)
87+
s3AccessLogTimestampRegex = regexp.MustCompile(`\[(?P<timestamp>\d{2}\/[A-Za-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2}\s[+\-]\d{4})\]`)
8588
parsers = map[string]parserConfig{
8689
FLOW_LOG_TYPE: {
8790
logTypeLabel: "s3_vpc_flow",
@@ -122,6 +125,15 @@ var (
122125
timestampRegex: wafTimestampRegex,
123126
timestampType: "unix",
124127
},
128+
S3_ACCESS_LOG_TYPE: {
129+
logTypeLabel: "s3_access",
130+
filenameRegex: s3AccessLogFilenameRegex,
131+
ownerLabelKey: "bucket",
132+
timestampRegex: s3AccessLogTimestampRegex,
133+
timestampFormat: "02/Jan/2006:15:04:05 -0700", // S3 access log format: [02/Jan/2006:15:04:05 -0700]
134+
timestampType: "string",
135+
skipHeaderCount: 0, // S3 access logs typically don't have headers
136+
},
125137
}
126138
)
127139

@@ -149,12 +161,29 @@ func parseS3Log(ctx context.Context, b *batch, labels map[string]string, obj io.
149161
}
150162
return fmt.Errorf("could not find parser for type %s", labels["type"])
151163
}
152-
gzreader, err := gzip.NewReader(obj)
153-
if err != nil {
164+
165+
defer obj.Close()
166+
bReader := bufio.NewReader(obj)
167+
peekBytes, err := bReader.Peek(2)
168+
if err != nil && err != io.EOF {
154169
return err
155170
}
171+
172+
var reader io.Reader
173+
// Check if gzipped (magic bytes: 0x1F 0x8B)
174+
if len(peekBytes) >= 2 && peekBytes[0] == 0x1F && peekBytes[1] == 0x8B {
175+
gzreader, err := gzip.NewReader(bReader)
176+
if err != nil {
177+
return err
178+
}
179+
defer gzreader.Close()
180+
reader = gzreader
181+
} else {
182+
// Plain text - use the buffered reader as-is
183+
reader = bReader
184+
}
156185

157-
scanner := bufio.NewScanner(gzreader)
186+
scanner := bufio.NewScanner(reader)
158187

159188
ls := model.LabelSet{
160189
model.LabelName("__aws_log_type"): model.LabelValue(parser.logTypeLabel),
@@ -168,7 +197,9 @@ func parseS3Log(ctx context.Context, b *batch, labels map[string]string, obj io.
168197
if labels["type"] == CLOUDTRAIL_LOG_TYPE {
169198
records := make(chan Record)
170199
jsonStream := NewJSONStream(records)
171-
go jsonStream.Start(gzreader, parser.skipHeaderCount)
200+
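// CloudTrail logs are expected to be gzipped, so reader should be a *gzip.Reader (io.ReadCloser) here. NOTE(review): this unchecked assertion panics if the object was not gzipped, because the plain-text path leaves reader as a *bufio.Reader, which has no Close method.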
201+
readCloser := reader.(io.ReadCloser)
202+
go jsonStream.Start(readCloser, parser.skipHeaderCount)
172203
// Stream json file
173204
for record := range jsonStream.records {
174205
if record.Error != nil {

tools/lambda-promtail/lambda-promtail/s3_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,40 @@ func Test_getLabels(t *testing.T) {
394394
},
395395
wantErr: false,
396396
},
397+
{
398+
name: "s3_access_logs",
399+
args: args{
400+
record: events.S3EventRecord{
401+
AWSRegion: "us-west-2",
402+
S3: events.S3Entity{
403+
Bucket: events.S3Bucket{
404+
Name: "mintel-core-frontend-app-shell-prod-replica-logs",
405+
OwnerIdentity: events.S3UserIdentity{
406+
PrincipalID: "test",
407+
},
408+
},
409+
Object: events.S3Object{
410+
Key: "replica-access-logs/2025-12-06-00-00-26-D9C9ED584E11F3F8",
411+
},
412+
},
413+
},
414+
},
415+
want: map[string]string{
416+
"bucket": "mintel-core-frontend-app-shell-prod-replica-logs",
417+
"bucket_owner": "test",
418+
"bucket_region": "us-west-2",
419+
"day": "06",
420+
"file_id": "D9C9ED584E11F3F8",
421+
"hour": "00",
422+
"key": "replica-access-logs/2025-12-06-00-00-26-D9C9ED584E11F3F8",
423+
"minute": "00",
424+
"month": "12",
425+
"second": "26",
426+
"type": S3_ACCESS_LOG_TYPE,
427+
"year": "2025",
428+
},
429+
wantErr: false,
430+
},
397431
{
398432
name: "missing_type",
399433
args: args{
@@ -602,6 +636,27 @@ func Test_parseS3Log(t *testing.T) {
602636
},
603637
wantErr: false,
604638
},
639+
{
640+
name: "s3_access_logs",
641+
args: args{
642+
batchSize: 131072, // Set large enough we don't try and send to promtail
643+
filename: "../testdata/s3accesslog.txt",
644+
b: &batch{
645+
streams: map[string]*logproto.Stream{},
646+
},
647+
labels: map[string]string{
648+
"type": S3_ACCESS_LOG_TYPE,
649+
"bucket": "mintel-core-frontend-app-shell-prod-replica-logs",
650+
"file_id": "D9C9ED584E11F3F8",
651+
},
652+
},
653+
expectedLen: 1,
654+
expectedStream: `{__aws_log_type="s3_access", __aws_s3_access="D9C9ED584E11F3F8", __aws_s3_access_owner="mintel-core-frontend-app-shell-prod-replica-logs"}`,
655+
expectedTimestamps: []time.Time{
656+
time.Date(2025, time.December, 5, 23, 49, 46, 0, time.FixedZone("UTC", 0)),
657+
},
658+
wantErr: false,
659+
},
605660
{
606661
name: "missing_parser",
607662
args: args{
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
8a2b8c083616eedb641d19d09a3df1c58140281be5de78bd378053da98598f92 mintel-core-frontend-app-shell-prod-replica [05/Dec/2025:23:49:46 +0000] 64.252.71.196 - RWXJ337PHNKAXKAJ WEBSITE.GET.OBJECT gnpd-data-entry/projects/gnpd/image-sets/a38f9766-2dee-4f2d-bc44-e0c5965ee79a "GET /gnpd-data-entry/projects/gnpd/image-sets/a38f9766-2dee-4f2d-bc44-e0c5965ee79a?productInfo=Batch+Entry+175699%3A5+%7C+Iwatsuka+Seika+The+Hitotsumami+Pizza+Margarita+Flavoured+Rice+Cracker HTTP/1.1" 404 NoSuchKey 625 - 48 - "-" "Amazon CloudFront" - 8htnihf1feY67Nx1Yr6bflyfkVxBFtZmdiJTvsOo3P/DTqfkA6jsaT5IYoKdyIGJeORE+LtJM74LOoNS0joL4c7qVyr+aEdM - - - mintel-core-frontend-app-shell-prod-replica.s3-website-us-west-2.amazonaws.com - - -

0 commit comments

Comments
 (0)