Skip to content

Commit b5891a4

Browse files
authored
aws: misc fixes (#863)
1 parent f6e1fcd commit b5891a4

File tree

7 files changed

+479
-71
lines changed

7 files changed

+479
-71
lines changed

internal/test/integration/red_test_python_aws_s3.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,5 +77,6 @@ func assertS3Operation(t require.TestingT, op, expectedKey string) {
7777

7878
tag, found = jaeger.FindIn(span.Tags, "cloud.region")
7979
require.True(t, found)
80-
require.Empty(t, tag.Value)
80+
// localstack doesn't have a region, so we should match the default AWS one which is "us-east-1"
81+
require.Equal(t, "us-east-1", tag.Value)
8182
}

internal/test/integration/red_test_python_aws_sqs.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ func assertSQSOperation(t require.TestingT, op, expectedQueueURL, expectedMessag
7676

7777
tag, found = jaeger.FindIn(span.Tags, "cloud.region")
7878
require.True(t, found)
79-
require.Empty(t, tag.Value)
79+
// localstack doesn't have a region, so we should match the default AWS one which is "us-east-1"
80+
require.Equal(t, "us-east-1", tag.Value)
8081

8182
tag, found = jaeger.FindIn(span.Tags, "aws.sqs.queue_url")
8283
require.True(t, found)

pkg/appolly/app/request/span_getters.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ func spanOTELGetters(name attr.Name) (attributes.Getter[*Span, attribute.KeyValu
218218
if s.Type == EventTypeHTTPClient && s.SubType == HTTPSubtypeAWSS3 && s.AWS != nil {
219219
return AWSRequestID(s.AWS.S3.Meta.RequestID)
220220
}
221+
if s.Type == EventTypeHTTPClient && s.SubType == HTTPSubtypeAWSSQS && s.AWS != nil {
222+
return AWSRequestID(s.AWS.SQS.Meta.RequestID)
223+
}
221224
return AWSRequestID("")
222225
}
223226
case attr.AWSExtendedRequestID:
@@ -253,6 +256,9 @@ func spanOTELGetters(name attr.Name) (attributes.Getter[*Span, attribute.KeyValu
253256
if s.Type == EventTypeHTTPClient && s.SubType == HTTPSubtypeAWSS3 && s.AWS != nil {
254257
return CloudRegion(s.AWS.S3.Meta.Region)
255258
}
259+
if s.Type == EventTypeHTTPClient && s.SubType == HTTPSubtypeAWSSQS && s.AWS != nil {
260+
return CloudRegion(s.AWS.SQS.Meta.Region)
261+
}
256262
return CloudRegion("")
257263
}
258264
case attr.DNSQuestionName:

pkg/ebpf/common/http/aws_common.go

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@ var (
2020
extendedRequestIDHeader = "x-amz-id-2"
2121
)
2222

23-
var awsRegionRgx = regexp.MustCompile(`(?:^|\.)([a-z]{2}-[a-z]+-\d)\.amazonaws\.com$`)
23+
var (
	// awsRegionURLRgx captures the whole DNS label that sits immediately
	// before "amazonaws.com" (optionally followed by one extra suffix such as
	// ".cn"). That label can be a region ("eu-west-1") or a service name
	// ("s3"), so the capture still has to be validated as a region.
	awsRegionURLRgx = regexp.MustCompile(`(?:^|\.)([a-z0-9-]+)\.amazonaws\.com(\.[a-z]+)?$`)

	// awsRegionURLRgx2 handles hosts where the region is dash-joined to the
	// preceding text instead of being a label of its own, e.g.
	// "s3-eu-west-1.amazonaws.com". Group 1 is the region itself.
	//
	// The previous form, `([a-z0-9-]+)-([a-z0-9-]+)\.amazonaws\.com...`, was
	// greedy: for "s3-eu-west-1" it captured "s3-eu-west" and "1", so group 1
	// was never a valid region. Anchoring on the region shape avoids that.
	awsRegionURLRgx2 = regexp.MustCompile(`-([a-z]{2}(?:-gov)?-[a-z]+-\d+)\.amazonaws\.com(\.[a-z]+)?$`)

	// awsRegionRgx matches a bare region name such as "us-east-1" or
	// "us-gov-west-1".
	awsRegionRgx = regexp.MustCompile(`^[a-z]{2}(-gov)?-[a-z]+-\d+$`)

	// awsRegionRgx2 matches bare China-partition region names such as
	// "cn-north-1". Every string it accepts is also accepted by awsRegionRgx;
	// the name is kept because other code in this file refers to it.
	awsRegionRgx2 = regexp.MustCompile(`^cn-[a-z]+-\d+$`)
)
2429

2530
func parseAWSMeta(req *http.Request, resp *http.Response) (request.AWSMeta, error) {
2631
meta := request.AWSMeta{}
@@ -47,10 +52,46 @@ func parseAWSMeta(req *http.Request, resp *http.Response) (request.AWSMeta, erro
4752
return meta, nil
4853
}
4954

55+
// parseAWSRegion extracts the AWS region from the Host in a request.
56+
// It supports both virtual-hosted–style and path-style endpoints.
57+
// If no explicit region is found, the default region ("us-east-1") is returned.
58+
//
59+
// Examples:
60+
//
61+
// Host: bucket.s3.eu-west-1.amazonaws.com => "eu-west-1"
62+
// Host: bucket.s3.amazonaws.com => "us-east-1"
63+
// Host: ec2.us-west-2.amazonaws.com => "us-west-2"
64+
// Host: s3.eu-central-1.amazonaws.com => "eu-central-1"
65+
// Host: sns.cn-north-1.amazonaws.com.cn => "cn-north-1"
66+
// Host: sts.amazonaws.com => "us-east-1" (default)
5067
func parseAWSRegion(req *http.Request) string {
51-
match := awsRegionRgx.FindStringSubmatch(req.URL.Host)
52-
if len(match) >= 2 {
53-
return match[1]
68+
// Common AWS endpoint patterns:
69+
// <service>.<region>.amazonaws.com
70+
// <service>.<region>.amazonaws.com.cn
71+
// <service>.amazonaws.com
72+
//
73+
// Examples captured by this regex:
74+
// ec2.us-east-2.amazonaws.com => us-east-2
75+
// monitoring.us-gov-west-1.amazonaws.com => us-gov-west-1
76+
// s3.cn-north-1.amazonaws.com.cn => cn-north-1
77+
if m := awsRegionURLRgx.FindStringSubmatch(req.Host); len(m) >= 2 {
78+
if isAWSRegion(m[1]) {
79+
return m[1]
80+
}
5481
}
55-
return ""
82+
83+
// Fallback pattern for "service.s3.region.amazonaws.com" style:
84+
// bucket.s3.eu-west-1.amazonaws.com => eu-west-1
85+
if m := awsRegionURLRgx2.FindStringSubmatch(req.Host); len(m) >= 2 {
86+
if isAWSRegion(m[1]) {
87+
return m[1]
88+
}
89+
}
90+
91+
// Default AWS region when none is found
92+
return "us-east-1"
93+
}
94+
95+
func isAWSRegion(region string) bool {
96+
return awsRegionRgx.MatchString(region) || awsRegionRgx2.MatchString(region)
5697
}
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package ebpfcommon
5+
6+
import (
7+
"net/http"
8+
"testing"
9+
)
10+
11+
func TestParseAWSRegion(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
host string
15+
want string
16+
}{
17+
{
18+
name: "ec2 with region",
19+
host: "ec2.us-west-2.amazonaws.com",
20+
want: "us-west-2",
21+
},
22+
{
23+
name: "s3 with region",
24+
host: "s3.eu-central-1.amazonaws.com",
25+
want: "eu-central-1",
26+
},
27+
{
28+
name: "sns with cn region",
29+
host: "sns.cn-north-1.amazonaws.com.cn",
30+
want: "cn-north-1",
31+
},
32+
{
33+
name: "sts default region",
34+
host: "sts.amazonaws.com",
35+
want: "us-east-1",
36+
},
37+
{
38+
name: "bucket s3 eu-west-1",
39+
host: "bucket.s3.eu-west-1.amazonaws.com",
40+
want: "eu-west-1",
41+
},
42+
{
43+
name: "bucket s3 default region",
44+
host: "bucket.s3.amazonaws.com",
45+
want: "us-east-1",
46+
},
47+
{
48+
name: "monitoring us-gov-west-1",
49+
host: "monitoring.us-gov-west-1.amazonaws.com",
50+
want: "us-gov-west-1",
51+
},
52+
{
53+
name: "s3 cn-north-1 with .cn",
54+
host: "s3.cn-north-1.amazonaws.com.cn",
55+
want: "cn-north-1",
56+
},
57+
{
58+
name: "bucket s3 cn-north-1 with .cn",
59+
host: "bucket.s3.cn-north-1.amazonaws.com.cn",
60+
want: "cn-north-1",
61+
},
62+
{
63+
name: "service only",
64+
host: "s3.amazonaws.com",
65+
want: "us-east-1",
66+
},
67+
{
68+
name: "service only .cn",
69+
host: "s3.amazonaws.com.cn",
70+
want: "us-east-1",
71+
},
72+
{
73+
name: "bucket s3 dot region",
74+
host: "bucket.s3.us-west-2.amazonaws.com",
75+
want: "us-west-2",
76+
},
77+
{
78+
name: "empty host",
79+
host: "",
80+
want: "us-east-1",
81+
},
82+
{
83+
name: "random host",
84+
host: "example.com",
85+
want: "us-east-1",
86+
},
87+
{
88+
name: "service.region.amazonaws.com.cn",
89+
host: "ec2.ap-southeast-1.amazonaws.com.cn",
90+
want: "ap-southeast-1",
91+
},
92+
{
93+
name: "bucket s3 ap-southeast-2",
94+
host: "bucket.s3.ap-southeast-2.amazonaws.com",
95+
want: "ap-southeast-2",
96+
},
97+
{
98+
name: "bucket s3 ap-southeast-2 .cn",
99+
host: "bucket.s3.ap-southeast-2.amazonaws.com.cn",
100+
want: "ap-southeast-2",
101+
},
102+
{
103+
name: "service.region.amazonaws.com with numbers",
104+
host: "lambda.us-east-1.amazonaws.com",
105+
want: "us-east-1",
106+
},
107+
{
108+
name: "service.region.amazonaws.com with dash",
109+
host: "dynamodb.us-west-2.amazonaws.com",
110+
want: "us-west-2",
111+
},
112+
{
113+
name: "bucket s3 region with dot",
114+
host: "bucket.s3.us-west-1.amazonaws.com",
115+
want: "us-west-1",
116+
},
117+
{
118+
name: "service.region.amazonaws.com with .cn",
119+
host: "ec2.cn-northwest-1.amazonaws.com.cn",
120+
want: "cn-northwest-1",
121+
},
122+
{
123+
name: "bucket s3 region with .cn",
124+
host: "bucket.s3.cn-northwest-1.amazonaws.com.cn",
125+
want: "cn-northwest-1",
126+
},
127+
}
128+
129+
for _, tt := range tests {
130+
t.Run(tt.name, func(t *testing.T) {
131+
req := &http.Request{Host: tt.host}
132+
got := parseAWSRegion(req)
133+
if got != tt.want {
134+
t.Errorf("parseAWSRegion(%q) = %q, want %q", tt.host, got, tt.want)
135+
}
136+
})
137+
}
138+
}

pkg/ebpf/common/http/aws_s3.go

Lines changed: 70 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
package ebpfcommon
55

66
import (
7-
"bytes"
87
"errors"
98
"net/http"
109
"strings"
@@ -38,7 +37,7 @@ func parseAWSS3(req *http.Request, resp *http.Response) (request.AWSS3, error) {
3837
if s3.Meta.ExtendedRequestID == "" {
3938
return s3, errors.New("missing x-amz-id-2 header")
4039
}
41-
s3.Bucket, s3.Key = parseS3bucketKey(req.URL.Path)
40+
s3.Bucket, s3.Key = parseS3bucketKey(req)
4241
s3.Method = inferS3Method(req)
4342
if s3.Method == "" {
4443
return s3, errors.New("unable to parse s3 operation")
@@ -47,71 +46,93 @@ func parseAWSS3(req *http.Request, resp *http.Response) (request.AWSS3, error) {
4746
return s3, nil
4847
}
4948

50-
func parseS3bucketKey(path string) (string, string) {
51-
// S3 paths are generally in the format 'PUT /bucket/key'
52-
var bucket, key string
53-
parts := bytes.SplitN([]byte(path), []byte("/"), 3)
54-
if len(parts) >= 2 {
55-
bucket = string(parts[1])
49+
// parseS3bucketKey extracts the S3 bucket name and object key from an HTTP
// request. It supports both virtual-hosted-style addressing (bucket in the
// hostname, detected by a ".s3." separator in req.Host) and path-style
// addressing (bucket is the first path segment).
//
// It returns (bucket, key). Either value may be empty; both are empty when a
// path-style request has no first path segment.
//
// Examples:
//
//	Host: my-bucket.s3.eu-west-1.amazonaws.com, Path: /foo/bar.txt
//	  => ("my-bucket", "foo/bar.txt")
//
//	Host: s3.amazonaws.com, Path: /my-bucket/foo/bar.txt
//	  => ("my-bucket", "foo/bar.txt")
//
//	Host: my-bucket.s3.amazonaws.com, Path: /
//	  => ("my-bucket", "")
func parseS3bucketKey(req *http.Request) (string, string) {
	path := strings.TrimPrefix(req.URL.Path, "/")

	// Case 1: virtual-hosted style. Everything before the first ".s3." in
	// the host is the bucket, and the whole path is the key.
	if bucket, _, found := strings.Cut(req.Host, ".s3."); found {
		return bucket, path
	}

	// Case 2: path style. The first path segment is the bucket and the
	// remainder (if any) is the key.
	bucket, key, _ := strings.Cut(path, "/")
	if bucket == "" {
		// No bucket segment at all (e.g. "/" or "//x"): report nothing
		// rather than a key without a bucket.
		return "", ""
	}
	return bucket, key
}
6287

6388
// This is a naive inference of S3 operations based on HTTP method and URL path/query
6489
func inferS3Method(req *http.Request) string {
65-
q := req.URL.Query()
66-
path := strings.Trim(strings.TrimPrefix(req.URL.Path, "/"), "/")
67-
parts := strings.Split(path, "/")
90+
path := strings.TrimPrefix(req.URL.Path, "/")
6891

69-
switch req.Method {
70-
case http.MethodGet:
71-
switch {
72-
case path == "":
73-
return "ListBuckets"
74-
case len(parts) == 1:
75-
return "ListObjects"
76-
case q.Has("uploads"):
77-
return "ListMultipartUploads"
78-
case q.Has("uploadId"):
79-
return "ListParts"
80-
default:
81-
return "GetObject"
92+
var bucket, object string
93+
// --- Virtual-hosted–style URL ---
94+
// Example: PUT bucket.s3.eu-west-1.amazonaws.com /hello.txt
95+
if strings.Contains(req.Host, ".s3.") {
96+
bucket = strings.SplitN(req.Host, ".s3.", 2)[0]
97+
object = path // path may be empty or "object-key"
98+
} else {
99+
// --- Path-style URL ---
100+
// Example: PUT s3.amazonaws.com /bucket/hello.txt
101+
parts := strings.SplitN(path, "/", 2)
102+
if len(parts) > 0 {
103+
bucket = parts[0]
82104
}
83-
case http.MethodPut:
84-
if q.Has("uploadId") && q.Has("partNumber") {
85-
return "UploadPart"
86-
}
87-
if q.Has("uploadId") {
88-
return "CompleteMultipartUpload"
105+
if len(parts) > 1 {
106+
object = parts[1]
89107
}
108+
}
90109

91-
switch len(parts) {
92-
case 1:
93-
// PUT /my-bucket -> Create bucket
110+
hasBucket := bucket != ""
111+
hasObject := object != ""
112+
113+
switch req.Method {
114+
case http.MethodPut:
115+
if hasBucket && !hasObject {
94116
return "CreateBucket"
95-
default:
96-
// PUT /my-bucket/object.txt
117+
}
118+
if hasBucket && hasObject {
97119
return "PutObject"
98120
}
99-
case http.MethodPost:
100-
if q.Has("uploads") {
101-
return "CreateMultipartUpload"
121+
case http.MethodDelete:
122+
if hasBucket && !hasObject {
123+
return "DeleteBucket"
102124
}
103-
if q.Has("uploadId") {
104-
return "CompleteMultipartUpload"
125+
if hasBucket && hasObject {
126+
return "DeleteObject"
105127
}
106-
return "PutObject"
107-
case http.MethodDelete:
108-
if q.Has("uploadId") {
109-
return "AbortMultipartUpload"
128+
case http.MethodGet:
129+
if !hasBucket {
130+
return "ListBuckets"
110131
}
111-
if len(parts) == 1 {
112-
return "DeleteBucket"
132+
if hasBucket && !hasObject {
133+
return "ListObjects"
113134
}
114-
return "DeleteObject"
135+
return "GetObject"
115136
}
116137

117138
return ""

0 commit comments

Comments
 (0)