Skip to content

Commit 0863285

Browse files
authored
Allow non-ASCII values in S3 PutObject user metadata (#9604)
* Test S3 PutObject with UserMetadata It verifies both ASCII and non-ASCII values. * Check metadata uploaded with S3 API is preserved in lakeFS API This verifies that Unicode actually is treated corrected. * Strip "X-Amz-Meta-" prefix before testing with lakeFS API See #9089 for why it is there in the first place. * Apply RFC 2047 coding to metadata headers This matches the S3 spec. * [CR] [bug] Add missing "return" after EncodeError calls I _also_ asked Claude to look for other missing returns of this fashion; it found these but no new ones.
1 parent 5f9e351 commit 0863285

File tree

5 files changed

+129
-7
lines changed

5 files changed

+129
-7
lines changed

esti/s3_gateway_test.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package esti
33
import (
44
"bytes"
55
"context"
6+
"errors"
67
"fmt"
78
"io"
89
"math/rand"
@@ -20,6 +21,7 @@ import (
2021
"github.com/aws/smithy-go/middleware"
2122
smithyhttp "github.com/aws/smithy-go/transport/http"
2223
"github.com/go-openapi/swag"
24+
"github.com/go-test/deep"
2325
"github.com/minio/minio-go/v7"
2426
"github.com/minio/minio-go/v7/pkg/credentials"
2527
"github.com/minio/minio-go/v7/pkg/tags"
@@ -902,6 +904,90 @@ func TestS3CopyObject(t *testing.T) {
902904
})
903905
}
904906

907+
var errMissingPrefix = errors.New("missing prefix")
908+
909+
// stripKeyPrefix strips prefix from all keys of m. It returns an error along with a stripped
910+
// map if not all keys have the prefix.
911+
func stripKeyPrefix[T any](prefix string, m map[string]T) (map[string]T, error) {
912+
ret := make(map[string]T, len(m))
913+
var err error
914+
for key, value := range m {
915+
stripped, ok := strings.CutPrefix(key, prefix)
916+
if !ok {
917+
err = errors.Join(err, fmt.Errorf("%w in %s", errMissingPrefix, key))
918+
}
919+
ret[stripped] = value
920+
}
921+
return ret, err
922+
}
923+
924+
func TestS3PutObjectUserMetadata(t *testing.T) {
925+
t.Parallel()
926+
ctx, _, repo := setupTest(t)
927+
defer tearDownTest(repo)
928+
929+
const contents = "the quick brown fox jumps over the lazy dog."
930+
// metadata are the metadata we want on the object.
931+
metadata := map[string]string{
932+
"Ascii": "value",
933+
"Non-Ascii": "והארץ היתה תהו ובהו", // "without form and void"
934+
}
935+
// encodedMetadata are the metadata a conforming client sends, with
936+
// values Q-word encoded according to RFC 2047.
937+
encodedMetadata := map[string]string{
938+
"Ascii": "value",
939+
"Non-Ascii": "=?utf-8?q?=D7=95=D7=94=D7=90=D7=A8=D7=A5_=D7=94=D7=99=D7=AA=D7=94_=D7=AA?= =?utf-8?q?=D7=94=D7=95_=D7=95=D7=91=D7=94=D7=95?=",
940+
}
941+
942+
// Headers are _supposed_ to be RFC 2047 encoded on upload. But this should actually
943+
// _always_ work, "non" is just illegal encoding.
944+
for _, encode := range []string{"rfc2047", "none"} {
945+
t.Run(encode, func(t *testing.T) {
946+
file := fmt.Sprintf("data/file.%s", encode)
947+
path := fmt.Sprintf("%sfile.%s", gatewayTestPrefix, encode)
948+
s3lakefsClient := newMinioClient(t, credentials.NewStaticV4)
949+
950+
metadataToSend := metadata
951+
if encode == "rfc2047" {
952+
metadataToSend = encodedMetadata
953+
}
954+
_, err := s3lakefsClient.PutObject(ctx, repo, path, strings.NewReader(contents), int64(len(contents)),
955+
minio.PutObjectOptions{
956+
UserMetadata: metadataToSend,
957+
})
958+
require.NoError(t, err, "putObject")
959+
960+
info, err := s3lakefsClient.StatObject(ctx, repo, path, minio.StatObjectOptions{})
961+
require.NoError(t, err, "statObject")
962+
963+
// The AWS golang SDK does _not_ decode RFC 2047 headers. The check
964+
// here is actually a minor bug because the gateway could choose a
965+
// different encoding for the same headers. But of course it uses the
966+
// same encoder we used, so this should never happen.
967+
if diffs := deep.Equal(info.UserMetadata, minio.StringMap(encodedMetadata)); diffs != nil {
968+
t.Errorf("Different user metadata from S3 gateway: %s", diffs)
969+
}
970+
971+
// The lakeFS API does not need to perform header encoding. Use it to
972+
// check that lakeFS sees the expected values.
973+
statsResp, err := client.StatObjectWithResponse(ctx, repo, mainBranch, &apigen.StatObjectParams{
974+
Path: file,
975+
UserMetadata: aws.Bool(true),
976+
})
977+
require.NoError(t, err, "Call statObject using lakeFS API")
978+
require.NoError(t, VerifyResponse(statsResp.HTTPResponse, statsResp.Body), "statObject using lakeFS API")
979+
980+
// Because of #9089, any user metadata uploaded through the S3 gateway
981+
// has a x-aws-meta- prefix.
982+
strippedMetadata, err := stripKeyPrefix("X-Amz-Meta-", statsResp.JSON200.Metadata.AdditionalProperties)
983+
assert.NoErrorf(t, err, "Failed to strip prefix from metadata keys in %+v", statsResp.JSON200.Metadata.AdditionalProperties)
984+
if diffs := deep.Equal(strippedMetadata, metadata); diffs != nil {
985+
t.Errorf("Different user metadata from API: %s", diffs)
986+
}
987+
})
988+
}
989+
}
990+
905991
func TestS3PutObjectTagging(t *testing.T) {
906992
t.Parallel()
907993
ctx, _, repo := setupTest(t)

pkg/gateway/errors/errors.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ const (
6969
ErrInvalidMaxUploads
7070
ErrInvalidMaxParts
7171
ErrInvalidPartNumberMarker
72+
ErrInvalidHeaderValue
7273
ErrInvalidRequestBody
7374
ErrInvalidCopySource
7475
ErrInvalidMetadataDirective
@@ -249,6 +250,13 @@ var Codes = errorCodeMap{
249250
Description: "Argument partNumberMarker must be an integer.",
250251
HTTPStatusCode: http.StatusBadRequest,
251252
},
253+
ErrInvalidHeaderValue: {
254+
// TODO(ariels): S3 itself encodes the bad header in XML tags ArgumentName,
255+
// ArgumentValue.
256+
Code: "InvalidArgument",
257+
Description: "Invalid header value.",
258+
HTTPStatusCode: http.StatusBadRequest,
259+
},
252260
ErrInvalidPolicyDocument: {
253261
Code: "InvalidPolicyDocument",
254262
Description: "The content of the form does not meet the conditions specified in the policy document.",

pkg/gateway/operations/operation_utils.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package operations
22

33
import (
4+
"errors"
5+
"mime"
46
"net/http"
57
"strings"
68
"time"
@@ -12,23 +14,33 @@ import (
1214

1315
const amzMetaHeaderPrefix = "X-Amz-Meta-"
1416

17+
var (
18+
rfc2047Decoder = new(mime.WordDecoder)
19+
)
20+
1521
// amzMetaAsMetadata prepare metadata based on amazon user metadata request headers
16-
func amzMetaAsMetadata(req *http.Request) catalog.Metadata {
22+
func amzMetaAsMetadata(req *http.Request) (catalog.Metadata, error) {
1723
metadata := make(catalog.Metadata)
24+
var err error
1825
for k := range req.Header {
1926
if strings.HasPrefix(k, amzMetaHeaderPrefix) {
20-
metadata[k] = req.Header.Get(k)
27+
value, decodeErr := rfc2047Decoder.DecodeHeader(req.Header.Get(k))
28+
if decodeErr != nil {
29+
err = errors.Join(err, decodeErr)
30+
continue
31+
}
32+
metadata[k] = value
2133
}
2234
}
23-
return metadata
35+
return metadata, err
2436
}
2537

2638
// amzMetaWriteHeaders set amazon user metadata on http response
2739
func amzMetaWriteHeaders(w http.ResponseWriter, metadata catalog.Metadata) {
2840
h := w.Header()
2941
for k, v := range metadata {
3042
if strings.HasPrefix(k, amzMetaHeaderPrefix) {
31-
h.Set(k, v)
43+
h.Set(k, mime.QEncoding.Encode("utf-8", v))
3244
}
3345
}
3446
}

pkg/gateway/operations/postobject.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,18 @@ func (controller *PostObject) HandleCreateMultipartUpload(w http.ResponseWriter,
6565
_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
6666
return
6767
}
68+
metadata, err := amzMetaAsMetadata(req)
69+
if err != nil {
70+
o.Log(req).WithError(err).Error("failed to decode user metadata")
71+
_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidHeaderValue))
72+
return
73+
}
6874
mpu := multipart.Upload{
6975
UploadID: resp.UploadID,
7076
Path: o.Path,
7177
CreationDate: time.Now(),
7278
PhysicalAddress: address,
73-
Metadata: map[string]string(amzMetaAsMetadata(req)),
79+
Metadata: map[string]string(metadata),
7480
ContentType: req.Header.Get("Content-Type"),
7581
}
7682
err = o.MultipartTracker.Create(req.Context(), mpu)

pkg/gateway/operations/putobject.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,12 @@ func handleCopy(w http.ResponseWriter, req *http.Request, o *PathOperation, copy
112112

113113
ctx := req.Context()
114114

115-
metadata := amzMetaAsMetadata(req)
115+
metadata, err := amzMetaAsMetadata(req)
116+
if err != nil {
117+
o.Log(req).WithError(err).Error("failed to decode user metadata")
118+
_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidHeaderValue))
119+
return
120+
}
116121
replaceMetadata := shouldReplaceMetadata(req)
117122

118123
entry, err := o.Catalog.CopyEntry(ctx, srcPath.Repo, srcPath.Reference, srcPath.Path, repository, branch, o.Path, replaceMetadata, metadata)
@@ -360,7 +365,12 @@ func handlePut(w http.ResponseWriter, req *http.Request, o *PathOperation) {
360365
}
361366

362367
// write metadata
363-
metadata := amzMetaAsMetadata(req)
368+
metadata, err := amzMetaAsMetadata(req)
369+
if err != nil {
370+
o.Log(req).WithError(err).Error("failed to decode user metadata")
371+
_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidHeaderValue))
372+
return
373+
}
364374
contentType := req.Header.Get("Content-Type")
365375
err = o.finishUpload(req, &blob.CreationDate, blob.Checksum, blob.PhysicalAddress, blob.Size, true, metadata, contentType, gravelerOpts)
366376
if errors.Is(err, graveler.ErrPreconditionFailed) {

0 commit comments

Comments
 (0)