Skip to content

Commit 1f1a5a1

Browse files
authored
REP-6465 Fix handling of dotted shard keys (#127)
This fixes Migration Verifier’s logic to assemble a document key given a full document and field names when any of those fields has a dot. When the server routes a document whose collection is sharded on, e.g., `{foo.bar: 1}`, it does so based on the value of /foo/bar (i.e., the `bar` field of the top-level embedded document named `foo`) in the document. Migration Verifier instead assembled its document key from the value at /foo.bar (i.e., a top-level field named `foo.bar`). This fixes the bug. Notably, it also avoids the problem, documented in SERVER-109340, where the server’s change stream’s `documentKey` inaccurately reports the values used to route the document.
1 parent 0c8bd1e commit 1f1a5a1

File tree

8 files changed

+608
-32
lines changed

8 files changed

+608
-32
lines changed

dockey/agg.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Package dockey contains logic related to document key determination.
2+
// Its tests use a cluster and thus are stored in internal/verifier.
3+
4+
package dockey
5+
6+
import (
7+
"maps"
8+
"slices"
9+
"strconv"
10+
11+
"github.com/samber/lo"
12+
"go.mongodb.org/mongo-driver/bson"
13+
)
14+
15+
// ExtractTrueDocKeyAgg returns an aggregation expression that extracts the
16+
// document key from the document to which the `docExpr` refers.
17+
//
18+
// NB: This avoids the problem documented in SERVER-109340; as a result,
19+
// the returned key may not always match the change stream’s `documentKey`
20+
// (because the server misreports its own sharding logic).
21+
func ExtractTrueDocKeyAgg(fieldNames []string, docExpr string) bson.D {
22+
assertFieldNameUniqueness(fieldNames)
23+
24+
var docKeyNumKeys bson.D
25+
numToKeyLookup := map[string]string{}
26+
27+
for n, name := range fieldNames {
28+
var valExpr = docExpr + "." + name
29+
30+
// Aggregation forbids direct creation of an object with dotted keys.
31+
// So here we create an object with numeric keys, then below we’ll
32+
// map the numeric keys back to the real ones.
33+
34+
nStr := strconv.Itoa(n)
35+
docKeyNumKeys = append(docKeyNumKeys, bson.E{nStr, valExpr})
36+
numToKeyLookup[nStr] = name
37+
}
38+
39+
// Now convert the numeric keys back to the real ones.
40+
return mapObjectKeysAgg(docKeyNumKeys, numToKeyLookup)
41+
}
42+
43+
// Potentially reusable:
44+
func mapObjectKeysAgg(expr any, mapping map[string]string) bson.D {
45+
// We would ideally pass mapping into the aggregation and $getField
46+
// to get the mapped key, but pre-v8 server versions required $getField’s
47+
// field parameter to be a constant. (And pre-v5 didn’t have $getField
48+
// at all.) So we use a $switch instead.
49+
mapAgg := bson.D{
50+
{"$switch", bson.D{
51+
{"branches", lo.Map(
52+
slices.Collect(maps.Keys(mapping)),
53+
func(key string, _ int) bson.D {
54+
return bson.D{
55+
{"case", bson.D{
56+
{"$eq", bson.A{
57+
key,
58+
"$$numericKey",
59+
}},
60+
}},
61+
{"then", mapping[key]},
62+
}
63+
},
64+
)},
65+
}},
66+
}
67+
68+
return bson.D{
69+
{"$arrayToObject", bson.D{
70+
{"$map", bson.D{
71+
{"input", bson.D{
72+
{"$objectToArray", expr},
73+
}},
74+
{"in", bson.D{
75+
{"$let", bson.D{
76+
{"vars", bson.D{
77+
{"numericKey", "$$this.k"},
78+
{"value", "$$this.v"},
79+
}},
80+
{"in", bson.D{
81+
{"k", mapAgg},
82+
{"v", "$$value"},
83+
}},
84+
}},
85+
}},
86+
}},
87+
}},
88+
}
89+
}

dockey/agg_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package dockey
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestAggPanic(t *testing.T) {
10+
assert.Panics(
11+
t,
12+
func() {
13+
ExtractTrueDocKeyAgg(
14+
[]string{"foo", "bar", "foo"},
15+
"$$ROOT",
16+
)
17+
},
18+
"duplicate field name should cause panic",
19+
)
20+
}

dockey/raw.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package dockey
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
"github.com/pkg/errors"
8+
"github.com/samber/lo"
9+
"go.mongodb.org/mongo-driver/bson"
10+
"go.mongodb.org/mongo-driver/x/bsonx/bsoncore"
11+
)
12+
13+
// This extracts the document key from a document gets its field names.
14+
//
15+
// NB: This avoids the problem documented in SERVER-109340; as a result,
16+
// the returned key may not always match the change stream’s `documentKey`
17+
// (because the server misreports its own sharding logic).
18+
func ExtractTrueDocKeyFromDoc(
19+
fieldNames []string,
20+
doc bson.Raw,
21+
) (bson.Raw, error) {
22+
assertFieldNameUniqueness(fieldNames)
23+
24+
var dk bson.D
25+
for _, field := range fieldNames {
26+
27+
// This is how sharding routes documents: it always
28+
// splits on the dot and looks deeply into the document.
29+
parts := strings.Split(field, ".")
30+
val, err := doc.LookupErr(parts...)
31+
32+
if errors.Is(err, bsoncore.ErrElementNotFound) || errors.As(err, &bsoncore.InvalidDepthTraversalError{}) {
33+
// If the document lacks a value for this field
34+
// then don’t add it to the document key.
35+
continue
36+
} else if err == nil {
37+
dk = append(dk, bson.E{field, val})
38+
} else {
39+
return nil, errors.Wrapf(err, "extracting doc key field %#q from doc %+v", field, doc)
40+
}
41+
}
42+
43+
docKey, err := bson.Marshal(dk)
44+
if err != nil {
45+
return nil, errors.Wrapf(err, "marshaling doc key %v from doc %v", dk, docKey)
46+
}
47+
48+
return docKey, nil
49+
}
50+
51+
func assertFieldNameUniqueness(fieldNames []string) {
52+
if len(lo.Uniq(fieldNames)) != len(fieldNames) {
53+
panic(fmt.Sprintf("Duplicate field names: %v", fieldNames))
54+
}
55+
}

dockey/raw_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package dockey
2+
3+
import (
4+
"slices"
5+
"testing"
6+
7+
"github.com/10gen/migration-verifier/dockey/test"
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
"go.mongodb.org/mongo-driver/bson"
11+
)
12+
13+
func TestExtractTrueDocKeyFromDoc(t *testing.T) {
14+
for _, reverseYN := range []bool{false, true} {
15+
fieldNames := slices.Clone(test.FieldNames)
16+
17+
if reverseYN {
18+
slices.Reverse(fieldNames)
19+
}
20+
21+
for _, curCase := range test.TestCases {
22+
raw, err := bson.Marshal(curCase.Doc)
23+
require.NoError(t, err)
24+
25+
computedRaw, err := ExtractTrueDocKeyFromDoc(
26+
fieldNames,
27+
raw,
28+
)
29+
require.NoError(t, err)
30+
31+
var computedDocKey bson.D
32+
require.NoError(t, bson.Unmarshal(computedRaw, &computedDocKey))
33+
34+
expectedDocKey := slices.Clone(curCase.DocKey)
35+
if reverseYN {
36+
slices.Reverse(expectedDocKey)
37+
}
38+
39+
assert.Equal(
40+
t,
41+
expectedDocKey,
42+
computedDocKey,
43+
"doc key for %v (fieldNames: %v)",
44+
bson.Raw(raw),
45+
fieldNames,
46+
)
47+
}
48+
}
49+
50+
assert.Panics(
51+
t,
52+
func() {
53+
_, _ = ExtractTrueDocKeyFromDoc(
54+
[]string{"foo", "bar", "foo"},
55+
bson.Raw{0},
56+
)
57+
},
58+
"duplicate field name should cause panic",
59+
)
60+
}

dockey/test/cases.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package test
2+
3+
import (
4+
"github.com/10gen/migration-verifier/mslices"
5+
"go.mongodb.org/mongo-driver/bson"
6+
)
7+
8+
type TestCase struct {
9+
Doc bson.D
10+
DocKey bson.D
11+
}
12+
13+
var FieldNames = mslices.Of("_id", "foo.bar.baz")
14+
15+
var TestCases = []TestCase{
16+
{
17+
Doc: bson.D{
18+
{"_id", "abc"},
19+
{"foo", bson.D{
20+
{"bar", bson.D{{"baz", 1}}},
21+
{"bar.baz", 2},
22+
}},
23+
{"foo.bar", bson.D{{"baz", 3}}},
24+
{"foo.bar.baz", 4},
25+
},
26+
DocKey: bson.D{
27+
{"_id", "abc"},
28+
{"foo.bar.baz", int32(1)},
29+
},
30+
},
31+
{
32+
Doc: bson.D{
33+
{"_id", "bbb"},
34+
{"foo", bson.D{
35+
{"bar", bson.D{{"baz", 1}}},
36+
{"bar.baz", 2},
37+
}},
38+
{"foo.bar", bson.D{{"baz", 3}}},
39+
},
40+
DocKey: bson.D{
41+
{"_id", "bbb"},
42+
{"foo.bar.baz", int32(1)},
43+
},
44+
},
45+
{
46+
Doc: bson.D{
47+
{"_id", "ccc"},
48+
{"foo", bson.D{
49+
{"bar.baz", 2},
50+
}},
51+
{"foo.bar", bson.D{{"baz", 3}}},
52+
},
53+
DocKey: bson.D{
54+
{"_id", "ccc"},
55+
},
56+
},
57+
{
58+
Doc: bson.D{
59+
{"_id", "ddd"},
60+
{"foo", bson.D{
61+
{"bar", bson.D{{"baz", nil}}},
62+
}},
63+
},
64+
DocKey: bson.D{
65+
{"_id", "ddd"},
66+
{"foo.bar.baz", nil},
67+
},
68+
},
69+
{
70+
Doc: bson.D{
71+
{"_id", "eee"},
72+
{"foo", "bar"},
73+
},
74+
DocKey: bson.D{
75+
{"_id", "eee"},
76+
},
77+
},
78+
}

0 commit comments

Comments
 (0)