Skip to content

Commit 9e99c85

Browse files
auto-revert-processorEvergreen Agent
authored andcommitted
Revert "SERVER-82197 Handle collation in KeyString in case of Object/array"
This reverts commit 96841a6.
1 parent e501014 commit 9e99c85

File tree

2 files changed

+85
-197
lines changed

2 files changed

+85
-197
lines changed
Lines changed: 73 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -1,148 +1,73 @@
1-
/**
2-
* Tests $group execution with increased spilling and a non-simple collation.
3-
*
4-
* @tags: [requires_sbe]
5-
*/
6-
7-
import {assertArrayEq} from "jstests/aggregation/extras/utils.js";
8-
9-
const conn = MongoRunner.runMongod();
10-
const db = conn.getDB('test');
11-
12-
const coll = db.group_pushdown_with_collation;
13-
coll.drop();
14-
for (let i = 0; i < 1000; i++) {
15-
if (i % 3 === 0) {
16-
assert.commandWorked(coll.insert({x: 'a'}));
17-
} else {
18-
assert.commandWorked(coll.insert({x: 'A'}));
19-
}
20-
}
21-
22-
// Test that accumulators respect the collation when the group operation spills to disk.
23-
assert.commandWorked(db.adminCommand(
24-
{setParameter: 1, internalQuerySlotBasedExecutionHashAggForceIncreasedSpilling: true}));
25-
const caseInsensitive = {
26-
collation: {locale: "en_US", strength: 2}
27-
};
28-
let results =
29-
coll.aggregate([{$group: {_id: null, result: {$addToSet: "$x"}}}], caseInsensitive).toArray();
30-
assert.eq(1, results.length, results);
31-
assert.eq({_id: null, result: ["a"]}, results[0]);
32-
33-
// Test that comparisons of the group key respect the collation when the group operation spills to
34-
// disk.
35-
coll.drop();
36-
for (let i = 0; i < 1000; i++) {
37-
if (i % 5 === 0) {
38-
if (i % 10 === 0) {
39-
assert.commandWorked(coll.insert({x: 'b', y: 'D'}));
40-
} else {
41-
assert.commandWorked(coll.insert({x: 'b', y: 'e'}));
42-
}
43-
} else {
44-
assert.commandWorked(coll.insert({x: 'B', y: 'd'}));
45-
}
46-
}
47-
48-
results =
49-
coll.aggregate(
50-
[{
51-
$group:
52-
{_id: {X: "$x", Y: "$y"}, val: {$first: {$toLower: "$y"}}, count: {$count: {}}}
53-
}],
54-
caseInsensitive)
55-
.toArray();
56-
assertArrayEq({
57-
actual: results,
58-
expected: [{val: "d", count: 900}, {val: "e", count: 100}],
59-
fieldsToSkip: ["_id"]
60-
});
61-
62-
// Re-issue the query with the simple collation and check that the grouping becomes case-sensitive.
63-
results =
64-
coll.aggregate([{
65-
$group: {_id: {X: "$x", Y: "$y"}, val: {$first: {$toLower: "$y"}}, count: {$count: {}}}
66-
}])
67-
.toArray();
68-
assertArrayEq({
69-
actual: results,
70-
expected: [{val: "d", count: 800}, {val: "d", count: 100}, {val: "e", count: 100}],
71-
fieldsToSkip: ["_id"]
72-
});
73-
74-
// Test that comparisons of the group key respect the collation when the group operation spills to
75-
// disk and the key is an array.
76-
coll.drop();
77-
assert.commandWorked(coll.insertMany([
78-
{"_id": 0, key: ["A", "b"]},
79-
{"_id": 1, key: ["A", "B"]},
80-
{"_id": 2, key: ["B"]},
81-
{"_id": 3, key: ["b"]},
82-
{"_id": 4, key: ["a", "B"]},
83-
{"_id": 5, key: ["a", "b"]},
84-
]));
85-
86-
results = coll.aggregate([{$group: {_id: "$key", count: {$count: {}}}}], caseInsensitive).toArray();
87-
assertArrayEq({
88-
actual: results,
89-
expected: [{_id: ["A", "b"], count: 4}, {_id: ["B"], count: 2}],
90-
});
91-
92-
// Re-issue the query with the simple collation and check that the grouping becomes case-sensitive.
93-
results = coll.aggregate([{$group: {_id: "$key", count: {$count: {}}}}]).toArray();
94-
95-
assertArrayEq({
96-
actual: results,
97-
expected: [
98-
{_id: ["A", "b"], count: 1},
99-
{_id: ["A", "B"], count: 1},
100-
{_id: ["B"], count: 1},
101-
{_id: ["b"], count: 1},
102-
{_id: ["a", "B"], count: 1},
103-
{_id: ["a", "b"], count: 1}
104-
],
105-
});
106-
107-
// Test that comparisons of the group key respect the collation when the group operation spills to
108-
// disk and the key is an object.
109-
coll.drop();
110-
for (let i = 0; i < 1000; i++) {
111-
if (i % 5 === 0) {
112-
assert.commandWorked(coll.insert({x: {val: 'A'}}));
113-
} else {
114-
assert.commandWorked(coll.insert({x: {val: 'a'}}));
115-
}
116-
}
117-
for (let i = 0; i < 1000; i++) {
118-
if (i % 5 === 0) {
119-
assert.commandWorked(coll.insert({x: {val: 'b'}}));
120-
} else {
121-
assert.commandWorked(coll.insert({x: {val: 'B'}}));
122-
}
123-
}
124-
125-
results = coll.aggregate([{$group: {_id: {X: "$x"}, firstX: {$first: "$x"}, count: {$count: {}}}}],
126-
caseInsensitive)
127-
.toArray();
128-
assertArrayEq({
129-
actual: results,
130-
expected: [{firstX: {val: "b"}, count: 1000}, {firstX: {val: "A"}, count: 1000}],
131-
fieldsToSkip: ["_id"]
132-
});
133-
134-
// Re-issue the query with the simple collation and check that the grouping becomes case-sensitive.
135-
results = coll.aggregate([{$group: {_id: {X: "$x"}, firstX: {$first: "$x"}, count: {$count: {}}}}])
136-
.toArray();
137-
assertArrayEq({
138-
actual: results,
139-
expected: [
140-
{firstX: {val: "b"}, count: 200},
141-
{firstX: {val: "B"}, count: 800},
142-
{firstX: {val: "a"}, count: 800},
143-
{firstX: {val: "A"}, count: 200}
144-
],
145-
fieldsToSkip: ["_id"]
146-
});
147-
148-
MongoRunner.stopMongod(conn);
1+
/**
2+
* Tests $group execution with increased spilling and a non-simple collation.
3+
*
4+
* @tags: [requires_sbe]
5+
*/
6+
7+
import {assertArrayEq} from "jstests/aggregation/extras/utils.js";
8+
9+
const conn = MongoRunner.runMongod();
10+
const db = conn.getDB('test');
11+
const coll = db.group_pushdown_with_collation;
12+
coll.drop();
13+
for (let i = 0; i < 1000; i++) {
14+
if (i % 3 === 0) {
15+
assert.commandWorked(coll.insert({x: 'a'}));
16+
} else {
17+
assert.commandWorked(coll.insert({x: 'A'}));
18+
}
19+
}
20+
21+
// Test that accumulators respect the collation when the group operation spills to disk.
22+
assert.commandWorked(db.adminCommand(
23+
{setParameter: 1, internalQuerySlotBasedExecutionHashAggForceIncreasedSpilling: true}));
24+
const caseInsensitive = {
25+
collation: {locale: "en_US", strength: 2}
26+
};
27+
let results =
28+
coll.aggregate([{$group: {_id: null, result: {$addToSet: "$x"}}}], caseInsensitive).toArray();
29+
assert.eq(1, results.length, results);
30+
assert.eq({_id: null, result: ["a"]}, results[0]);
31+
32+
// Test that comparisons of the group key respect the collation when the group operation spills to
33+
// disk.
34+
coll.drop();
35+
for (let i = 0; i < 1000; i++) {
36+
if (i % 5 === 0) {
37+
if (i % 10 === 0) {
38+
assert.commandWorked(coll.insert({x: 'b', y: 'D'}));
39+
} else {
40+
assert.commandWorked(coll.insert({x: 'b', y: 'e'}));
41+
}
42+
} else {
43+
assert.commandWorked(coll.insert({x: 'B', y: 'd'}));
44+
}
45+
}
46+
47+
results =
48+
coll.aggregate(
49+
[{
50+
$group:
51+
{_id: {X: "$x", Y: "$y"}, val: {$first: {$toLower: "$y"}}, count: {$count: {}}}
52+
}],
53+
caseInsensitive)
54+
.toArray();
55+
assertArrayEq({
56+
actual: results,
57+
expected: [{val: "d", count: 900}, {val: "e", count: 100}],
58+
fieldsToSkip: ["_id"]
59+
});
60+
61+
// Re-issue the query with the simple collation and check that the grouping becomes case-sensitive.
62+
results =
63+
coll.aggregate([{
64+
$group: {_id: {X: "$x", Y: "$y"}, val: {$first: {$toLower: "$y"}}, count: {$count: {}}}
65+
}])
66+
.toArray();
67+
assertArrayEq({
68+
actual: results,
69+
expected: [{val: "d", count: 800}, {val: "d", count: 100}, {val: "e", count: 100}],
70+
fieldsToSkip: ["_id"]
71+
});
72+
73+
MongoRunner.stopMongod(conn);

src/mongo/db/exec/sbe/values/row.cpp

Lines changed: 12 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -441,14 +441,6 @@ static void serializeValue(BufBuilder& buf, TypeTags tag, Value val) {
441441
}
442442
}
443443

444-
/**
445-
* If non-null 'collator' is provided during serialization, then the encoding guarantees that values
446-
* which are equal up to the collation will encode to the same result, allowing for collation-aware
447-
* equality comparisons. However, the collator-aware encoded values are not always decodable and the
448-
* key string equality no longer implies equality of the original values. Groups store an original
449-
* copy of the group key alongside the encoded key string when there is a collation, so the key
450-
* string does not need to be decodable.
451-
*/
452444
static void serializeValueIntoKeyString(key_string::Builder& buf,
453445
TypeTags tag,
454446
Value val,
@@ -537,45 +529,28 @@ static void serializeValueIntoKeyString(key_string::Builder& buf,
537529
break;
538530
}
539531
case TypeTags::bsonSymbol: {
540-
// Note that the collation would have to apply to bsonSymbol values to match the
541-
// behavior of the Classic engine when spilling groups to disk. bsonSymbol is
542-
// deprecated, however, and SBE no longer provides strict correctness guarantees about
543-
// computations on bsonSymbol values.
544532
buf.appendBool(true);
545533
buf.appendSymbol(getStringOrSymbolView(tag, val));
546534
break;
547535
}
548536
case TypeTags::ArrayMultiSet:
549537
case TypeTags::ArraySet:
550538
case TypeTags::Array: {
539+
// TODO SERVER-61629: convert this to serialize the 'arr' directly instead of
540+
// constructing a BSONArray.
541+
BSONArrayBuilder builder;
542+
bson::convertToBsonArr(builder, value::ArrayEnumerator{tag, val});
551543
buf.appendBool(true);
552-
553-
if (collator) {
554-
// Using 'hashValue()' guarantees that two arrays whose elements are equal up to the
555-
// collation will translate to the same key string output.
556-
buf.appendNumberLong(hashValue(tag, val, collator));
557-
} else {
558-
// TODO SERVER-61629: convert this to serialize the 'arr' directly instead of
559-
// constructing a BSONArray.
560-
BSONArrayBuilder builder;
561-
bson::convertToBsonArr(builder, value::ArrayEnumerator{tag, val});
562-
buf.appendArray(BSONArray(builder.done()));
563-
}
544+
buf.appendArray(BSONArray(builder.done()));
564545
break;
565546
}
566547
case TypeTags::Object: {
548+
// TODO SERVER-61629: convert this to serialize the 'obj' directly instead of
549+
// constructing a BSONObj.
550+
BSONObjBuilder builder;
551+
bson::convertToBsonObj(builder, getObjectView(val));
567552
buf.appendBool(true);
568-
if (collator) {
569-
// Using 'hashValue()' guarantees that two objects whose elements are equal up to
570-
// the collation will translate to the same key string output.
571-
buf.appendNumberLong(hashValue(tag, val, collator));
572-
} else {
573-
// TODO SERVER-61629: convert this to serialize the 'obj' directly instead of
574-
// constructing a BSONObj.
575-
BSONObjBuilder builder;
576-
bson::convertToBsonObj(builder, getObjectView(val));
577-
buf.appendObject(builder.done());
578-
}
553+
buf.appendObject(builder.done());
579554
break;
580555
}
581556
case TypeTags::ObjectId: {
@@ -585,24 +560,12 @@ static void serializeValueIntoKeyString(key_string::Builder& buf,
585560
}
586561
case TypeTags::bsonObject: {
587562
buf.appendBool(true);
588-
if (collator) {
589-
// Using 'hashValue()' guarantees that two objects whose elements are equal up to
590-
// the collation will translate to the same key string output.
591-
buf.appendNumberLong(hashValue(tag, val, collator));
592-
} else {
593-
buf.appendObject(BSONObj(getRawPointerView(val)));
594-
}
563+
buf.appendObject(BSONObj(getRawPointerView(val)));
595564
break;
596565
}
597566
case TypeTags::bsonArray: {
598567
buf.appendBool(true);
599-
if (collator) {
600-
// Using 'hashValue()' guarantees that two arrays whose elements are equal up to the
601-
// collation will translate to the same key string output.
602-
buf.appendNumberLong(hashValue(tag, val, collator));
603-
} else {
604-
buf.appendArray(BSONArray(BSONObj(getRawPointerView(val))));
605-
}
568+
buf.appendArray(BSONArray(BSONObj(getRawPointerView(val))));
606569
break;
607570
}
608571
case TypeTags::bsonObjectId: {

0 commit comments

Comments
 (0)