Skip to content

Commit 95b038c

Browse files
borosaurus authored and evergreen committed
SERVER-42433 use ProjectionAST in canonical query encoding
1 parent 00ad160 commit 95b038c

File tree

2 files changed

+80
-62
lines changed

2 files changed

+80
-62
lines changed

src/mongo/db/query/canonical_query_encoder.cpp

Lines changed: 37 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
#include "mongo/base/simple_string_data_comparator.h"
3939
#include "mongo/db/matcher/expression_array.h"
4040
#include "mongo/db/matcher/expression_geo.h"
41+
#include "mongo/db/query/projection.h"
4142
#include "mongo/util/log.h"
4243

4344
namespace mongo {
@@ -49,6 +50,7 @@ const char kEncodeChildrenEnd = ']';
4950
const char kEncodeChildrenSeparator = ',';
5051
const char kEncodeCollationSection = '#';
5152
const char kEncodeProjectionSection = '|';
53+
const char kEncodeProjectionRequirementSeparator = '-';
5254
const char kEncodeRegexFlagsSeparator = '/';
5355
const char kEncodeSortSection = '~';
5456

@@ -65,6 +67,7 @@ void encodeUserString(StringData s, StringBuilder* keyBuilder) {
6567
case kEncodeChildrenSeparator:
6668
case kEncodeCollationSection:
6769
case kEncodeProjectionSection:
70+
case kEncodeProjectionRequirementSeparator:
6871
case kEncodeRegexFlagsSeparator:
6972
case kEncodeSortSection:
7073
case '\\':
@@ -463,51 +466,48 @@ void encodeKeyForSort(const BSONObj& sortObj, StringBuilder* keyBuilder) {
463466
}
464467

465468
/**
466-
* Encodes parsed projection into cache key.
467-
* Does a simple toString() on each projected field
468-
* in the BSON object.
469-
* Orders the encoded elements in the projection by field name.
470-
* This handles all the special projection types ($meta, $elemMatch, etc.)
469+
* Encodes projection AST into a cache key.
470+
*
471+
* For projections which have a finite set of required fields (inclusion-only projections), encodes
472+
* those field names in sorted order, separated by '-'.
473+
*
474+
* For projections which require the entire document (exclusion projections, projections with
475+
* expressions), the projection section is empty.
471476
*/
472-
void encodeKeyForProj(const BSONObj& projObj, StringBuilder* keyBuilder) {
473-
// Sorts the BSON elements by field name using a map.
474-
std::map<StringData, BSONElement> elements;
477+
void encodeKeyForProj(const projection_ast::Projection* proj, StringBuilder* keyBuilder) {
478+
if (!proj || proj->requiresDocument()) {
479+
// Don't encode anything for the projection section to indicate the entire document is
480+
// required.
481+
return;
482+
}
475483

476-
BSONObjIterator it(projObj);
477-
while (it.more()) {
478-
BSONElement elt = it.next();
479-
StringData fieldName = elt.fieldNameStringData();
484+
std::vector<std::string> requiredFields = proj->getRequiredFields();
485+
invariant(!requiredFields.empty());
480486

481-
// Internal callers may add $-prefixed fields to the projection. These are not part of a
482-
// user query, and therefore are not considered part of the cache key.
483-
if (fieldName[0] == '$') {
484-
continue;
485-
}
487+
// Keep track of whether we appended the character marking the beginning of the projection
488+
// section. We may not have to if every required field is the internally-added "$sortKey".
489+
bool appendedStart = false;
486490

487-
elements[fieldName] = elt;
488-
}
491+
// Encode the fields required by the projection in order.
492+
std::sort(requiredFields.begin(), requiredFields.end());
493+
for (auto&& requiredField : requiredFields) {
494+
invariant(!requiredField.empty());
489495

490-
if (!elements.empty()) {
491-
*keyBuilder << kEncodeProjectionSection;
492-
}
496+
// Internal callers (e.g., from mongos) may add "$sortKey" to the projection. This is not
497+
// part of the user query, and therefore is not considered part of the cache key.
498+
if (requiredField == "$sortKey") {
499+
continue;
500+
}
493501

494-
// Read elements in order of field name
495-
for (std::map<StringData, BSONElement>::const_iterator i = elements.begin();
496-
i != elements.end();
497-
++i) {
498-
const BSONElement& elt = (*i).second;
502+
const bool isFirst = !appendedStart;
499503

500-
if (elt.type() != BSONType::Object) {
501-
// For inclusion/exclusion projections, we encode as "i" or "e".
502-
*keyBuilder << (elt.trueValue() ? "i" : "e");
504+
if (isFirst) {
505+
*keyBuilder << kEncodeProjectionSection;
506+
appendedStart = true;
503507
} else {
504-
// For projection operators, we use the verbatim string encoding of the element.
505-
encodeUserString(elt.toString(false, // includeFieldName
506-
false), // full
507-
keyBuilder);
508+
*keyBuilder << kEncodeProjectionRequirementSeparator;
508509
}
509-
510-
encodeUserString(elt.fieldName(), keyBuilder);
510+
encodeUserString(requiredField, keyBuilder);
511511
}
512512
}
513513
} // namespace
@@ -518,7 +518,7 @@ CanonicalQuery::QueryShapeString encode(const CanonicalQuery& cq) {
518518
StringBuilder keyBuilder;
519519
encodeKeyForMatch(cq.root(), &keyBuilder);
520520
encodeKeyForSort(cq.getQueryRequest().getSort(), &keyBuilder);
521-
encodeKeyForProj(cq.getQueryRequest().getProj(), &keyBuilder);
521+
encodeKeyForProj(cq.getProj(), &keyBuilder);
522522
encodeCollation(cq.getCollator(), &keyBuilder);
523523

524524
return keyBuilder.str();

src/mongo/db/query/canonical_query_encoder_test.cpp

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -123,50 +123,68 @@ TEST(CanonicalQueryEncoderTest, ComputeKey) {
123123
// With sort
124124
testComputeKey("{}", "{a: 1}", "{}", "an~aa");
125125
testComputeKey("{}", "{a: -1}", "{}", "an~da");
126-
testComputeKey("{}",
127-
"{a: {$meta: 'textScore'}}",
128-
"{a: {$meta: 'textScore'}}",
129-
"an~ta|{ $meta: \"textScore\" }a");
126+
testComputeKey("{}", "{a: {$meta: 'textScore'}}", "{a: {$meta: 'textScore'}}", "an~ta");
130127
testComputeKey("{a: 1}", "{b: 1}", "{}", "eqa~ab");
131128

132129
// With projection
133-
testComputeKey("{}", "{}", "{a: 1}", "an|ia");
134-
testComputeKey("{}", "{}", "{a: -1}", "an|ia");
135-
testComputeKey("{}", "{}", "{a: -1.0}", "an|ia");
136-
testComputeKey("{}", "{}", "{a: true}", "an|ia");
137-
testComputeKey("{}", "{}", "{a: 0}", "an|ea");
138-
testComputeKey("{}", "{}", "{a: false}", "an|ea");
139-
testComputeKey("{}", "{}", "{a: 99}", "an|ia");
140-
testComputeKey("{}", "{}", "{a: 'foo'}", "an|ia");
141-
testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}}", "an|{ $slice: \\[ 3\\, 5 \\] }a");
142-
testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}}", "an|{ $elemMatch: { x: 2 } }a");
143-
testComputeKey("{}", "{}", "{a: ObjectId('507f191e810c19729de860ea')}", "an|ia");
144-
testComputeKey("{a: 1}", "{}", "{'a.$': 1}", "eqa|ia.$");
145-
testComputeKey("{a: 1}", "{}", "{a: 1}", "eqa|ia");
130+
testComputeKey("{}", "{}", "{a: 1}", "an|_id-a");
131+
testComputeKey("{}", "{}", "{a: -1}", "an|_id-a");
132+
testComputeKey("{}", "{}", "{a: -1.0}", "an|_id-a");
133+
testComputeKey("{}", "{}", "{a: true}", "an|_id-a");
134+
testComputeKey("{}", "{}", "{a: 0}", "an");
135+
testComputeKey("{}", "{}", "{a: false}", "an");
136+
testComputeKey("{}", "{}", "{a: 99}", "an|_id-a");
137+
testComputeKey("{}", "{}", "{a: 'foo'}", "an|_id-a");
138+
// $slice defaults to exclusion.
139+
testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}}", "an");
140+
testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}, b: 0}", "an");
141+
142+
// But even when using $slice in an inclusion, the entire document is needed.
143+
testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}, b: 1}", "an");
144+
145+
testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}}", "an");
146+
testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}, b: 0}", "an");
147+
testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}, b: 1}", "an");
148+
149+
testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}, b: {$elemMatch: {x: 2}}}", "an");
150+
151+
testComputeKey("{}", "{}", "{a: ObjectId('507f191e810c19729de860ea')}", "an|_id-a");
152+
testComputeKey("{a: 1}", "{}", "{'a.$': 1}", "eqa");
153+
testComputeKey("{a: 1}", "{}", "{a: 1}", "eqa|_id-a");
146154

147155
// Projection should be order-insensitive
148-
testComputeKey("{}", "{}", "{a: 1, b: 1}", "an|iaib");
149-
testComputeKey("{}", "{}", "{b: 1, a: 1}", "an|iaib");
156+
testComputeKey("{}", "{}", "{a: 1, b: 1}", "an|_id-a-b");
157+
testComputeKey("{}", "{}", "{b: 1, a: 1}", "an|_id-a-b");
158+
159+
// And should escape the separation character.
160+
testComputeKey("{}", "{}", "{'b-1': 1, 'a-2': 1}", "an|_id-a\\-2-b\\-1");
161+
162+
// And should exclude $-prefixed fields which can be added internally.
163+
testComputeKey("{}", "{x: 1}", "{$sortKey: {$meta: 'sortKey'}}", "an~ax");
164+
testComputeKey("{}", "{}", "{}", "an");
165+
166+
testComputeKey("{}", "{x: 1}", "{a: 1, $sortKey: {$meta: 'sortKey'}}", "an~ax|_id-a");
167+
testComputeKey("{}", "{}", "{a: 1}", "an|_id-a");
150168

151169
// With or-elimination and projection
152-
testComputeKey("{$or: [{a: 1}]}", "{}", "{_id: 0, a: 1}", "eqa|e_idia");
153-
testComputeKey("{$or: [{a: 1}]}", "{}", "{'a.$': 1}", "eqa|ia.$");
170+
testComputeKey("{$or: [{a: 1}]}", "{}", "{_id: 0, a: 1}", "eqa|a");
171+
testComputeKey("{$or: [{a: 1}]}", "{}", "{'a.$': 1}", "eqa");
154172
}
155173

156174
// Delimiters found in user field names or non-standard projection field values
157175
// must be escaped.
158176
TEST(CanonicalQueryEncoderTest, ComputeKeyEscaped) {
159177
// Field name in query.
160-
testComputeKey("{'a,[]~|<>': 1}", "{}", "{}", "eqa\\,\\[\\]\\~\\|<>");
178+
testComputeKey("{'a,[]~|-<>': 1}", "{}", "{}", "eqa\\,\\[\\]\\~\\|\\-<>");
161179

162180
// Field name in sort.
163-
testComputeKey("{}", "{'a,[]~|<>': 1}", "{}", "an~aa\\,\\[\\]\\~\\|<>");
181+
testComputeKey("{}", "{'a,[]~|-<>': 1}", "{}", "an~aa\\,\\[\\]\\~\\|\\-<>");
164182

165183
// Field name in projection.
166-
testComputeKey("{}", "{}", "{'a,[]~|<>': 1}", "an|ia\\,\\[\\]\\~\\|<>");
184+
testComputeKey("{}", "{}", "{'a,[]~|-<>': 1}", "an|_id-a\\,\\[\\]\\~\\|\\-<>");
167185

168186
// Value in projection.
169-
testComputeKey("{}", "{}", "{a: 'foo,[]~|<>'}", "an|ia");
187+
testComputeKey("{}", "{}", "{a: 'foo,[]~|-<>'}", "an|_id-a");
170188
}
171189

172190
// Cache keys for $geoWithin queries with legacy and GeoJSON coordinates should

0 commit comments

Comments
 (0)