Skip to content

Commit 3c8f683

Browse files
paroski authored and Evergreen Agent committed
SERVER-82885 Improve perf of bson::advance()
1 parent 5e9f50e commit 3c8f683

File tree

2 files changed

+89
-61
lines changed

2 files changed

+89
-61
lines changed

src/mongo/db/exec/sbe/values/bson.cpp

Lines changed: 54 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -45,73 +45,67 @@
4545
namespace mongo {
4646
namespace sbe {
4747
namespace bson {
48-
49-
/**
50-
* Advance table specifies how to change the pointer to skip current BSON value (so that pointer
51-
* points to the next byte after the BSON value):
52-
* - For values less than 128 (0x80), pointer is advanced by this value
53-
* - 255 (0xff) - pointer is advanced by the 32-bit integer stored in the buffer plus 4 bytes
54-
* - 254 (0xfe) - pointer is advanced by the 32-bit integer stored in the buffer
55-
* - 128 (0x80) - the type is either unsupported or handled explicitly
56-
*/
5748
// clang-format off
58-
static uint8_t advanceTable[] = {
59-
0xff, // EOO
60-
8, // Double
61-
0xff, // String
62-
0xfe, // Object
63-
0xfe, // Array
64-
0x80, // BinData
65-
0, // Undefined - Deprecated
66-
12, // ObjectId
67-
1, // Boolean
68-
8, // UTC datetime
69-
0, // Null value
70-
0x80, // Regular expression
71-
0x80, // DBPointer - Deprecated
72-
0xff, // JavaScript code
73-
0xff, // Symbol - Deprecated
74-
0xfe, // JavaScript code with scope - Deprecated
75-
4, // 32-bit integer
76-
8, // Timestamp
77-
8, // 64-bit integer
78-
16 // 128-bit decimal floating point
79-
49+
/**
 * Lookup table indexed by the BSON type byte; it is consumed by advance() and its encoding is
 * documented next to its declaration in bson.h.
 *  - Entries below 0x7F are the number of value bytes to skip.
 *  - Entries above 0x7F store, as their 8-bit complement, a constant that is added to the 32-bit
 *    integer read from the start of the value (0xFF -> 0, 0xFB -> 4, 0xFA -> 5, 0xEF -> 16).
 *  - Entries equal to 0x7F are routed to advanceHelper(), which accepts only RegEx and raises
 *    "unsupported bson element" for everything else.
 */
const uint8_t kAdvanceTable alignas(64)[256] = {
50+
0x7F, // 0: EOO
51+
8, // 1: Double
52+
0xFB, // 2: String
53+
0xFF, // 3: Object
54+
0xFF, // 4: Array
55+
0xFA, // 5: BinData
56+
0, // 6: Undefined
57+
12, // 7: ObjectId
58+
1, // 8: Boolean
59+
8, // 9: UTC datetime
60+
0, // 10: Null
61+
0x7F, // 11: Regular expression
62+
0xEF, // 12: DBPointer
63+
0xFB, // 13: JavaScript code
64+
0xFB, // 14: Symbol
65+
0xFF, // 15: JavaScript code with scope
66+
4, // 16: 32-bit integer
67+
8, // 17: Timestamp
68+
8, // 18: 64-bit integer
69+
16, // 19: 128-bit decimal floating point
70+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 20-29: Invalid
71+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 30-39: Invalid
72+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 40-49: Invalid
73+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 50-59: Invalid
74+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 60-69: Invalid
75+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 70-79: Invalid
76+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 80-89: Invalid
77+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 90-99: Invalid
78+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 100-109: Invalid
79+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 110-119: Invalid
80+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 120-126: Invalid
81+
0, // 127: MaxKey
82+
0x7F, 0x7F, // 128-129: Invalid
83+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 130-139: Invalid
84+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 140-149: Invalid
85+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 150-159: Invalid
86+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 160-169: Invalid
87+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 170-179: Invalid
88+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 180-189: Invalid
89+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 190-199: Invalid
90+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 200-209: Invalid
91+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 210-219: Invalid
92+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 220-229: Invalid
93+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 230-239: Invalid
94+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 240-249: Invalid
95+
0x7F, 0x7F, 0x7F, 0x7F, 0x7F, // 250-254: Invalid
96+
0, // 255: MinKey
8097
};
8198
// clang-format on
8299

83-
const char* advance(const char* be, size_t fieldNameSize) {
100+
const char* advanceHelper(const char* be, size_t fieldNameSize) {  // Slow path called by the inline advance() in bson.h.
84101
auto type = static_cast<unsigned char>(*be);
102+
uassert(4822804, "unsupported bson element", static_cast<BSONType>(type) == BSONType::RegEx);  // Only RegEx passes this check; any other type fails the assertion.
85103

86-
be += 1 /*type*/ + fieldNameSize + 1 /*zero at the end of fieldname*/;
87-
if (type < sizeof(advanceTable)) {
88-
auto advOffset = advanceTable[type];
89-
if (advOffset < 128) {
90-
be += advOffset;
91-
} else if (static_cast<BSONType>(type) == BSONType::RegEx) {
92-
be += value::BsonRegex(be).byteSize();
93-
} else if (static_cast<BSONType>(type) == BSONType::DBRef) {
94-
be += value::BsonDBPointer(be).byteSize();
95-
} else {
96-
be += ConstDataView(be).read<LittleEndian<uint32_t>>();
97-
if (advOffset == 0xff) {
98-
be += 4;
99-
} else if (advOffset == 0xfe) {
100-
} else {
101-
if (static_cast<BSONType>(type) == BSONType::BinData) {
102-
be += 5;
103-
} else {
104-
uasserted(4822803, "unsupported bson element");
105-
}
106-
}
107-
}
108-
} else if (type == static_cast<unsigned char>(BSONType::MinKey) ||
109-
type == static_cast<unsigned char>(BSONType::MaxKey)) {
110-
// We don't have to adjust the 'be' pointer as the above types have no value part.
111-
} else {
112-
uasserted(4822804, "unsupported bson element");
113-
}
104+
size_t sizeOfTypeCodeAndFieldName =
105+
1 /*type*/ + fieldNameSize + 1 /*zero at the end of fieldname*/;
114106

107+
be += sizeOfTypeCodeAndFieldName;  // Step over the type byte and the zero-terminated field name.
108+
be += value::BsonRegex(be).byteSize();  // Step over the regex value; its size comes from BsonRegex::byteSize().
115109
return be;
116110
}
117111

src/mongo/db/exec/sbe/values/bson.h

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,41 @@ std::pair<value::TypeTags, value::Value> convertFrom(const BSONElement& elem) {
5151
elem.rawdata(), elem.rawdata() + elem.size(), elem.fieldNameSize() - 1);
5252
}
5353

54-
const char* advance(const char* be, size_t fieldNameSize);
54+
/**
55+
* Advance table specifies how to change the pointer to skip current BSON value (so that pointer
56+
* points to the next byte after the BSON value):
57+
* - For each entry N in 'kAdvanceTable' that is less than 0x7F, pointer is advanced by N.
58+
* - For each entry N in 'kAdvanceTable' that is greater than 0x7F, pointer is advanced by
59+
* the 32-bit integer stored in the buffer plus ~N (the bitwise complement of N, truncated to 8 bits).
60+
* - For each entry N in 'kAdvanceTable' that is equal to 0x7F, the type is either RegEx or it
61+
* is an unsupported type (EOO) or it is an invalid type value (i.e. the type value does not
62+
* correspond to any known type).
63+
*/
64+
extern const uint8_t kAdvanceTable alignas(64)[256];
65+
66+
// Out-of-line slow path of advance(), used when the 'kAdvanceTable' entry for the type is 0x7F.
const char* advanceHelper(const char* be, size_t fieldNameSize);
67+
68+
/**
 * Returns a pointer to the byte following the BSON element that starts at 'be'. 'fieldNameSize' is
 * the length of the element's field name, not counting its terminating zero byte.
 *
 * The element's type byte indexes 'kAdvanceTable'. Entries below and above 0x7F are handled inline
 * here; entries equal to 0x7F are delegated to advanceHelper().
 */
inline const char* advance(const char* be, size_t fieldNameSize) {
69+
auto type = static_cast<unsigned char>(*be);  // The first byte of the element is its type code.
70+
auto advOffset = kAdvanceTable[type];
71+
72+
size_t sizeOfTypeCodeAndFieldName =
73+
1 /*type*/ + fieldNameSize + 1 /*zero at the end of fieldname*/;
74+
75+
if (MONGO_likely(advOffset < 0x7Fu)) {  // The table entry is the number of value bytes to skip.
76+
be += sizeOfTypeCodeAndFieldName;
77+
be += advOffset;
78+
return be;
79+
} else if (MONGO_likely(advOffset > 0x7Fu)) {  // Size is derived from a 32-bit integer at the start of the value.
80+
advOffset = ~advOffset;  // The 8-bit complement is the constant number of extra bytes to skip (e.g. 0xFB -> 4, 0xFF -> 0).
81+
be += sizeOfTypeCodeAndFieldName;
82+
be += ConstDataView(be).read<LittleEndian<int32_t>>();  // NOTE(review): read as signed int32_t; the removed implementation read uint32_t - confirm negative values cannot occur.
83+
be += advOffset;
84+
return be;
85+
}
86+
87+
return advanceHelper(be, fieldNameSize);  // advOffset == 0x7F: RegEx, or an unsupported/invalid type.
88+
}
5589

5690
inline auto fieldNameAndLength(const char* be) noexcept {
5791
return StringData{be + 1};

0 commit comments

Comments
 (0)