Skip to content

Commit ecb7614

Browse files
authored
MONGOCRYPT-762 Generate text search token sets from StrEncode output (#946)
1 parent ca98747 commit ecb7614

7 files changed

+458
-96
lines changed

src/mc-fle2-insert-update-payload-private-v2.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@
3333
_mongocrypt_buffer_t encryptedTokens; \
3434
} mc_Text##Type##TokenSet_t; \
3535
void mc_Text##Type##TokenSet_init(mc_Text##Type##TokenSet_t *); \
36-
void mc_Text##Type##TokenSet_cleanup(mc_Text##Type##TokenSet_t *)
36+
void mc_Text##Type##TokenSet_cleanup(mc_Text##Type##TokenSet_t *); \
37+
void mc_Text##Type##TokenSet_shallow_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dest)
3738

3839
DEF_TEXT_SEARCH_TOKEN_SET(Exact);
3940
DEF_TEXT_SEARCH_TOKEN_SET(Substring);

src/mc-fle2-insert-update-payload-v2.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@
3232
_mongocrypt_buffer_cleanup(&ts->escDerivedToken); \
3333
_mongocrypt_buffer_cleanup(&ts->serverDerivedFromDataToken); \
3434
_mongocrypt_buffer_cleanup(&ts->encryptedTokens); \
35+
} \
36+
void mc_Text##Type##TokenSet_shallow_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dst) { \
37+
BSON_ASSERT_PARAM(src); \
38+
BSON_ASSERT_PARAM(dst); \
39+
_mongocrypt_buffer_set_to(&src->edcDerivedToken, &dst->edcDerivedToken); \
40+
_mongocrypt_buffer_set_to(&src->escDerivedToken, &dst->escDerivedToken); \
41+
_mongocrypt_buffer_set_to(&src->serverDerivedFromDataToken, &dst->serverDerivedFromDataToken); \
42+
_mongocrypt_buffer_set_to(&src->encryptedTokens, &dst->encryptedTokens); \
3543
}
3644

3745
DEF_TEXT_SEARCH_TOKEN_SET_INIT_CLEANUP(Exact)

src/mongocrypt-buffer-private.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,4 +162,10 @@ bool _mongocrypt_buffer_from_subrange(_mongocrypt_buffer_t *out,
162162
uint32_t offset,
163163
uint32_t len) MONGOCRYPT_WARN_UNUSED_RESULT;
164164

165+
/* _mongocrypt_buffer_copy_from_string_as_bson_value initializes @out, wraps the provided string
166+
* into a BSON value, and copies the BSON value to @out. No BSON validation is performed on @str.
167+
* @str must not be NULL and @len (the number of bytes of @str to wrap) must be non-negative;
* both preconditions are enforced by assertion in the definition.
* Caller must call _mongocrypt_buffer_cleanup.
168+
*/
169+
void _mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_buffer_t *out, const char *str, int len);
170+
165171
#endif /* MONGOCRYPT_BUFFER_H */

src/mongocrypt-buffer.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,10 @@ bool _mongocrypt_buffer_to_bson_value(_mongocrypt_buffer_t *plaintext, uint8_t t
317317
return ret;
318318
}
319319

320-
void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *iter) {
320+
static void _mongocrypt_buffer_copy_as_bson_value(_mongocrypt_buffer_t *plaintext,
321+
bool (*append_func)(bson_t *bson, const void *data, int len),
322+
const void *data,
323+
int len) {
321324
bson_t wrapper = BSON_INITIALIZER;
322325
int32_t offset = INT32_LEN /* skips document size */
323326
+ TYPE_LEN /* element type */
@@ -326,13 +329,14 @@ void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *
326329
uint8_t *wrapper_data;
327330

328331
BSON_ASSERT_PARAM(plaintext);
329-
BSON_ASSERT_PARAM(iter);
332+
BSON_ASSERT_PARAM(append_func);
330333

331334
/* It is not straightforward to transform a bson_value_t to a string of
332335
* bytes. As a workaround, we wrap the value in a bson document with an empty
333336
* key, then use the raw buffer from inside the new bson_t, skipping the
334337
* length and type header information and the key name. */
335-
bson_append_iter(&wrapper, "", 0, iter);
338+
append_func(&wrapper, data, len);
339+
336340
wrapper_data = ((uint8_t *)bson_get_data(&wrapper));
337341
BSON_ASSERT(wrapper.len >= (uint32_t)offset + NULL_BYTE_LEN);
338342
plaintext->len = wrapper.len - (uint32_t)offset - NULL_BYTE_LEN; /* the final null byte */
@@ -345,6 +349,25 @@ void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *
345349
bson_destroy(&wrapper);
346350
}
347351

352+
/* Append callback for _mongocrypt_buffer_copy_as_bson_value: appends the element
 * referenced by @iter (a bson_iter_t passed as an opaque pointer) to @bson under an
 * empty key. @len is part of the callback signature only and is ignored here.
 * Returns the result of bson_append_iter. */
static bool _append_iter(bson_t *bson, const void *iter, int len) {
    const bson_iter_t *source_iter = (const bson_iter_t *)iter;

    (void)len; /* not needed when appending from an iterator */
    return bson_append_iter(bson, "", 0, source_iter);
}
355+
356+
/* Append callback for _mongocrypt_buffer_copy_as_bson_value: appends @len bytes of
 * the string @str (passed as an opaque pointer) to @bson as a UTF-8 element under
 * an empty key. Returns the result of bson_append_utf8. */
static bool _append_utf8(bson_t *bson, const void *str, int len) {
    const char *utf8 = (const char *)str;

    return bson_append_utf8(bson, "", 0, utf8, len);
}
359+
360+
/* Wraps @len bytes of @str as a BSON UTF-8 value and stores the value bytes in
 * @plaintext, delegating to _mongocrypt_buffer_copy_as_bson_value with the UTF-8
 * append callback. @str must be non-NULL and @len must be non-negative; both are
 * checked by assertion before delegating. */
void _mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_buffer_t *plaintext, const char *str, int len) {
    BSON_ASSERT_PARAM(str);
    BSON_ASSERT(len >= 0);

    const void *opaque_str = str;
    _mongocrypt_buffer_copy_as_bson_value(plaintext, _append_utf8, opaque_str, len);
}
365+
366+
/* Stores in @plaintext the BSON value bytes of the element that @iter refers to,
 * delegating to _mongocrypt_buffer_copy_as_bson_value with the iterator append
 * callback. @iter must be non-NULL (asserted). The length argument is unused by
 * that callback, so 0 is passed. */
void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *iter) {
    BSON_ASSERT_PARAM(iter);

    const void *opaque_iter = iter;
    _mongocrypt_buffer_copy_as_bson_value(plaintext, _append_iter, opaque_iter, 0);
}
370+
348371
bool _mongocrypt_buffer_from_uuid_iter(_mongocrypt_buffer_t *buf, bson_iter_t *iter) {
349372
const uint8_t *data;
350373
bson_subtype_t subtype;

src/mongocrypt-marking.c

Lines changed: 143 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include "mc-range-edge-generation-private.h"
3030
#include "mc-range-encoding-private.h"
3131
#include "mc-range-mincover-private.h"
32+
#include "mc-str-encode-string-sets-private.h"
33+
#include "mc-text-search-str-encode-private.h"
3234
#include "mc-tokens-private.h"
3335
#include "mongocrypt-buffer-private.h"
3436
#include "mongocrypt-ciphertext-private.h"
@@ -1126,26 +1128,22 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb,
11261128
mc_FLE2InsertUpdatePayloadV2_t *payload,
11271129
const _mongocrypt_buffer_t *indexKeyId,
11281130
const mc_FLE2TextSearchInsertSpec_t *spec,
1129-
const _mongocrypt_buffer_t *value,
11301131
int64_t contentionFactor,
11311132
mongocrypt_status_t *status) {
11321133
BSON_ASSERT_PARAM(kb);
11331134
BSON_ASSERT_PARAM(payload);
11341135
BSON_ASSERT_PARAM(indexKeyId);
11351136
BSON_ASSERT_PARAM(spec);
1136-
BSON_ASSERT_PARAM(value);
11371137

11381138
_mongocrypt_crypto_t *crypto = kb->crypt->crypto;
11391139
mc_TextSearchTokenSets_t *tsts = &payload->textSearchTokenSets.tsts;
11401140
_FLE2EncryptedPayloadCommon_t common = {{0}};
11411141
bool res = false;
11421142

1143-
// TODO MONGOCRYPT-759 implement case folding; for now let foldedValue be a copy of value.
1144-
_mongocrypt_buffer_t foldedValue = {0};
1145-
_mongocrypt_buffer_init(&foldedValue);
1146-
_mongocrypt_buffer_copy_to(value, &foldedValue);
1147-
1148-
// TODO MONGOCRYPT-762 do StrEncode here to get substring sets to encode
1143+
mc_str_encode_sets_t *encodeSets = mc_text_search_str_encode(spec, status);
1144+
if (!encodeSets) {
1145+
goto fail;
1146+
}
11491147

11501148
// Start the token derivations
11511149
if (!_get_tokenKey(kb, indexKeyId, &common.tokenKey, status)) {
@@ -1164,72 +1162,154 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb,
11641162
goto fail;
11651163
}
11661164

1167-
if (!_fle2_generate_TextExactTokenSet(kb,
1168-
&tsts->exact,
1169-
&foldedValue,
1170-
contentionFactor,
1171-
common.collectionsLevel1Token,
1172-
common.serverTokenDerivationLevel1Token,
1173-
status)) {
1174-
goto fail;
1165+
// Generate exact token set singleton
1166+
{
1167+
_mongocrypt_buffer_t asBsonValue;
1168+
_mongocrypt_buffer_init(&asBsonValue);
1169+
BSON_ASSERT(encodeSets->exact.len < INT_MAX);
1170+
_mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue,
1171+
(const char *)encodeSets->exact.data,
1172+
(int)encodeSets->exact.len);
1173+
if (!_fle2_generate_TextExactTokenSet(kb,
1174+
&tsts->exact,
1175+
&asBsonValue,
1176+
contentionFactor,
1177+
common.collectionsLevel1Token,
1178+
common.serverTokenDerivationLevel1Token,
1179+
status)) {
1180+
_mongocrypt_buffer_cleanup(&asBsonValue);
1181+
goto fail;
1182+
}
1183+
_mongocrypt_buffer_cleanup(&asBsonValue);
11751184
}
11761185

1177-
if (spec->substr.set) {
1178-
// TODO MONGOCRYPT-762 iterate on StrEncode substrings set
1179-
mc_TextSubstringTokenSet_t substrSet = {{0}};
1180-
mc_TextSubstringTokenSet_init(&substrSet);
1186+
const char *substring;
1187+
uint32_t bytelen;
1188+
uint32_t appendCount;
11811189

1182-
if (!_fle2_generate_TextSubstringTokenSet(kb,
1183-
&substrSet,
1184-
&foldedValue,
1185-
contentionFactor,
1186-
common.collectionsLevel1Token,
1187-
common.serverTokenDerivationLevel1Token,
1188-
status)) {
1189-
mc_TextSubstringTokenSet_cleanup(&substrSet);
1190-
goto fail;
1190+
// Generate array of substring token sets
1191+
if (encodeSets->substring_set) {
1192+
mc_substring_set_iter_t set_itr;
1193+
mc_substring_set_iter_init(&set_itr, encodeSets->substring_set);
1194+
1195+
while (mc_substring_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) {
1196+
BSON_ASSERT(appendCount > 0);
1197+
BSON_ASSERT(bytelen < INT_MAX);
1198+
1199+
mc_TextSubstringTokenSet_t tset = {{0}};
1200+
1201+
_mongocrypt_buffer_t asBsonValue;
1202+
_mongocrypt_buffer_init(&asBsonValue);
1203+
_mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen);
1204+
1205+
if (!_fle2_generate_TextSubstringTokenSet(kb,
1206+
&tset,
1207+
&asBsonValue,
1208+
contentionFactor,
1209+
common.collectionsLevel1Token,
1210+
common.serverTokenDerivationLevel1Token,
1211+
status)) {
1212+
_mongocrypt_buffer_cleanup(&asBsonValue);
1213+
mc_TextSubstringTokenSet_cleanup(&tset);
1214+
goto fail;
1215+
}
1216+
_mongocrypt_buffer_cleanup(&asBsonValue);
1217+
1218+
if (appendCount > 1) {
1219+
mc_TextSubstringTokenSet_t tset_copy;
1220+
mc_TextSubstringTokenSet_shallow_copy(&tset, &tset_copy);
1221+
for (; appendCount > 1; appendCount--) {
1222+
_mc_array_append_val(&tsts->substringArray, tset_copy);
1223+
}
1224+
}
1225+
_mc_array_append_val(&tsts->substringArray, tset); // array now owns tset
11911226
}
1192-
_mc_array_append_val(&tsts->substringArray, substrSet);
1193-
}
1194-
if (spec->suffix.set) {
1195-
// TODO MONGOCRYPT-762 iterate on StrEncode suffixes set
1196-
mc_TextSuffixTokenSet_t suffixSet = {{0}};
1197-
mc_TextSuffixTokenSet_init(&suffixSet);
1198-
1199-
if (!_fle2_generate_TextSuffixTokenSet(kb,
1200-
&suffixSet,
1201-
&foldedValue,
1202-
contentionFactor,
1203-
common.collectionsLevel1Token,
1204-
common.serverTokenDerivationLevel1Token,
1205-
status)) {
1206-
mc_TextSuffixTokenSet_cleanup(&suffixSet);
1207-
goto fail;
1227+
}
1228+
1229+
// Generate array of suffix token sets
1230+
if (encodeSets->suffix_set) {
1231+
mc_affix_set_iter_t set_itr;
1232+
mc_affix_set_iter_init(&set_itr, encodeSets->suffix_set);
1233+
1234+
while (mc_affix_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) {
1235+
BSON_ASSERT(appendCount > 0);
1236+
BSON_ASSERT(bytelen < INT_MAX);
1237+
1238+
mc_TextSuffixTokenSet_t tset = {{0}};
1239+
mc_TextSuffixTokenSet_init(&tset);
1240+
1241+
_mongocrypt_buffer_t asBsonValue;
1242+
_mongocrypt_buffer_init(&asBsonValue);
1243+
_mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen);
1244+
1245+
if (!_fle2_generate_TextSuffixTokenSet(kb,
1246+
&tset,
1247+
&asBsonValue,
1248+
contentionFactor,
1249+
common.collectionsLevel1Token,
1250+
common.serverTokenDerivationLevel1Token,
1251+
status)) {
1252+
_mongocrypt_buffer_cleanup(&asBsonValue);
1253+
mc_TextSuffixTokenSet_cleanup(&tset);
1254+
goto fail;
1255+
}
1256+
_mongocrypt_buffer_cleanup(&asBsonValue);
1257+
1258+
if (appendCount > 1) {
1259+
mc_TextSuffixTokenSet_t tset_copy;
1260+
mc_TextSuffixTokenSet_shallow_copy(&tset, &tset_copy);
1261+
for (; appendCount > 1; appendCount--) {
1262+
_mc_array_append_val(&tsts->suffixArray, tset_copy);
1263+
}
1264+
}
1265+
_mc_array_append_val(&tsts->suffixArray, tset); // array now owns tset
12081266
}
1209-
_mc_array_append_val(&tsts->suffixArray, suffixSet);
1210-
}
1211-
if (spec->prefix.set) {
1212-
// TODO MONGOCRYPT-762 iterate on StrEncode suffixes set
1213-
mc_TextPrefixTokenSet_t prefixSet = {{0}};
1214-
mc_TextPrefixTokenSet_init(&prefixSet);
1215-
1216-
if (!_fle2_generate_TextPrefixTokenSet(kb,
1217-
&prefixSet,
1218-
&foldedValue,
1219-
contentionFactor,
1220-
common.collectionsLevel1Token,
1221-
common.serverTokenDerivationLevel1Token,
1222-
status)) {
1223-
mc_TextPrefixTokenSet_cleanup(&prefixSet);
1224-
goto fail;
1267+
}
1268+
1269+
// Generate array of prefix token sets
1270+
if (encodeSets->prefix_set) {
1271+
mc_affix_set_iter_t set_itr;
1272+
mc_affix_set_iter_init(&set_itr, encodeSets->prefix_set);
1273+
1274+
while (mc_affix_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) {
1275+
BSON_ASSERT(appendCount > 0);
1276+
BSON_ASSERT(bytelen < INT_MAX);
1277+
1278+
mc_TextPrefixTokenSet_t tset = {{0}};
1279+
mc_TextPrefixTokenSet_init(&tset);
1280+
1281+
_mongocrypt_buffer_t asBsonValue;
1282+
_mongocrypt_buffer_init(&asBsonValue);
1283+
_mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen);
1284+
1285+
if (!_fle2_generate_TextPrefixTokenSet(kb,
1286+
&tset,
1287+
&asBsonValue,
1288+
contentionFactor,
1289+
common.collectionsLevel1Token,
1290+
common.serverTokenDerivationLevel1Token,
1291+
status)) {
1292+
_mongocrypt_buffer_cleanup(&asBsonValue);
1293+
mc_TextPrefixTokenSet_cleanup(&tset);
1294+
goto fail;
1295+
}
1296+
_mongocrypt_buffer_cleanup(&asBsonValue);
1297+
1298+
if (appendCount > 1) {
1299+
mc_TextPrefixTokenSet_t tset_copy;
1300+
mc_TextPrefixTokenSet_shallow_copy(&tset, &tset_copy);
1301+
for (; appendCount > 1; appendCount--) {
1302+
_mc_array_append_val(&tsts->prefixArray, tset_copy); // array now owns tset_copy
1303+
}
1304+
}
1305+
_mc_array_append_val(&tsts->prefixArray, tset); // moves ownership of tset
12251306
}
1226-
_mc_array_append_val(&tsts->prefixArray, prefixSet);
12271307
}
12281308
payload->textSearchTokenSets.set = true;
12291309
res = true;
12301310
fail:
12311311
_FLE2EncryptedPayloadCommon_cleanup(&common);
1232-
_mongocrypt_buffer_cleanup(&foldedValue);
1312+
mc_str_encode_sets_destroy(encodeSets);
12331313
return res;
12341314
}
12351315

@@ -1350,7 +1430,6 @@ static bool _mongocrypt_fle2_placeholder_to_insert_update_ciphertextForTextSearc
13501430
&payload,
13511431
&placeholder->index_key_id,
13521432
&insertSpec,
1353-
&value,
13541433
payload.contentionFactor,
13551434
status)) {
13561435
goto fail;

test/test-mongocrypt-buffer.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include <mongocrypt-marking-private.h>
1818

19+
#include "mongocrypt-buffer-private.h"
1920
#include "test-mongocrypt-assert.h"
2021
#include "test-mongocrypt.h"
2122

@@ -232,11 +233,35 @@ static void _test_mongocrypt_buffer_from_subrange(_mongocrypt_tester_t *tester)
232233
_mongocrypt_buffer_cleanup(&input);
233234
}
234235

236+
/* Checks the byte layout produced by _mongocrypt_buffer_copy_from_string_as_bson_value
 * for the string "foobar": a 4-byte length prefix (07 00 00 00, i.e. the six
 * characters plus the terminator), then the string bytes, then a null terminator. */
static void _test_mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_tester_t *tester) {
    const char *data = "foobar";
    const size_t dataLen = strlen(data);

    // expect output to contain 4-byte length + data + null string terminator
    const size_t expectedLen = sizeof(int32_t) + dataLen + sizeof(uint8_t);

    // Expected length prefix, built from its hex representation.
    _mongocrypt_buffer_t expectedLenBuf;
    _mongocrypt_buffer_copy_from_hex(&expectedLenBuf, "07000000");

    _mongocrypt_buffer_t buf;
    _mongocrypt_buffer_copy_from_string_as_bson_value(&buf, data, (int)dataLen);
    ASSERT(buf.len == expectedLen);

    // The first bytes of the output must equal the 4-byte length prefix.
    const uint32_t prefixLen = expectedLenBuf.len;
    ASSERT_CMPBYTES(expectedLenBuf.data, prefixLen, buf.data, prefixLen);

    // The remainder must be the string bytes followed by the null byte.
    ASSERT_CMPBYTES((const uint8_t *)data, dataLen + 1, buf.data + prefixLen, buf.len - prefixLen);

    _mongocrypt_buffer_cleanup(&buf);
    _mongocrypt_buffer_cleanup(&expectedLenBuf);
}
258+
235259
/* Registers every _mongocrypt_buffer_t test in this file with the test runner.
 * Tests are installed in the order listed below. */
void _mongocrypt_tester_install_buffer(_mongocrypt_tester_t *tester) {
    /* Conversions and copies into a buffer. */
    INSTALL_TEST(_test_mongocrypt_buffer_from_iter);
    INSTALL_TEST(_test_mongocrypt_buffer_copy_from_data_and_size);
    INSTALL_TEST(_test_mongocrypt_buffer_steal_from_data_and_size);
    INSTALL_TEST(_test_mongocrypt_buffer_steal_from_string);
    INSTALL_TEST(_test_mongocrypt_buffer_copy_from_uint64_le);
    INSTALL_TEST(_test_mongocrypt_buffer_from_subrange);
    /* String wrapped as a BSON value. */
    INSTALL_TEST(_test_mongocrypt_buffer_copy_from_string_as_bson_value);
}

0 commit comments

Comments
 (0)