Skip to content

Commit 41ea6a8

Browse files
authored
MONGOCRYPT-761 Convert FLE2EncryptionPlaceholder to a find payload for text search indexed fields (#990)
1 parent af62167 commit 41ea6a8

9 files changed

+895
-150
lines changed

src/mc-fle-blob-subtype-private.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ typedef enum {
4646

4747
/* Text Search Subtypes */
4848
MC_SUBTYPE_FLE2IndexedTextEncryptedValue = 17,
49+
MC_SUBTYPE_FLE2FindTextPayload = 18,
4950
} mc_fle_blob_subtype_t;
5051

5152
#endif /* MC_FLE_BLOB_SUBTYPE_PRIVATE_H */

src/mc-fle2-encryption-placeholder.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -698,11 +698,7 @@ bool mc_FLE2TextSearchInsertSpec_parse(mc_FLE2TextSearchInsertSpec_t *out,
698698
CHECK_HAS(v)
699699
CHECK_HAS(casef)
700700
CHECK_HAS(diacf)
701-
// one of substr/suffix/prefix must be set
702-
if (!(has_substr || has_suffix || has_prefix)) {
703-
CLIENT_ERR(ERROR_PREFIX "Must have a substring, suffix, or prefix index specification");
704-
goto fail;
705-
}
701+
706702
return true;
707703

708704
fail:

src/mc-text-search-str-encode-private.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,11 @@ mc_str_encode_sets_t *mc_text_search_str_encode(const mc_FLE2TextSearchInsertSpe
4242

4343
void mc_str_encode_sets_destroy(mc_str_encode_sets_t *sets);
4444

45+
// Applies case/diacritic folding to the string value in spec (if applicable), and returns
46+
// the resulting string as a BSON string element in *out. Returns false and an error if the string
47+
// is not valid UTF-8 or is unsuitable per the query parameters in the spec.
48+
bool mc_text_search_str_query(const mc_FLE2TextSearchInsertSpec_t *spec,
49+
_mongocrypt_buffer_t *out,
50+
mongocrypt_status_t *status);
51+
4552
#endif /* MONGOCRYPT_TEXT_SEARCH_STR_ENCODE_PRIVATE_H */

src/mc-text-search-str-encode.c

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
* limitations under the License.
1515
*/
1616

17+
#include "mc-fle2-encryption-placeholder-private.h"
1718
#include "mc-str-encode-string-sets-private.h"
1819
#include "mc-text-search-str-encode-private.h"
1920
#include "mongocrypt-buffer-private.h"
21+
#include "mongocrypt-crypto-private.h"
2022
#include "mongocrypt.h"
2123
#include "unicode/fold.h"
2224
#include <bson/bson.h>
@@ -246,3 +248,77 @@ void mc_str_encode_sets_destroy(mc_str_encode_sets_t *sets) {
246248
mc_substring_set_destroy(sets->substring_set);
247249
bson_free(sets);
248250
}
251+
252+
bool mc_text_search_str_query(const mc_FLE2TextSearchInsertSpec_t *spec,
253+
_mongocrypt_buffer_t *out,
254+
mongocrypt_status_t *status) {
255+
BSON_ASSERT_PARAM(spec);
256+
BSON_ASSERT_PARAM(out);
257+
258+
if (spec->len > MAX_ENCODE_BYTE_LEN) {
259+
CLIENT_ERR("StrQuery: String passed in was too long: String was %" PRIu32 " bytes, but max is %d bytes",
260+
spec->len,
261+
MAX_ENCODE_BYTE_LEN);
262+
return false;
263+
}
264+
265+
_mongocrypt_buffer_init(out);
266+
if (!bson_utf8_validate(spec->v, spec->len, false /* allow_null */)) {
267+
CLIENT_ERR("StrQuery: String passed in was not valid UTF-8");
268+
return false;
269+
}
270+
271+
uint32_t folded_codepoint_len = 0;
272+
273+
if (spec->casef || spec->diacf) {
274+
char *folded_str;
275+
size_t folded_str_bytes_len;
276+
if (!unicode_fold(spec->v,
277+
spec->len,
278+
(spec->casef * kUnicodeFoldToLower) | (spec->diacf * kUnicodeFoldRemoveDiacritics),
279+
&folded_str,
280+
&folded_str_bytes_len,
281+
status)) {
282+
return false;
283+
}
284+
_mongocrypt_buffer_copy_from_string_as_bson_value(out, folded_str, (int)folded_str_bytes_len);
285+
folded_codepoint_len = mc_get_utf8_codepoint_length(folded_str, (uint32_t)folded_str_bytes_len);
286+
bson_free(folded_str);
287+
} else {
288+
_mongocrypt_buffer_copy_from_string_as_bson_value(out, spec->v, (int)spec->len);
289+
folded_codepoint_len = mc_get_utf8_codepoint_length(spec->v, spec->len);
290+
}
291+
292+
if (spec->substr.set || spec->suffix.set || spec->prefix.set) {
293+
uint32_t min = 0, max = 0;
294+
if (spec->substr.set) {
295+
min = spec->substr.value.lb;
296+
max = spec->substr.value.ub;
297+
} else if (spec->suffix.set) {
298+
min = spec->suffix.value.lb;
299+
max = spec->suffix.value.ub;
300+
} else {
301+
min = spec->prefix.value.lb;
302+
max = spec->prefix.value.ub;
303+
}
304+
if (folded_codepoint_len == 0) {
305+
CLIENT_ERR("StrQuery: string value cannot be empty for substring, suffix, or prefix queries");
306+
return false;
307+
}
308+
if (folded_codepoint_len > max) {
309+
CLIENT_ERR("StrQuery: string value was longer than the maximum query length "
310+
"for this field after folding -- folded codepoint len: %u, max query len: %u",
311+
folded_codepoint_len,
312+
max);
313+
return false;
314+
}
315+
if (folded_codepoint_len < min) {
316+
CLIENT_ERR("StrQuery: string value was shorter than the minimum query length "
317+
"for this field after folding -- folded codepoint len: %u, min query len: %u",
318+
folded_codepoint_len,
319+
min);
320+
return false;
321+
}
322+
}
323+
return true;
324+
}

src/mongocrypt-ctx-encrypt.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616

1717
#include "mc-efc-private.h"
18+
#include "mc-fle-blob-subtype-private.h"
1819
#include "mc-fle2-rfds-private.h"
1920
#include "mc-tokens-private.h"
2021
#include "mongocrypt-ciphertext-private.h"
@@ -574,7 +575,8 @@ static bool _try_run_csfle_marking(mongocrypt_ctx_t *ctx) {
574575
static bool _mongocrypt_fle2_insert_update_find(mc_fle_blob_subtype_t subtype) {
575576
return (subtype == MC_SUBTYPE_FLE2InsertUpdatePayload) || (subtype == MC_SUBTYPE_FLE2InsertUpdatePayloadV2)
576577
|| (subtype == MC_SUBTYPE_FLE2FindEqualityPayload) || (subtype == MC_SUBTYPE_FLE2FindEqualityPayloadV2)
577-
|| (subtype == MC_SUBTYPE_FLE2FindRangePayload) || (subtype == MC_SUBTYPE_FLE2FindRangePayloadV2);
578+
|| (subtype == MC_SUBTYPE_FLE2FindRangePayload) || (subtype == MC_SUBTYPE_FLE2FindRangePayloadV2)
579+
|| (subtype == MC_SUBTYPE_FLE2FindTextPayload);
578580
}
579581

580582
static bool

src/mongocrypt-marking.c

Lines changed: 192 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "mc-fle2-find-equality-payload-private.h"
2222
#include "mc-fle2-find-range-payload-private-v2.h"
2323
#include "mc-fle2-find-range-payload-private.h"
24+
#include "mc-fle2-find-text-payload-private.h"
2425
#include "mc-fle2-insert-update-payload-private-v2.h"
2526
#include "mc-fle2-insert-update-payload-private.h"
2627
#include "mc-fle2-payload-uev-private.h"
@@ -1132,6 +1133,46 @@ static bool _mongocrypt_fle2_placeholder_to_insert_update_ciphertextForRange(_mo
11321133
return false; \
11331134
} \
11341135
return true; \
1136+
} \
1137+
static bool _fle2_generate_Text##Type##FindTokenSet( \
1138+
_mongocrypt_key_broker_t *kb, \
1139+
mc_Text##Type##FindTokenSet_t *out, \
1140+
const _mongocrypt_buffer_t *value, \
1141+
const mc_CollectionsLevel1Token_t *collLevel1Token, \
1142+
const mc_ServerTokenDerivationLevel1Token_t *serverLevel1Token, \
1143+
mongocrypt_status_t *status) { \
1144+
BSON_ASSERT_PARAM(kb); \
1145+
BSON_ASSERT_PARAM(kb->crypt); \
1146+
BSON_ASSERT_PARAM(out); \
1147+
BSON_ASSERT_PARAM(value); \
1148+
BSON_ASSERT_PARAM(collLevel1Token); \
1149+
BSON_ASSERT_PARAM(serverLevel1Token); \
1150+
if (!_fle2_derive_EDCText##Type##_token(kb->crypt->crypto, \
1151+
&out->edcDerivedToken, \
1152+
collLevel1Token, \
1153+
value, \
1154+
false, \
1155+
0, \
1156+
status)) { \
1157+
return false; \
1158+
} \
1159+
if (!_fle2_derive_ESCText##Type##_token(kb->crypt->crypto, \
1160+
&out->escDerivedToken, \
1161+
collLevel1Token, \
1162+
value, \
1163+
false, \
1164+
0, \
1165+
status)) { \
1166+
return false; \
1167+
} \
1168+
if (!_fle2_derive_serverText##Type##DerivedFromDataToken(kb->crypt->crypto, \
1169+
&out->serverDerivedFromDataToken, \
1170+
serverLevel1Token, \
1171+
value, \
1172+
status)) { \
1173+
return false; \
1174+
} \
1175+
return true; \
11351176
}
11361177
GENERATE_TEXT_SEARCH_TOKEN_SET_FOR_TYPE_IMPL(Exact)
11371178
GENERATE_TEXT_SEARCH_TOKEN_SET_FOR_TYPE_IMPL(Substring)
@@ -1328,6 +1369,98 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb,
13281369
return res;
13291370
}
13301371

1372+
static bool _fle2_generate_TextSearchFindTokenSets(_mongocrypt_key_broker_t *kb,
1373+
mc_TextSearchFindTokenSets_t *out,
1374+
const _mongocrypt_buffer_t *indexKeyId,
1375+
const mc_FLE2TextSearchInsertSpec_t *spec,
1376+
mongocrypt_status_t *status) {
1377+
BSON_ASSERT_PARAM(kb);
1378+
BSON_ASSERT_PARAM(kb->crypt);
1379+
BSON_ASSERT_PARAM(out);
1380+
BSON_ASSERT_PARAM(indexKeyId);
1381+
BSON_ASSERT_PARAM(spec);
1382+
1383+
_mongocrypt_crypto_t *crypto = kb->crypt->crypto;
1384+
_FLE2EncryptedPayloadCommon_t common = {{0}};
1385+
_mongocrypt_buffer_t asBsonValue = {0};
1386+
bool res = false;
1387+
1388+
int operator_count = (int)spec->substr.set + (int)spec->suffix.set + (int)spec->prefix.set;
1389+
if (operator_count > 1) {
1390+
CLIENT_ERR("Text search query specification cannot contain multiple query type specifications");
1391+
goto fail;
1392+
}
1393+
1394+
if (!mc_text_search_str_query(spec, &asBsonValue, status)) {
1395+
goto fail;
1396+
}
1397+
1398+
// Start the token derivations
1399+
if (!_get_tokenKey(kb, indexKeyId, &common.tokenKey, status)) {
1400+
goto fail;
1401+
}
1402+
1403+
common.collectionsLevel1Token = mc_CollectionsLevel1Token_new(crypto, &common.tokenKey, status);
1404+
if (!common.collectionsLevel1Token) {
1405+
CLIENT_ERR("unable to derive collectionLevel1Token");
1406+
goto fail;
1407+
}
1408+
1409+
common.serverTokenDerivationLevel1Token = mc_ServerTokenDerivationLevel1Token_new(crypto, &common.tokenKey, status);
1410+
if (!common.serverTokenDerivationLevel1Token) {
1411+
CLIENT_ERR("unable to derive serverTokenDerivationLevel1Token");
1412+
goto fail;
1413+
}
1414+
1415+
if (spec->substr.set) {
1416+
if (!_fle2_generate_TextSubstringFindTokenSet(kb,
1417+
&out->substring.value,
1418+
&asBsonValue,
1419+
common.collectionsLevel1Token,
1420+
common.serverTokenDerivationLevel1Token,
1421+
status)) {
1422+
goto fail;
1423+
}
1424+
out->substring.set = true;
1425+
} else if (spec->suffix.set) {
1426+
if (!_fle2_generate_TextSuffixFindTokenSet(kb,
1427+
&out->suffix.value,
1428+
&asBsonValue,
1429+
common.collectionsLevel1Token,
1430+
common.serverTokenDerivationLevel1Token,
1431+
status)) {
1432+
goto fail;
1433+
}
1434+
out->suffix.set = true;
1435+
1436+
} else if (spec->prefix.set) {
1437+
if (!_fle2_generate_TextPrefixFindTokenSet(kb,
1438+
&out->prefix.value,
1439+
&asBsonValue,
1440+
common.collectionsLevel1Token,
1441+
common.serverTokenDerivationLevel1Token,
1442+
status)) {
1443+
goto fail;
1444+
}
1445+
out->prefix.set = true;
1446+
} else {
1447+
if (!_fle2_generate_TextExactFindTokenSet(kb,
1448+
&out->exact.value,
1449+
&asBsonValue,
1450+
common.collectionsLevel1Token,
1451+
common.serverTokenDerivationLevel1Token,
1452+
status)) {
1453+
goto fail;
1454+
}
1455+
out->exact.set = true;
1456+
}
1457+
res = true;
1458+
fail:
1459+
_mongocrypt_buffer_cleanup(&asBsonValue);
1460+
_FLE2EncryptedPayloadCommon_cleanup(&common);
1461+
return res;
1462+
}
1463+
13311464
/**
13321465
* Payload subtype 11: FLE2InsertUpdatePayloadV2 for text search inserts/updates
13331466
*
@@ -1366,6 +1499,12 @@ static bool _mongocrypt_fle2_placeholder_to_insert_update_ciphertextForTextSearc
13661499
goto fail;
13671500
}
13681501

1502+
// One of substr/suffix/prefix must be set for inserts
1503+
if (!(insertSpec.substr.set || insertSpec.suffix.set || insertSpec.prefix.set)) {
1504+
CLIENT_ERR("FLE2TextSearchInsertSpec is missing a substring, suffix, or prefix index specification");
1505+
goto fail;
1506+
}
1507+
13691508
// t
13701509
payload.valueType = BSON_TYPE_UTF8;
13711510

@@ -1830,9 +1969,59 @@ static bool _mongocrypt_fle2_placeholder_to_find_ciphertextForTextSearch(_mongoc
18301969
_mongocrypt_marking_t *marking,
18311970
_mongocrypt_ciphertext_t *ciphertext,
18321971
mongocrypt_status_t *status) {
1833-
// TODO MONGOCRYPT-761 implement find support for text search fields
1834-
CLIENT_ERR("Text search find is not yet supported");
1835-
return false;
1972+
BSON_ASSERT_PARAM(kb);
1973+
BSON_ASSERT_PARAM(marking);
1974+
BSON_ASSERT_PARAM(ciphertext);
1975+
BSON_ASSERT(kb->crypt);
1976+
BSON_ASSERT(marking->type == MONGOCRYPT_MARKING_FLE2_ENCRYPTION);
1977+
1978+
bool res = false;
1979+
mc_FLE2EncryptionPlaceholder_t *placeholder = &marking->u.fle2;
1980+
BSON_ASSERT(placeholder->type == MONGOCRYPT_FLE2_PLACEHOLDER_TYPE_FIND);
1981+
BSON_ASSERT(placeholder->algorithm == MONGOCRYPT_FLE2_ALGORITHM_TEXT_SEARCH);
1982+
1983+
mc_FLE2FindTextPayload_t payload;
1984+
mc_FLE2FindTextPayload_init(&payload);
1985+
1986+
mc_FLE2TextSearchInsertSpec_t spec;
1987+
if (!mc_FLE2TextSearchInsertSpec_parse(&spec, &placeholder->v_iter, status)) {
1988+
goto fail;
1989+
}
1990+
1991+
if (!_fle2_generate_TextSearchFindTokenSets(kb, &payload.tokenSets, &placeholder->index_key_id, &spec, status)) {
1992+
goto fail;
1993+
}
1994+
1995+
payload.caseFold = spec.casef;
1996+
payload.diacriticFold = spec.diacf;
1997+
payload.maxContentionFactor = placeholder->maxContentionFactor;
1998+
if (spec.substr.set) {
1999+
payload.substringSpec.set = true;
2000+
payload.substringSpec.value = spec.substr.value;
2001+
} else if (spec.suffix.set) {
2002+
payload.suffixSpec.set = true;
2003+
payload.suffixSpec.value = spec.suffix.value;
2004+
} else if (spec.prefix.set) {
2005+
payload.prefixSpec.set = true;
2006+
payload.prefixSpec.value = spec.prefix.value;
2007+
}
2008+
2009+
// Serialize.
2010+
{
2011+
bson_t out = BSON_INITIALIZER;
2012+
mc_FLE2FindTextPayload_serialize(&payload, &out);
2013+
_mongocrypt_buffer_steal_from_bson(&ciphertext->data, &out);
2014+
}
2015+
2016+
// Do not set ciphertext->original_bson_type and ciphertext->key_id. They are
2017+
// not used for FLE2FindTextPayload.
2018+
ciphertext->blob_subtype = MC_SUBTYPE_FLE2FindTextPayload;
2019+
2020+
res = true;
2021+
2022+
fail:
2023+
mc_FLE2FindTextPayload_cleanup(&payload);
2024+
return res;
18362025
}
18372026

18382027
static bool _mongocrypt_fle2_placeholder_to_FLE2UnindexedEncryptedValue(_mongocrypt_key_broker_t *kb,

test/test-mc-fle2-encryption-placeholder.c

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -390,17 +390,6 @@ static void _test_FLE2EncryptionPlaceholder_textSearch_parse(_mongocrypt_tester_
390390
mongocrypt_status_destroy(status);
391391
}
392392

393-
// Test type=MONGOCRYPT_FLE2_PLACEHOLDER_TYPE_INSERT without substring, suffix, or prefix specs
394-
{
395-
const char *input = RAW_STRING({"v" : "foobar", "casef" : false, "diacf" : true});
396-
mongocrypt_status_t *status = mongocrypt_status_new();
397-
mc_FLE2TextSearchInsertSpec_t spec;
398-
ASSERT_FAILS_STATUS(_parse_text_search_spec_from_placeholder(tester, input, &spec, status),
399-
status,
400-
"Must have a substring, suffix, or prefix index specification");
401-
mongocrypt_status_destroy(status);
402-
}
403-
404393
// Test type=MONGOCRYPT_FLE2_PLACEHOLDER_TYPE_INSERT with lb > ub
405394
#define LB_GT_UB_TEST(Type) \
406395
do { \

0 commit comments

Comments
 (0)