29
29
#include "mc-range-edge-generation-private.h"
30
30
#include "mc-range-encoding-private.h"
31
31
#include "mc-range-mincover-private.h"
32
+ #include "mc-str-encode-string-sets-private.h"
33
+ #include "mc-text-search-str-encode-private.h"
32
34
#include "mc-tokens-private.h"
33
35
#include "mongocrypt-buffer-private.h"
34
36
#include "mongocrypt-ciphertext-private.h"
@@ -1126,26 +1128,22 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb,
1126
1128
mc_FLE2InsertUpdatePayloadV2_t * payload ,
1127
1129
const _mongocrypt_buffer_t * indexKeyId ,
1128
1130
const mc_FLE2TextSearchInsertSpec_t * spec ,
1129
- const _mongocrypt_buffer_t * value ,
1130
1131
int64_t contentionFactor ,
1131
1132
mongocrypt_status_t * status ) {
1132
1133
BSON_ASSERT_PARAM (kb );
1133
1134
BSON_ASSERT_PARAM (payload );
1134
1135
BSON_ASSERT_PARAM (indexKeyId );
1135
1136
BSON_ASSERT_PARAM (spec );
1136
- BSON_ASSERT_PARAM (value );
1137
1137
1138
1138
_mongocrypt_crypto_t * crypto = kb -> crypt -> crypto ;
1139
1139
mc_TextSearchTokenSets_t * tsts = & payload -> textSearchTokenSets .tsts ;
1140
1140
_FLE2EncryptedPayloadCommon_t common = {{0 }};
1141
1141
bool res = false;
1142
1142
1143
- // TODO MONGOCRYPT-759 implement case folding; for now let foldedValue be a copy of value.
1144
- _mongocrypt_buffer_t foldedValue = {0 };
1145
- _mongocrypt_buffer_init (& foldedValue );
1146
- _mongocrypt_buffer_copy_to (value , & foldedValue );
1147
-
1148
- // TODO MONGOCRYPT-762 do StrEncode here to get substring sets to encode
1143
+ mc_str_encode_sets_t * encodeSets = mc_text_search_str_encode (spec , status );
1144
+ if (!encodeSets ) {
1145
+ goto fail ;
1146
+ }
1149
1147
1150
1148
// Start the token derivations
1151
1149
if (!_get_tokenKey (kb , indexKeyId , & common .tokenKey , status )) {
@@ -1164,72 +1162,154 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb,
1164
1162
goto fail ;
1165
1163
}
1166
1164
1167
- if (!_fle2_generate_TextExactTokenSet (kb ,
1168
- & tsts -> exact ,
1169
- & foldedValue ,
1170
- contentionFactor ,
1171
- common .collectionsLevel1Token ,
1172
- common .serverTokenDerivationLevel1Token ,
1173
- status )) {
1174
- goto fail ;
1165
+ // Generate exact token set singleton
1166
+ {
1167
+ _mongocrypt_buffer_t asBsonValue ;
1168
+ _mongocrypt_buffer_init (& asBsonValue );
1169
+ BSON_ASSERT (encodeSets -> exact .len < INT_MAX );
1170
+ _mongocrypt_buffer_copy_from_string_as_bson_value (& asBsonValue ,
1171
+ (const char * )encodeSets -> exact .data ,
1172
+ (int )encodeSets -> exact .len );
1173
+ if (!_fle2_generate_TextExactTokenSet (kb ,
1174
+ & tsts -> exact ,
1175
+ & asBsonValue ,
1176
+ contentionFactor ,
1177
+ common .collectionsLevel1Token ,
1178
+ common .serverTokenDerivationLevel1Token ,
1179
+ status )) {
1180
+ _mongocrypt_buffer_cleanup (& asBsonValue );
1181
+ goto fail ;
1182
+ }
1183
+ _mongocrypt_buffer_cleanup (& asBsonValue );
1175
1184
}
1176
1185
1177
- if (spec -> substr .set ) {
1178
- // TODO MONGOCRYPT-762 iterate on StrEncode substrings set
1179
- mc_TextSubstringTokenSet_t substrSet = {{0 }};
1180
- mc_TextSubstringTokenSet_init (& substrSet );
1186
+ const char * substring ;
1187
+ uint32_t bytelen ;
1188
+ uint32_t appendCount ;
1181
1189
1182
- if (!_fle2_generate_TextSubstringTokenSet (kb ,
1183
- & substrSet ,
1184
- & foldedValue ,
1185
- contentionFactor ,
1186
- common .collectionsLevel1Token ,
1187
- common .serverTokenDerivationLevel1Token ,
1188
- status )) {
1189
- mc_TextSubstringTokenSet_cleanup (& substrSet );
1190
- goto fail ;
1190
+ // Generate array of substring token sets
1191
+ if (encodeSets -> substring_set ) {
1192
+ mc_substring_set_iter_t set_itr ;
1193
+ mc_substring_set_iter_init (& set_itr , encodeSets -> substring_set );
1194
+
1195
+ while (mc_substring_set_iter_next (& set_itr , & substring , & bytelen , & appendCount )) {
1196
+ BSON_ASSERT (appendCount > 0 );
1197
+ BSON_ASSERT (bytelen < INT_MAX );
1198
+
1199
+ mc_TextSubstringTokenSet_t tset = {{0 }};
1200
+
1201
+ _mongocrypt_buffer_t asBsonValue ;
1202
+ _mongocrypt_buffer_init (& asBsonValue );
1203
+ _mongocrypt_buffer_copy_from_string_as_bson_value (& asBsonValue , substring , (int )bytelen );
1204
+
1205
+ if (!_fle2_generate_TextSubstringTokenSet (kb ,
1206
+ & tset ,
1207
+ & asBsonValue ,
1208
+ contentionFactor ,
1209
+ common .collectionsLevel1Token ,
1210
+ common .serverTokenDerivationLevel1Token ,
1211
+ status )) {
1212
+ _mongocrypt_buffer_cleanup (& asBsonValue );
1213
+ mc_TextSubstringTokenSet_cleanup (& tset );
1214
+ goto fail ;
1215
+ }
1216
+ _mongocrypt_buffer_cleanup (& asBsonValue );
1217
+
1218
+ if (appendCount > 1 ) {
1219
+ mc_TextSubstringTokenSet_t tset_copy ;
1220
+ mc_TextSubstringTokenSet_shallow_copy (& tset , & tset_copy );
1221
+ for (; appendCount > 1 ; appendCount -- ) {
1222
+ _mc_array_append_val (& tsts -> substringArray , tset_copy );
1223
+ }
1224
+ }
1225
+ _mc_array_append_val (& tsts -> substringArray , tset ); // array now owns tset
1191
1226
}
1192
- _mc_array_append_val (& tsts -> substringArray , substrSet );
1193
- }
1194
- if (spec -> suffix .set ) {
1195
- // TODO MONGOCRYPT-762 iterate on StrEncode suffixes set
1196
- mc_TextSuffixTokenSet_t suffixSet = {{0 }};
1197
- mc_TextSuffixTokenSet_init (& suffixSet );
1198
-
1199
- if (!_fle2_generate_TextSuffixTokenSet (kb ,
1200
- & suffixSet ,
1201
- & foldedValue ,
1202
- contentionFactor ,
1203
- common .collectionsLevel1Token ,
1204
- common .serverTokenDerivationLevel1Token ,
1205
- status )) {
1206
- mc_TextSuffixTokenSet_cleanup (& suffixSet );
1207
- goto fail ;
1227
+ }
1228
+
1229
+ // Generate array of suffix token sets
1230
+ if (encodeSets -> suffix_set ) {
1231
+ mc_affix_set_iter_t set_itr ;
1232
+ mc_affix_set_iter_init (& set_itr , encodeSets -> suffix_set );
1233
+
1234
+ while (mc_affix_set_iter_next (& set_itr , & substring , & bytelen , & appendCount )) {
1235
+ BSON_ASSERT (appendCount > 0 );
1236
+ BSON_ASSERT (bytelen < INT_MAX );
1237
+
1238
+ mc_TextSuffixTokenSet_t tset = {{0 }};
1239
+ mc_TextSuffixTokenSet_init (& tset );
1240
+
1241
+ _mongocrypt_buffer_t asBsonValue ;
1242
+ _mongocrypt_buffer_init (& asBsonValue );
1243
+ _mongocrypt_buffer_copy_from_string_as_bson_value (& asBsonValue , substring , (int )bytelen );
1244
+
1245
+ if (!_fle2_generate_TextSuffixTokenSet (kb ,
1246
+ & tset ,
1247
+ & asBsonValue ,
1248
+ contentionFactor ,
1249
+ common .collectionsLevel1Token ,
1250
+ common .serverTokenDerivationLevel1Token ,
1251
+ status )) {
1252
+ _mongocrypt_buffer_cleanup (& asBsonValue );
1253
+ mc_TextSuffixTokenSet_cleanup (& tset );
1254
+ goto fail ;
1255
+ }
1256
+ _mongocrypt_buffer_cleanup (& asBsonValue );
1257
+
1258
+ if (appendCount > 1 ) {
1259
+ mc_TextSuffixTokenSet_t tset_copy ;
1260
+ mc_TextSuffixTokenSet_shallow_copy (& tset , & tset_copy );
1261
+ for (; appendCount > 1 ; appendCount -- ) {
1262
+ _mc_array_append_val (& tsts -> suffixArray , tset_copy );
1263
+ }
1264
+ }
1265
+ _mc_array_append_val (& tsts -> suffixArray , tset ); // array now owns tset
1208
1266
}
1209
- _mc_array_append_val (& tsts -> suffixArray , suffixSet );
1210
- }
1211
- if (spec -> prefix .set ) {
1212
- // TODO MONGOCRYPT-762 iterate on StrEncode suffixes set
1213
- mc_TextPrefixTokenSet_t prefixSet = {{0 }};
1214
- mc_TextPrefixTokenSet_init (& prefixSet );
1215
-
1216
- if (!_fle2_generate_TextPrefixTokenSet (kb ,
1217
- & prefixSet ,
1218
- & foldedValue ,
1219
- contentionFactor ,
1220
- common .collectionsLevel1Token ,
1221
- common .serverTokenDerivationLevel1Token ,
1222
- status )) {
1223
- mc_TextPrefixTokenSet_cleanup (& prefixSet );
1224
- goto fail ;
1267
+ }
1268
+
1269
+ // Generate array of prefix token sets
1270
+ if (encodeSets -> prefix_set ) {
1271
+ mc_affix_set_iter_t set_itr ;
1272
+ mc_affix_set_iter_init (& set_itr , encodeSets -> prefix_set );
1273
+
1274
+ while (mc_affix_set_iter_next (& set_itr , & substring , & bytelen , & appendCount )) {
1275
+ BSON_ASSERT (appendCount > 0 );
1276
+ BSON_ASSERT (bytelen < INT_MAX );
1277
+
1278
+ mc_TextPrefixTokenSet_t tset = {{0 }};
1279
+ mc_TextPrefixTokenSet_init (& tset );
1280
+
1281
+ _mongocrypt_buffer_t asBsonValue ;
1282
+ _mongocrypt_buffer_init (& asBsonValue );
1283
+ _mongocrypt_buffer_copy_from_string_as_bson_value (& asBsonValue , substring , (int )bytelen );
1284
+
1285
+ if (!_fle2_generate_TextPrefixTokenSet (kb ,
1286
+ & tset ,
1287
+ & asBsonValue ,
1288
+ contentionFactor ,
1289
+ common .collectionsLevel1Token ,
1290
+ common .serverTokenDerivationLevel1Token ,
1291
+ status )) {
1292
+ _mongocrypt_buffer_cleanup (& asBsonValue );
1293
+ mc_TextPrefixTokenSet_cleanup (& tset );
1294
+ goto fail ;
1295
+ }
1296
+ _mongocrypt_buffer_cleanup (& asBsonValue );
1297
+
1298
+ if (appendCount > 1 ) {
1299
+ mc_TextPrefixTokenSet_t tset_copy ;
1300
+ mc_TextPrefixTokenSet_shallow_copy (& tset , & tset_copy );
1301
+ for (; appendCount > 1 ; appendCount -- ) {
1302
+ _mc_array_append_val (& tsts -> prefixArray , tset_copy ); // array now owns tset_copy
1303
+ }
1304
+ }
1305
+ _mc_array_append_val (& tsts -> prefixArray , tset ); // moves ownership of tset
1225
1306
}
1226
- _mc_array_append_val (& tsts -> prefixArray , prefixSet );
1227
1307
}
1228
1308
payload -> textSearchTokenSets .set = true;
1229
1309
res = true;
1230
1310
fail :
1231
1311
_FLE2EncryptedPayloadCommon_cleanup (& common );
1232
- _mongocrypt_buffer_cleanup ( & foldedValue );
1312
+ mc_str_encode_sets_destroy ( encodeSets );
1233
1313
return res ;
1234
1314
}
1235
1315
@@ -1350,7 +1430,6 @@ static bool _mongocrypt_fle2_placeholder_to_insert_update_ciphertextForTextSearc
1350
1430
& payload ,
1351
1431
& placeholder -> index_key_id ,
1352
1432
& insertSpec ,
1353
- & value ,
1354
1433
payload .contentionFactor ,
1355
1434
status )) {
1356
1435
goto fail ;
0 commit comments