@@ -211,19 +211,34 @@ static zend_always_inline Bucket *simdjson_zend_hash_str_find_bucket(const HashT
211
211
return NULL ;
212
212
}
213
213
214
- static zend_always_inline void simdjson_init_reused_key_strings (HashTable *repeated_key_strings) {
215
- if (UNEXPECTED (repeated_key_strings->nTableSize == 0 )) { // array is not initialized yet
216
- zend_hash_init (repeated_key_strings, SIMDJSON_REPEATED_STRINGS_COUNT, NULL , NULL , 0 );
217
- zend_hash_real_init_mixed (repeated_key_strings);
218
- }
214
+ static zend_always_inline void simdjson_release_reused_key_strings (HashTable *dedup_key_strings) {
215
+ ZEND_ASSERT (dedup_key_strings->nNumUsed > 0 );
216
+ Bucket *p = dedup_key_strings->arData ;
217
+ Bucket *end = p + dedup_key_strings->nNumUsed ;
218
+ do {
219
+ if (GC_DELREF (p->key ) == 0 ) {
220
+ ZEND_ASSERT (!(GC_FLAGS (p->key ) & IS_STR_PERSISTENT));
221
+ efree (p->key );
222
+ }
223
+ } while (++p != end);
219
224
}
220
225
221
- static zend_always_inline void simdjson_clean_reused_key_strings (HashTable *repeated_key_strings) {
222
- ZEND_ASSERT (repeated_key_strings->nTableMask == HT_SIZE_TO_MASK (SIMDJSON_REPEATED_STRINGS_COUNT));
223
- if (repeated_key_strings->nNumUsed ) {
224
- HT_HASH_RESET (repeated_key_strings);
225
- repeated_key_strings->nNumUsed = 0 ;
226
- repeated_key_strings->nNumOfElements = 0 ;
226
+ static zend_always_inline void simdjson_init_reused_key_strings (HashTable *dedup_key_strings) {
227
+ if (UNEXPECTED (dedup_key_strings->nTableSize == 0 )) {
228
+ // hash table is not initialized yet
229
+ zend_hash_init (dedup_key_strings, SIMDJSON_DEDUP_STRING_COUNT, NULL , NULL , 0 );
230
+ // zend_hash_real_init_mixed
231
+ void * data = emalloc (HT_SIZE_EX (SIMDJSON_DEDUP_STRING_COUNT, HT_SIZE_TO_MASK (SIMDJSON_DEDUP_STRING_COUNT)));
232
+ dedup_key_strings->nTableMask = HT_SIZE_TO_MASK (SIMDJSON_DEDUP_STRING_COUNT);
233
+ HT_SET_DATA_ADDR (dedup_key_strings, data);
234
+ HT_HASH_RESET (dedup_key_strings);
235
+ } else if (dedup_key_strings->nNumUsed > SIMDJSON_DEDUP_STRING_COUNT / 2 ) {
236
+ // more than half of the hash table is already in use, so release the stored keys and reset it
237
+ simdjson_release_reused_key_strings (dedup_key_strings);
238
+ ZEND_ASSERT (dedup_key_strings->nTableMask == HT_SIZE_TO_MASK (SIMDJSON_DEDUP_STRING_COUNT));
239
+ HT_HASH_RESET (dedup_key_strings);
240
+ dedup_key_strings->nNumUsed = 0 ;
241
+ dedup_key_strings->nNumOfElements = 0 ;
227
242
}
228
243
}
229
244
@@ -232,21 +247,26 @@ static zend_always_inline void simdjson_clean_reused_key_strings(HashTable *repe
232
247
* This method checks whether the key was already used in the same JSON document and returns a reference to it, or allocates a new string if it
233
248
* is unique
234
249
*/
235
- static zend_always_inline zend_string* simdjson_reuse_key (HashTable *ht, const char *str, size_t len, zend_ulong h) {
250
+ static zend_always_inline zend_string* simdjson_dedup_key (HashTable *ht, const char *str, size_t len, zend_ulong h) {
236
251
uint32_t nIndex;
237
252
uint32_t idx;
238
253
Bucket *p;
239
254
zend_string *key;
240
255
256
+ if (len > SIMDJSON_MAX_DEDUP_LENGTH) {
257
+ goto init_new_string;
258
+ }
259
+
241
260
// This should make computation faster, as we know array size
242
261
ZEND_ASSERT (ht != NULL );
243
- ZEND_ASSERT (ht->nTableMask == HT_SIZE_TO_MASK (SIMDJSON_REPEATED_STRINGS_COUNT ));
262
+ ZEND_ASSERT (ht->nTableMask == HT_SIZE_TO_MASK (SIMDJSON_DEDUP_STRING_COUNT ));
244
263
245
264
p = simdjson_zend_hash_str_find_bucket (ht, str, len, h);
246
265
if (p) { // Key already exists, reuse
247
266
GC_ADDREF (p->key ); // raise reference counter by one
248
267
return p->key ;
249
- } else if (UNEXPECTED (ht->nNumUsed >= SIMDJSON_REPEATED_STRINGS_COUNT)) { // hashtable is full
268
+ } else if (UNEXPECTED (ht->nNumUsed >= SIMDJSON_DEDUP_STRING_COUNT)) { // hashtable is full
269
+ init_new_string:
250
270
key = simdjson_string_init (str, len); // always return new string if hashtable is full
251
271
ZSTR_H (key) = h; // set hash to zend_string
252
272
return key;
@@ -255,6 +275,7 @@ static zend_always_inline zend_string* simdjson_reuse_key(HashTable *ht, const c
255
275
ht->nNumOfElements ++;
256
276
p = ht->arData + idx;
257
277
p->key = simdjson_string_init (str, len); // initialize new string for key
278
+ GC_ADDREF (p->key ); // raise gc counter by one, so it will be 2
258
279
p->h = ZSTR_H (p->key ) = h;
259
280
// ZVAL_NULL(&p->val); // we don't need to set the value to null, as we don't use it and the destructor is set to NULL
260
281
nIndex = h | ht->nTableMask ;
@@ -271,7 +292,7 @@ static zend_always_inline zend_string* simdjson_reuse_key(HashTable *ht, const c
271
292
* - initialized array as zend_hash_real_init_mixed
272
293
* - exact size must be known in advance
273
294
*/
274
- static zend_always_inline void simdjson_zend_hash_str_add_or_update (HashTable *ht, const char *str, size_t len, zval *pData, HashTable *repeated_key_strings ) {
295
+ static zend_always_inline void simdjson_zend_hash_str_add_or_update (HashTable *ht, const char *str, size_t len, zval *pData, HashTable *dedup_key_strings ) {
275
296
uint32_t nIndex;
276
297
uint32_t idx;
277
298
Bucket *p;
@@ -297,7 +318,7 @@ static zend_always_inline void simdjson_zend_hash_str_add_or_update(HashTable *h
297
318
idx = ht->nNumUsed ++;
298
319
ht->nNumOfElements ++;
299
320
p = ht->arData + idx;
300
- p->key = simdjson_reuse_key (repeated_key_strings , str, len, h); // initialize new string for key
321
+ p->key = simdjson_dedup_key (dedup_key_strings , str, len, h); // initialize new string for key
301
322
// p->key = simdjson_string_init(str, len);
302
323
p->h = /* ZSTR_H(p->key) =*/ h;
303
324
HT_FLAGS (ht) &= ~HASH_FLAG_STATIC_KEYS;
@@ -309,7 +330,7 @@ static zend_always_inline void simdjson_zend_hash_str_add_or_update(HashTable *h
309
330
}
310
331
#endif // PHP_VERSION_ID >= 80200
311
332
312
- static zend_always_inline void simdjson_add_key_to_symtable (HashTable *ht, const char *buf, size_t len, zval *value, HashTable *repeated_key_strings ) {
333
+ static zend_always_inline void simdjson_add_key_to_symtable (HashTable *ht, const char *buf, size_t len, zval *value, HashTable *dedup_key_strings ) {
313
334
#if PHP_VERSION_ID >= 80200
314
335
zend_ulong idx;
315
336
if (UNEXPECTED (ZEND_HANDLE_NUMERIC_STR (buf, len, idx))) {
@@ -319,7 +340,7 @@ static zend_always_inline void simdjson_add_key_to_symtable(HashTable *ht, const
319
340
zend_string *key = len == 1 ? ZSTR_CHAR ((unsigned char )buf[0 ]) : ZSTR_EMPTY_ALLOC ();
320
341
zend_hash_update (ht, key, value);
321
342
} else {
322
- simdjson_zend_hash_str_add_or_update (ht, buf, len, value, repeated_key_strings );
343
+ simdjson_zend_hash_str_add_or_update (ht, buf, len, value, dedup_key_strings );
323
344
}
324
345
#else
325
346
if (len <= 1 ) {
@@ -347,7 +368,7 @@ static zend_always_inline void simdjson_set_zval_to_int64(zval *zv, int64_t valu
347
368
ZVAL_LONG (zv, value);
348
369
}
349
370
350
- static void simdjson_create_array (simdjson::dom::element element, zval *return_value, HashTable *repeated_key_strings ) {
371
+ static void simdjson_create_array (simdjson::dom::element element, zval *return_value, HashTable *dedup_key_strings ) {
351
372
switch (element.type ()) {
352
373
// ASCII sort
353
374
case simdjson::dom::element_type::STRING :
@@ -380,7 +401,7 @@ static void simdjson_create_array(simdjson::dom::element element, zval *return_v
380
401
zend_array *arr = simdjson_init_packed_array (return_value, 0xFFFFFF );
381
402
for (simdjson::dom::element child : json_array) {
382
403
zval array_element;
383
- simdjson_create_array (child, &array_element, repeated_key_strings );
404
+ simdjson_create_array (child, &array_element, dedup_key_strings );
384
405
zend_hash_next_index_insert_new (arr, &array_element);
385
406
}
386
407
break ;
@@ -391,14 +412,14 @@ static void simdjson_create_array(simdjson::dom::element element, zval *return_v
391
412
/* Optimised variant of adding elements to array with known size available since PHP 8.2 */
392
413
ZEND_HASH_FILL_PACKED (arr) {
393
414
for (simdjson::dom::element child : json_array) {
394
- simdjson_create_array (child, __fill_val, repeated_key_strings );
415
+ simdjson_create_array (child, __fill_val, dedup_key_strings );
395
416
ZEND_HASH_FILL_NEXT ();
396
417
}
397
418
} ZEND_HASH_FILL_END ();
398
419
#else
399
420
for (simdjson::dom::element child : json_array) {
400
421
zval array_element;
401
- simdjson_create_array (child, &array_element, repeated_key_strings );
422
+ simdjson_create_array (child, &array_element, dedup_key_strings );
402
423
zend_hash_next_index_insert_new (arr, &array_element);
403
424
}
404
425
#endif
@@ -412,16 +433,12 @@ static void simdjson_create_array(simdjson::dom::element element, zval *return_v
412
433
break ;
413
434
}
414
435
415
- #if PHP_VERSION_ID >= 80200
416
- // Allocate table for reusing already allocated keys
417
- simdjson_init_reused_key_strings (repeated_key_strings);
418
- #endif
419
436
HashTable *ht = simdjson_init_mixed_array (return_value, json_object.size ());
420
437
421
438
for (simdjson::dom::key_value_pair field : json_object) {
422
439
zval array_element;
423
- simdjson_create_array (field.value , &array_element, repeated_key_strings );
424
- simdjson_add_key_to_symtable (ht, field.key .data (), field.key .size (), &array_element, repeated_key_strings );
440
+ simdjson_create_array (field.value , &array_element, dedup_key_strings );
441
+ simdjson_add_key_to_symtable (ht, field.key .data (), field.key .size (), &array_element, dedup_key_strings );
425
442
}
426
443
break ;
427
444
}
@@ -431,7 +448,7 @@ static void simdjson_create_array(simdjson::dom::element element, zval *return_v
431
448
432
449
/* }}} */
433
450
434
- static simdjson_php_error_code simdjson_create_object (simdjson::dom::element element, zval *return_value, HashTable *repeated_key_strings ) /* {{{ */ {
451
+ static simdjson_php_error_code simdjson_create_object (simdjson::dom::element element, zval *return_value, HashTable *dedup_key_strings ) /* {{{ */ {
435
452
switch (element.type ()) {
436
453
// ASCII sort
437
454
case simdjson::dom::element_type::STRING :
@@ -465,7 +482,7 @@ static simdjson_php_error_code simdjson_create_object(simdjson::dom::element ele
465
482
466
483
for (simdjson::dom::element child : json_array) {
467
484
zval value;
468
- simdjson_php_error_code error = simdjson_create_object (child, &value, repeated_key_strings );
485
+ simdjson_php_error_code error = simdjson_create_object (child, &value, dedup_key_strings );
469
486
if (UNEXPECTED (error)) {
470
487
zval_ptr_dtor (return_value);
471
488
ZVAL_NULL (return_value);
@@ -478,10 +495,6 @@ static simdjson_php_error_code simdjson_create_object(simdjson::dom::element ele
478
495
case simdjson::dom::element_type::OBJECT : {
479
496
const auto json_object = element.get_object ().value_unsafe ();
480
497
zend_object *obj = simdjson_init_object (return_value, json_object.size ());
481
- #if PHP_VERSION_ID >= 80200
482
- // Allocate table for reusing already allocated keys
483
- simdjson_init_reused_key_strings (repeated_key_strings);
484
- #endif
485
498
486
499
for (simdjson::dom::key_value_pair field : json_object) {
487
500
const char *data = field.key .data ();
@@ -493,7 +506,7 @@ static simdjson_php_error_code simdjson_create_object(simdjson::dom::element ele
493
506
return SIMDJSON_PHP_ERR_INVALID_PHP_PROPERTY;
494
507
}
495
508
zval value;
496
- simdjson_php_error_code error = simdjson_create_object (field.value , &value, repeated_key_strings );
509
+ simdjson_php_error_code error = simdjson_create_object (field.value , &value, dedup_key_strings );
497
510
if (UNEXPECTED (error)) {
498
511
zval_ptr_dtor (return_value);
499
512
ZVAL_NULL (return_value);
@@ -507,7 +520,7 @@ static simdjson_php_error_code simdjson_create_object(simdjson::dom::element ele
507
520
} else {
508
521
#if PHP_VERSION_ID >= 80200
509
522
zend_ulong h = zend_inline_hash_func (data, size);
510
- key = simdjson_reuse_key (repeated_key_strings , data, size, h);
523
+ key = simdjson_dedup_key (dedup_key_strings , data, size, h);
511
524
#else
512
525
key = simdjson_string_init (data, size);
513
526
#endif
@@ -530,27 +543,30 @@ PHP_SIMDJSON_API simdjson_php_parser* php_simdjson_create_parser(void) /* {{{ */
530
543
}
531
544
532
545
PHP_SIMDJSON_API void php_simdjson_free_parser (simdjson_php_parser* parser) /* {{{ */ {
533
- // Destroy repeated_key_strings hash if was allocated
534
- if (parser->repeated_key_strings .nTableSize ) {
535
- efree (HT_GET_DATA_ADDR (&parser->repeated_key_strings ));
546
+ #if PHP_VERSION_ID >= 80200
547
+ // Destroy the dedup_key_strings hash table if it was allocated
548
+ if (parser->dedup_key_strings .nTableSize ) {
549
+ if (parser->dedup_key_strings .nNumUsed ) {
550
+ simdjson_release_reused_key_strings (&parser->dedup_key_strings );
551
+ }
552
+ efree (HT_GET_DATA_ADDR (&parser->dedup_key_strings ));
536
553
}
554
+ #endif
537
555
delete parser;
538
556
}
539
557
540
- static zend_always_inline simdjson_php_error_code simdjson_convert_element (simdjson::dom::element element, zval *return_value, bool associative, HashTable *repeated_key_strings) {
558
+ static simdjson_php_error_code simdjson_convert_element (simdjson::dom::element element, zval *return_value, bool associative, HashTable *dedup_key_strings) {
559
+ #if PHP_VERSION_ID >= 80200
560
+ // Allocate table for reusing already allocated keys
561
+ simdjson_init_reused_key_strings (dedup_key_strings);
562
+ #endif
541
563
simdjson_php_error_code resp;
542
564
if (associative) {
543
- simdjson_create_array (element, return_value, repeated_key_strings );
565
+ simdjson_create_array (element, return_value, dedup_key_strings );
544
566
resp = simdjson::SUCCESS;
545
567
} else {
546
- resp = simdjson_create_object (element, return_value, repeated_key_strings);
547
- }
548
- #if PHP_VERSION_ID >= 80200
549
- // Cleanup table if repeated_key_strings hashtable was initialized
550
- if (repeated_key_strings->nTableSize != 0 ) {
551
- simdjson_clean_reused_key_strings (repeated_key_strings);
568
+ resp = simdjson_create_object (element, return_value, dedup_key_strings);
552
569
}
553
- #endif
554
570
return resp;
555
571
}
556
572
@@ -566,14 +582,14 @@ PHP_SIMDJSON_API simdjson_php_error_code php_simdjson_parse(simdjson_php_parser*
566
582
simdjson::dom::element doc;
567
583
568
584
SIMDJSON_PHP_TRY (build_parsed_json_cust (parser, doc, ZSTR_VAL (json), ZSTR_LEN (json), simdjson_realloc_needed (json), depth));
569
- return simdjson_convert_element (doc, return_value, associative, &parser->repeated_key_strings );
585
+ return simdjson_convert_element (doc, return_value, associative, &parser->dedup_key_strings );
570
586
}
571
587
572
588
PHP_SIMDJSON_API simdjson_php_error_code php_simdjson_parse_buffer (simdjson_php_parser* parser, const char *json, size_t len, zval *return_value, bool associative, size_t depth) /* {{{ */ {
573
589
simdjson::dom::element doc;
574
590
575
591
SIMDJSON_PHP_TRY (build_parsed_json_cust (parser, doc, json, len, false , depth));
576
- return simdjson_convert_element (doc, return_value, associative, &parser->repeated_key_strings );
592
+ return simdjson_convert_element (doc, return_value, associative, &parser->dedup_key_strings );
577
593
}
578
594
579
595
/* }}} */
@@ -583,7 +599,7 @@ PHP_SIMDJSON_API simdjson_php_error_code php_simdjson_key_value(simdjson_php_par
583
599
simdjson::dom::element element;
584
600
SIMDJSON_PHP_TRY (build_parsed_json_cust (parser, doc, ZSTR_VAL (json), ZSTR_LEN (json), simdjson_realloc_needed (json), depth));
585
601
SIMDJSON_PHP_TRY (get_key_with_optional_prefix (doc, key).get (element));
586
- return simdjson_convert_element (element, return_value, associative, &parser->repeated_key_strings );
602
+ return simdjson_convert_element (element, return_value, associative, &parser->dedup_key_strings );
587
603
}
588
604
589
605
/* }}} */
0 commit comments