Skip to content

Commit 92700cf

Browse files
committed
decoder: Use faster ondemand parser for simdjson_key_count method
1 parent 9287423 commit 92700cf

9 files changed

+36
-66
lines changed

php_simdjson.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,6 @@ PHP_FUNCTION(simdjson_key_count) {
324324
Z_PARAM_BOOL(throw_if_uncountable)
325325
ZEND_PARSE_PARAMETERS_END();
326326

327-
if (!simdjson_validate_depth(depth, 3)) {
328-
RETURN_THROWS();
329-
}
330-
331327
simdjson_php_error_code error;
332328
if (SIMDJSON_SHOULD_REUSE_PARSER(ZSTR_LEN(json))) {
333329
error = php_simdjson_key_count(simdjson_get_reused_parser(), json, ZSTR_VAL(key), return_value, depth);

simdjson.stub.php

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,14 +286,13 @@ function simdjson_key_value(string $json, string $key, bool $associative = false
286286
*
287287
* @param string $json The JSON string being decoded
288288
* @param string $key The JSON pointer being requested
289-
* @param int $depth The maximum nesting depth of the structure being decoded.
289+
* @param int $depth Not used anymore
290290
* @param bool $throw_if_uncountable If true, then throw SimdJsonException instead of
291291
* returning 0 for JSON pointers
292292
* to values that are neither objects nor arrays.
293293
* @return int
294294
* @throws SimdJsonDecoderException for invalid JSON or invalid JSON pointer
295295
* (or document over 4GB, or out of range integer/float)
296-
* @throws ValueError for invalid $depth
297296
* @see https://www.rfc-editor.org/rfc/rfc6901.html
298297
*/
299298
function simdjson_key_count(string $json, string $key, int $depth = 512, bool $throw_if_uncountable = false): int {}

simdjson_arginfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* This is a generated file, edit the .stub.php file instead.
2-
* Stub hash: 21c3b21c513b1ce2b8c72912856cd6d7326dbfe5 */
2+
* Stub hash: d30184e8da243c7681539c2f70cb0afbd97ca02d */
33

44
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_simdjson_validate, 0, 1, _IS_BOOL, 0)
55
ZEND_ARG_TYPE_INFO(0, json, IS_STRING, 0)

src/simdjson_decoder.cpp

Lines changed: 33 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ get_key_with_optional_prefix(simdjson::dom::element &doc, std::string_view json_
5757
return doc.at_pointer(std_pointer);
5858
}
5959

60+
/**
 * Resolve a JSON pointer against an On Demand document.
 *
 * RFC 6901 (https://www.rfc-editor.org/rfc/rfc6901.html) requires a non-empty
 * pointer to begin with '/'. For compatibility, a non-empty pointer that lacks
 * the leading '/' gets one prepended before lookup; an empty pointer is passed
 * through unchanged.
 *
 * TODO: Deprecate in a subsequent minor release and remove in a major release to comply with the standard.
 */
static inline simdjson::simdjson_result<simdjson::ondemand::value>
get_key_with_optional_prefix_ondemand(simdjson::ondemand::document &doc, std::string_view json_pointer) {
    const bool missing_leading_slash = !json_pointer.empty() && json_pointer.front() != '/';

    std::string normalized_pointer;
    normalized_pointer.reserve(json_pointer.size() + 1);
    if (missing_leading_slash) {
        normalized_pointer.push_back('/');
    }
    normalized_pointer.append(json_pointer.data(), json_pointer.size());

    return doc.at_pointer(normalized_pointer);
}
67+
6068
// Initialize stdClass object and return pointer to its properties HashTable
6169
static zend_always_inline HashTable* simdjson_init_object(zval *zv, uint32_t size) {
6270
#if PHP_VERSION_ID >= 80300
@@ -130,7 +138,7 @@ static zend_always_inline HashTable* simdjson_init_mixed_array(zval *zv, uint32_
130138
return ht;
131139
}
132140

133-
/** Check if it is necessary to reallocate string to buffer */
141+
// Check if it is necessary to reallocate string to buffer
134142
static zend_always_inline bool simdjson_realloc_needed(const zend_string *json) {
135143
// it is not possible to check allocated size for persistent or permanent string
136144
bool is_persistent_or_permanent = GC_FLAGS(json) & (IS_STR_PERSISTENT | IS_STR_PERMANENT);
@@ -148,6 +156,14 @@ static zend_always_inline bool simdjson_realloc_needed(const zend_string *json)
148156
return free_space < simdjson::SIMDJSON_PADDING;
149157
}
150158

159+
/**
 * Build a padded view over the JSON text in `json` for the On Demand parser.
 *
 * When the zend_string already has enough spare capacity after its contents
 * (simdjson_realloc_needed() returns false), the view points directly at the
 * zend_string buffer and no copy is made.
 *
 * Otherwise the text is copied into a padded buffer. That buffer is kept in
 * thread-local storage so it outlives this call: returning a view created
 * from a temporary simdjson::padded_string would leave the view pointing at
 * memory freed as soon as the return statement completes.
 *
 * @param json The JSON source string; must stay alive while the view is used.
 * @return A view that remains valid until the next call to this function on
 *         the same thread (copy case) or for as long as `json` is alive
 *         (zero-copy case).
 */
static simdjson::padded_string_view simdjson_padded_string_view(const zend_string *json) {
    if (simdjson_realloc_needed(json)) {
        // Owning storage for the padded copy; replaced (and the previous copy
        // released) on every call that needs a copy.
        static thread_local simdjson::padded_string padded_copy;
        padded_copy = simdjson::padded_string(ZSTR_VAL(json), ZSTR_LEN(json));
        return padded_copy;
    }

    return simdjson::padded_string_view(ZSTR_VAL(json), ZSTR_LEN(json), ZSTR_LEN(json) + simdjson::SIMDJSON_PADDING);
}
166+
151167
/** Decoded string from JSON must be always UTF-8 valid, so we can provide proper flag to zend_string */
152168
static zend_always_inline zend_string* simdjson_string_init(const char* buf, size_t len) {
153169
zend_string *str = zend_string_init(buf, len, 0);
@@ -649,52 +665,29 @@ PHP_SIMDJSON_API simdjson_php_error_code php_simdjson_key_exists(simdjson_php_pa
649665
return get_key_with_optional_prefix(doc, key).error();
650666
}
651667

652-
/* }}} */
653-
654-
PHP_SIMDJSON_API simdjson_php_error_code php_simdjson_key_count(simdjson_php_parser* parser, const zend_string *json, const char *key, zval *return_value, size_t depth) /* {{{ */ {
655-
simdjson::dom::element doc;
656-
simdjson::dom::element element;
657-
658-
SIMDJSON_PHP_TRY(build_parsed_json_cust(parser, doc, ZSTR_VAL(json), ZSTR_LEN(json), simdjson_realloc_needed(json), depth));
668+
PHP_SIMDJSON_API simdjson_php_error_code php_simdjson_key_count(simdjson_php_parser* parser, const zend_string *json, const char *key, zval *return_value, size_t depth) {
669+
simdjson::ondemand::document doc;
670+
simdjson::ondemand::value value;
671+
simdjson::ondemand::json_type type;
659672

660-
SIMDJSON_PHP_TRY(get_key_with_optional_prefix(doc, key).get(element));
673+
SIMDJSON_PHP_TRY(parser->ondemand_parser.iterate(simdjson_padded_string_view(json)).get(doc));
674+
SIMDJSON_PHP_TRY(get_key_with_optional_prefix_ondemand(doc, key).get(value));
675+
SIMDJSON_PHP_TRY(value.type().get(type));
661676

662-
zend_long key_count;
663-
switch (element.type()) {
664-
//ASCII sort
665-
case simdjson::dom::element_type::ARRAY : {
666-
auto json_array = element.get_array().value_unsafe();
667-
key_count = zend_long(json_array.size());
668-
if (UNEXPECTED(key_count == 0xFFFFFF)) {
669-
/* The C simdjson library represents array sizes larger than 0xFFFFFF as 0xFFFFFF. */
670-
key_count = 0;
671-
for (auto it: json_array) {
672-
(void)it;
673-
key_count++;
674-
}
675-
ZEND_ASSERT(key_count >= 0xFFFFFF);
676-
}
677+
size_t key_count;
678+
switch (type) {
679+
case simdjson::ondemand::json_type::array:
680+
SIMDJSON_PHP_TRY(value.count_elements().get(key_count));
677681
break;
678-
}
679-
case simdjson::dom::element_type::OBJECT : {
680-
auto json_object = element.get_object().value_unsafe();
681-
key_count = zend_long(json_object.size());
682-
if (UNEXPECTED(key_count == 0xFFFFFF)) {
683-
/* The C simdjson library represents object sizes larger than 0xFFFFFF as 0xFFFFFF. */
684-
key_count = 0;
685-
for (auto it: json_object) {
686-
(void)it;
687-
key_count++;
688-
}
689-
ZEND_ASSERT(key_count >= 0xFFFFFF);
690-
}
682+
683+
case simdjson::ondemand::json_type::object:
684+
SIMDJSON_PHP_TRY(value.count_fields().get(key_count));
691685
break;
692-
}
686+
693687
default:
694688
return SIMDJSON_PHP_ERR_KEY_COUNT_NOT_COUNTABLE;
695689
}
690+
696691
ZVAL_LONG(return_value, key_count);
697692
return simdjson::SUCCESS;
698693
}
699-
700-
/* }}} */

src/simdjson_decoder_defs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
struct simdjson_php_parser {
2323
public:
2424
simdjson::dom::parser parser;
25+
simdjson::ondemand::parser ondemand_parser;
2526
HashTable dedup_key_strings;
2627
};
2728

tests/decode_max_depth.phpt

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,31 @@ require_once __DIR__ . '/dump.inc';
99

1010
foreach ([0, PHP_INT_MIN, 1024, PHP_INT_MAX >> 1, PHP_INT_MAX] as $depth) {
1111
dump(function () use ($depth) { return simdjson_decode('[]', true, $depth); });
12-
dump(function () use ($depth) { return simdjson_key_count('{"a":"b"}', 'a', $depth); });
1312
dump(function () use ($depth) { return simdjson_key_value('{"a":{}}', 'a', true, $depth); });
1413
dump(function () use ($depth) { return simdjson_key_exists('{"a":{}}', 'a', $depth); });
1514
dump(function () use ($depth) { return simdjson_is_valid('{}', $depth); });
1615
}
1716
?>
1817
--EXPECTF--
1918
Caught ValueError: simdjson_decode(): Argument #3 ($depth) must be greater than zero
20-
Caught ValueError: simdjson_key_count(): Argument #3 ($depth) must be greater than zero
2119
Caught ValueError: simdjson_key_value(): Argument #4 ($depth) must be greater than zero
2220
Caught ValueError: simdjson_key_exists(): Argument #3 ($depth) must be greater than zero
2321
Caught ValueError: simdjson_is_valid(): Argument #2 ($depth) must be greater than zero
2422
Caught ValueError: simdjson_decode(): Argument #3 ($depth) must be greater than zero
25-
Caught ValueError: simdjson_key_count(): Argument #3 ($depth) must be greater than zero
2623
Caught ValueError: simdjson_key_value(): Argument #4 ($depth) must be greater than zero
2724
Caught ValueError: simdjson_key_exists(): Argument #3 ($depth) must be greater than zero
2825
Caught ValueError: simdjson_is_valid(): Argument #2 ($depth) must be greater than zero
2926
array(0) {
3027
}
31-
int(0)
3228
array(0) {
3329
}
3430
bool(true)
3531
bool(true)
3632
Caught ValueError: simdjson_decode(): Argument #3 ($depth) exceeds maximum allowed value of %d
37-
Caught ValueError: simdjson_key_count(): Argument #3 ($depth) exceeds maximum allowed value of %d
3833
Caught ValueError: simdjson_key_value(): Argument #4 ($depth) exceeds maximum allowed value of %d
3934
Caught ValueError: simdjson_key_exists(): Argument #3 ($depth) exceeds maximum allowed value of %d
4035
Caught ValueError: simdjson_is_valid(): Argument #2 ($depth) exceeds maximum allowed value of %d
4136
Caught ValueError: simdjson_decode(): Argument #3 ($depth) exceeds maximum allowed value of %d
42-
Caught ValueError: simdjson_key_count(): Argument #3 ($depth) exceeds maximum allowed value of %d
4337
Caught ValueError: simdjson_key_value(): Argument #4 ($depth) exceeds maximum allowed value of %d
4438
Caught ValueError: simdjson_key_exists(): Argument #3 ($depth) exceeds maximum allowed value of %d
4539
Caught ValueError: simdjson_is_valid(): Argument #2 ($depth) exceeds maximum allowed value of %d

tests/decode_max_depth_memory_reduction.phpt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ foreach ([1024, 1 << 27] as $depth) {
1212
echo "Test depth=$depth:\n";
1313
$value = simdjson_decode('[]', true, $depth);
1414
var_dump($value);
15-
$value = simdjson_key_count('{"a":"b"}', 'a', $depth);
16-
var_dump($value);
1715
try {
1816
simdjson_decode(str_repeat('[', 200000) . str_repeat(']', 199999), true, $depth);
1917
echo "should be invalid\n";
@@ -26,10 +24,8 @@ foreach ([1024, 1 << 27] as $depth) {
2624
Test depth=1024:
2725
array(0) {
2826
}
29-
int(0)
3027
Caught SimdJsonDecoderException: The JSON document was too deep (too many nested objects and arrays)
3128
Test depth=134217728:
3229
array(0) {
3330
}
34-
int(0)
3531
Caught SimdJsonDecoderException: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc.

tests/depth.phpt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ error_reporting=E_ALL
88
require_once __DIR__ . '/dump.inc';
99

1010
// too small/large depths tested in decode_max_depth
11-
var_dump(simdjson_key_count('[]', '', 1));
1211
var_dump(simdjson_decode('[]', true, 1));
1312
echo "Test '[1]'\n";
1413
try {
@@ -30,7 +29,6 @@ var_dump(simdjson_decode('[[]]', true, 2));
3029
var_dump(simdjson_decode('[[1]]', true, 3));
3130
?>
3231
--EXPECTF--
33-
int(0)
3432
array(0) {
3533
}
3634
Test '[1]'

tests/key_count_exception.phpt

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,6 @@ simdjson_key_count throws exception if key was not found test
44
<?php
55
$json = file_get_contents(__DIR__ . DIRECTORY_SEPARATOR . '_files' . DIRECTORY_SEPARATOR . 'result.json');
66

7-
try {
8-
\simdjson_key_count($json, "unknown", 1);
9-
} catch (\RuntimeException $e) {
10-
var_dump($e->getMessage());
11-
}
12-
137
try {
148
\simdjson_key_count($json, "unknown");
159
} catch (\RuntimeException $e) {
@@ -18,5 +12,4 @@ try {
1812

1913
?>
2014
--EXPECT--
21-
string(67) "The JSON document was too deep (too many nested objects and arrays)"
2215
string(56) "The JSON field referenced does not exist in this object."

0 commit comments

Comments
 (0)