From 7c95b788404c57952404f2b61c9a7c6e455110cc Mon Sep 17 00:00:00 2001 From: K Date: Fri, 7 May 2021 16:47:51 +0200 Subject: [PATCH 1/3] optimize unpack() for named fields - unpack function bypasses costly snprintf call if possible (ie. named field without repetition). - Elements are added via add_assoc_*_ex functions with explicit key length. This way there's no need for internal strlen call. --- ext/standard/pack.c | 76 +++++++++++---------- ext/standard/tests/strings/pack_arrays.phpt | 42 ++++++++++++ 2 files changed, 83 insertions(+), 35 deletions(-) create mode 100644 ext/standard/tests/strings/pack_arrays.phpt diff --git a/ext/standard/pack.c b/ext/standard/pack.c index 3fad071aab662..449f8bef3fa83 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -739,24 +739,24 @@ PHP_FUNCTION(unpack) while (formatlen-- > 0) { char type = *(format++); char c; - int arg = 1, argb; + int repetitions = 1, argb; char *name; int namelen; - int size=0; + int size = 0; /* Handle format arguments if any */ if (formatlen > 0) { c = *format; if (c >= '0' && c <= '9') { - arg = atoi(format); + repetitions = atoi(format); while (formatlen > 0 && *format >= '0' && *format <= '9') { format++; formatlen--; } } else if (c == '*') { - arg = -1; + repetitions = -1; format++; formatlen--; } @@ -764,7 +764,7 @@ PHP_FUNCTION(unpack) /* Get of new value in array */ name = format; - argb = arg; + argb = repetitions; while (formatlen > 0 && *format != '/') { formatlen--; @@ -780,9 +780,9 @@ PHP_FUNCTION(unpack) /* Never use any input */ case 'X': size = -1; - if (arg < 0) { + if (repetitions < 0) { php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type); - arg = 1; + repetitions = 1; } break; @@ -793,14 +793,14 @@ PHP_FUNCTION(unpack) case 'a': case 'A': case 'Z': - size = arg; - arg = 1; + size = repetitions; + repetitions = 1; break; case 'h': case 'H': - size = (arg > 0) ? (arg + (arg % 2)) / 2 : arg; - arg = 1; + size = (repetitions > 0) ? (repetitions + (repetitions % 2)) / 2 : repetitions; + repetitions = 1; break; /* Use 1 byte of input */ @@ -871,16 +871,21 @@ PHP_FUNCTION(unpack) } /* Do actual unpacking */ - for (i = 0; i != arg; i++ ) { + for (i = 0; i != repetitions; i++ ) { /* Space for name + number, safe as namelen is ensured <= 200 */ - char n[256]; + char name_buffer[256]; - if (arg != 1 || namelen == 0) { - /* Need to add element number to name */ - snprintf(n, sizeof(n), "%.*s%d", namelen, name, i + 1); + char* real_name; + size_t real_name_length; + + if (repetitions == 1 && namelen > 0) { + /* Use a part of the formatarg argument directly as the name. */ + real_name_length = namelen; + real_name = name; } else { - /* Truncate name to next format code or end of string */ - snprintf(n, sizeof(n), "%.*s", namelen, name); + /* Need to add the 1-based element number to the name */ + real_name_length = snprintf(name_buffer, sizeof(name_buffer), "%.*s%d", namelen, name, i + 1); + real_name = name_buffer; } if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) { @@ -902,7 +907,7 @@ PHP_FUNCTION(unpack) size = len; - add_assoc_stringl(return_value, n, &input[inputpos], len); + add_assoc_stringl_ex(return_value, real_name, real_name_length, &input[inputpos], len); break; } case 'A': { @@ -928,7 +933,7 @@ PHP_FUNCTION(unpack) break; } - add_assoc_stringl(return_value, n, &input[inputpos], len + 1); + add_assoc_stringl_ex(return_value, real_name, real_name_length, &input[inputpos], len + 1); break; } /* New option added for Z to remain in-line with the Perl implementation */ @@ -952,7 +957,7 @@ PHP_FUNCTION(unpack) } len = s; - add_assoc_stringl(return_value, n, &input[inputpos], len); + add_assoc_stringl_ex(return_value, real_name, real_name_length, &input[inputpos], len); break; } @@ -995,7 +1000,7 @@ PHP_FUNCTION(unpack) } ZSTR_VAL(buf)[len] = '\0'; - add_assoc_str(return_value, n, buf); + add_assoc_str_ex(return_value, real_name, real_name_length, buf); break; } @@ -1003,7 +1008,7 @@ PHP_FUNCTION(unpack) case 'C': { /* unsigned */ uint8_t x = input[inputpos]; zend_long v = (type == 'c') ? (int8_t) x : x; - add_assoc_long(return_value, n, v); + add_assoc_long_ex(return_value, real_name, real_name_length, v); break; } @@ -1022,7 +1027,7 @@ PHP_FUNCTION(unpack) v = x; } - add_assoc_long(return_value, n, v); + add_assoc_long_ex(return_value, real_name, real_name_length, v); break; } @@ -1030,7 +1035,7 @@ PHP_FUNCTION(unpack) case 'I': { /* unsigned integer, machine size, machine endian */ unsigned int x = *((unaligned_uint*) &input[inputpos]); zend_long v = (type == 'i') ? (int) x : x; - add_assoc_long(return_value, n, v); + add_assoc_long_ex(return_value, real_name, real_name_length, v); break; } @@ -1049,7 +1054,8 @@ PHP_FUNCTION(unpack) v = x; } - add_assoc_long(return_value, n, v); + add_assoc_long_ex(return_value, real_name, real_name_length, v); + break; } @@ -1069,7 +1075,7 @@ PHP_FUNCTION(unpack) v = x; } - add_assoc_long(return_value, n, v); + add_assoc_long_ex(return_value, real_name, real_name_length, v); break; } #endif @@ -1088,7 +1094,7 @@ PHP_FUNCTION(unpack) memcpy(&v, &input[inputpos], sizeof(float)); } - add_assoc_double(return_value, n, (double)v); + add_assoc_double_ex(return_value, real_name, real_name_length, (double)v); break; } @@ -1105,7 +1111,7 @@ PHP_FUNCTION(unpack) } else { memcpy(&v, &input[inputpos], sizeof(double)); } - add_assoc_double(return_value, n, v); + add_assoc_double_ex(return_value, real_name, real_name_length, v); break; } @@ -1116,22 +1122,22 @@ PHP_FUNCTION(unpack) case 'X': if (inputpos < size) { inputpos = -size; - i = arg - 1; /* Break out of for loop */ + i = repetitions - 1; /* Break out of for loop */ - if (arg >= 0) { + if (repetitions >= 0) { php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type); } } break; case '@': - if (arg <= inputlen) { - inputpos = arg; + if (repetitions <= inputlen) { + inputpos = repetitions; } else { php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type); } - i = arg - 1; /* Done, break out of for loop */ + i = repetitions - 1; /* Done, break out of for loop */ break; } @@ -1142,7 +1148,7 @@ PHP_FUNCTION(unpack) } inputpos = 0; } - } else if (arg < 0) { + } else if (repetitions < 0) { /* Reached end of input for '*' repeater */ break; } else { diff --git a/ext/standard/tests/strings/pack_arrays.phpt b/ext/standard/tests/strings/pack_arrays.phpt new file mode 100644 index 0000000000000..301897006c1b4 --- /dev/null +++ b/ext/standard/tests/strings/pack_arrays.phpt @@ -0,0 +1,42 @@ +--TEST-- +test unpack() to array with named keys +--FILE-- + +--EXPECT-- +Array +( + [aa] => 66051 + [bb] => 67438087 + [cc] => 134810123 +) +Array +( + [aa1] => 66051 + [aa2] => 67438087 + [cc] => 134810123 +) +Array +( + [aa1] => 66051 + [aa2] => 67438087 + [aa3] => 134810123 +) +Array +( + [aa1] => 66051 + [aa2] => 67438087 + [aa3] => 134810123 +) +Array +( + [1] => 66051 + [2] => 67438087 + [3] => 134810123 +) From b312d9739a270dd00e036228719a1740117aaf2a Mon Sep 17 00:00:00 2001 From: K Date: Thu, 13 May 2021 13:20:36 +0200 Subject: [PATCH 2/3] further improvements --- ext/standard/pack.c | 66 +++++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/ext/standard/pack.c b/ext/standard/pack.c index 449f8bef3fa83..694f7a073163b 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -870,22 +870,31 @@ PHP_FUNCTION(unpack) RETURN_FALSE; } + /* Do actual unpacking */ for (i = 0; i != repetitions; i++ ) { - /* Space for name + number, safe as namelen is ensured <= 200 */ - char name_buffer[256]; - - char* real_name; - size_t real_name_length; + zend_string* real_name; + zval val; + char buffer[32]; if (repetitions == 1 && namelen > 0) { /* Use a part of the formatarg argument directly as the name. */ - real_name_length = namelen; - real_name = name; + real_name = zend_string_init_fast(name, namelen); + } else { /* Need to add the 1-based element number to the name */ - real_name_length = snprintf(name_buffer, sizeof(name_buffer), "%.*s%d", namelen, name, i + 1); - real_name = name_buffer; + + /* hardcoded case for small-ish number of repetitions */ + if ((i+1) < 100) { + buffer[0] = '0' + (i+1) / 10; + buffer[1] = '0' + (i+1) % 10; + int digits = (i+1) < 10 ? 1 : 2; + real_name = zend_string_concat2(name, namelen, buffer + 2 - digits, digits); + + } else { + int digits = snprintf(buffer, sizeof(buffer), "%d", i+1); + real_name = zend_string_concat2(name, namelen, buffer, digits); + } } if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) { @@ -907,7 +916,8 @@ PHP_FUNCTION(unpack) size = len; - add_assoc_stringl_ex(return_value, real_name, real_name_length, &input[inputpos], len); + ZVAL_STRINGL(&val, &input[inputpos], len); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } case 'A': { @@ -933,7 +943,8 @@ PHP_FUNCTION(unpack) break; } - add_assoc_stringl_ex(return_value, real_name, real_name_length, &input[inputpos], len + 1); + ZVAL_STRINGL(&val, &input[inputpos], len + 1); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } /* New option added for Z to remain in-line with the Perl implementation */ @@ -957,7 +968,8 @@ PHP_FUNCTION(unpack) } len = s; - add_assoc_stringl_ex(return_value, real_name, real_name_length, &input[inputpos], len); + ZVAL_STRINGL(&val, &input[inputpos], len); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1000,7 +1012,9 @@ PHP_FUNCTION(unpack) } ZSTR_VAL(buf)[len] = '\0'; - add_assoc_str_ex(return_value, real_name, real_name_length, buf); + + ZVAL_STR(&val, buf); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1008,7 +1022,9 @@ PHP_FUNCTION(unpack) case 'C': { /* unsigned */ uint8_t x = input[inputpos]; zend_long v = (type == 'c') ? (int8_t) x : x; - add_assoc_long_ex(return_value, real_name, real_name_length, v); + + ZVAL_LONG(&val, v); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1027,7 +1043,8 @@ PHP_FUNCTION(unpack) v = x; } - add_assoc_long_ex(return_value, real_name, real_name_length, v); + ZVAL_LONG(&val, v); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1035,7 +1052,9 @@ PHP_FUNCTION(unpack) case 'I': { /* unsigned integer, machine size, machine endian */ unsigned int x = *((unaligned_uint*) &input[inputpos]); zend_long v = (type == 'i') ? (int) x : x; - add_assoc_long_ex(return_value, real_name, real_name_length, v); + + ZVAL_LONG(&val, v); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1054,7 +1073,8 @@ PHP_FUNCTION(unpack) v = x; } - add_assoc_long_ex(return_value, real_name, real_name_length, v); + ZVAL_LONG(&val, v); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1075,7 +1095,8 @@ PHP_FUNCTION(unpack) v = x; } - add_assoc_long_ex(return_value, real_name, real_name_length, v); + ZVAL_LONG(&val, v); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } #endif @@ -1094,7 +1115,8 @@ PHP_FUNCTION(unpack) memcpy(&v, &input[inputpos], sizeof(float)); } - add_assoc_double_ex(return_value, real_name, real_name_length, (double)v); + ZVAL_DOUBLE(&val, v); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1111,7 +1133,9 @@ PHP_FUNCTION(unpack) } else { memcpy(&v, &input[inputpos], sizeof(double)); } - add_assoc_double_ex(return_value, real_name, real_name_length, v); + + ZVAL_DOUBLE(&val, v); + zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val); break; } @@ -1141,6 +1165,8 @@ PHP_FUNCTION(unpack) break; } + zend_string_release(real_name); + inputpos += size; if (inputpos < 0) { if (size != -1) { /* only print warning if not working with * */ From 409449540d186b79a474844934bd239a6f5d9f0f Mon Sep 17 00:00:00 2001 From: K Date: Thu, 13 May 2021 17:49:38 +0200 Subject: [PATCH 3/3] remove memory leak and make code little bit nicer --- ext/standard/pack.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/ext/standard/pack.c b/ext/standard/pack.c index 694f7a073163b..bfad808121680 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -873,29 +873,6 @@ PHP_FUNCTION(unpack) /* Do actual unpacking */ for (i = 0; i != repetitions; i++ ) { - zend_string* real_name; - zval val; - char buffer[32]; - - if (repetitions == 1 && namelen > 0) { - /* Use a part of the formatarg argument directly as the name. */ - real_name = zend_string_init_fast(name, namelen); - - } else { - /* Need to add the 1-based element number to the name */ - - /* hardcoded case for small-ish number of repetitions */ - if ((i+1) < 100) { - buffer[0] = '0' + (i+1) / 10; - buffer[1] = '0' + (i+1) % 10; - int digits = (i+1) < 10 ? 1 : 2; - real_name = zend_string_concat2(name, namelen, buffer + 2 - digits, digits); - - } else { - int digits = snprintf(buffer, sizeof(buffer), "%d", i+1); - real_name = zend_string_concat2(name, namelen, buffer, digits); - } - } if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) { php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type); @@ -904,6 +881,22 @@ PHP_FUNCTION(unpack) } if ((inputpos + size) <= inputlen) { + + zend_string* real_name; + zval val; + + if (repetitions == 1 && namelen > 0) { + /* Use a part of the formatarg argument directly as the name. */ + real_name = zend_string_init_fast(name, namelen); + + } else { + /* Need to add the 1-based element number to the name */ + char buf[MAX_LENGTH_OF_LONG + 1]; + char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1); + size_t digits = buf + sizeof(buf) - 1 - res; + real_name = zend_string_concat2(name, namelen, res, digits); + } + switch ((int) type) { case 'a': { /* a will not strip any trailing whitespace or null padding */