Skip to content

optimize unpack() for named fields #6958

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 14, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 68 additions & 36 deletions ext/standard/pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -739,32 +739,32 @@ PHP_FUNCTION(unpack)
while (formatlen-- > 0) {
char type = *(format++);
char c;
int arg = 1, argb;
int repetitions = 1, argb;
char *name;
int namelen;
int size=0;
int size = 0;

/* Handle format arguments if any */
if (formatlen > 0) {
c = *format;

if (c >= '0' && c <= '9') {
arg = atoi(format);
repetitions = atoi(format);

while (formatlen > 0 && *format >= '0' && *format <= '9') {
format++;
formatlen--;
}
} else if (c == '*') {
arg = -1;
repetitions = -1;
format++;
formatlen--;
}
}

/* Get of new value in array */
name = format;
argb = arg;
argb = repetitions;

while (formatlen > 0 && *format != '/') {
formatlen--;
Expand All @@ -780,9 +780,9 @@ PHP_FUNCTION(unpack)
/* Never use any input */
case 'X':
size = -1;
if (arg < 0) {
if (repetitions < 0) {
php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
arg = 1;
repetitions = 1;
}
break;

Expand All @@ -793,14 +793,14 @@ PHP_FUNCTION(unpack)
case 'a':
case 'A':
case 'Z':
size = arg;
arg = 1;
size = repetitions;
repetitions = 1;
break;

case 'h':
case 'H':
size = (arg > 0) ? (arg + (arg % 2)) / 2 : arg;
arg = 1;
size = (repetitions > 0) ? (repetitions + (repetitions % 2)) / 2 : repetitions;
repetitions = 1;
break;

/* Use 1 byte of input */
Expand Down Expand Up @@ -870,17 +870,31 @@ PHP_FUNCTION(unpack)
RETURN_FALSE;
}


/* Do actual unpacking */
for (i = 0; i != arg; i++ ) {
/* Space for name + number, safe as namelen is ensured <= 200 */
char n[256];
for (i = 0; i != repetitions; i++ ) {
zend_string* real_name;
zval val;
char buffer[32];

if (repetitions == 1 && namelen > 0) {
/* Use a part of the formatarg argument directly as the name. */
real_name = zend_string_init_fast(name, namelen);

if (arg != 1 || namelen == 0) {
/* Need to add element number to name */
snprintf(n, sizeof(n), "%.*s%d", namelen, name, i + 1);
} else {
/* Truncate name to next format code or end of string */
snprintf(n, sizeof(n), "%.*s", namelen, name);
/* Need to add the 1-based element number to the name */

/* hardcoded case for small-ish number of repetitions */
if ((i+1) < 100) {
buffer[0] = '0' + (i+1) / 10;
buffer[1] = '0' + (i+1) % 10;
int digits = (i+1) < 10 ? 1 : 2;
real_name = zend_string_concat2(name, namelen, buffer + 2 - digits, digits);

} else {
int digits = snprintf(buffer, sizeof(buffer), "%d", i+1);
real_name = zend_string_concat2(name, namelen, buffer, digits);
}
}

if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
Expand All @@ -902,7 +916,8 @@ PHP_FUNCTION(unpack)

size = len;

add_assoc_stringl(return_value, n, &input[inputpos], len);
ZVAL_STRINGL(&val, &input[inputpos], len);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}
case 'A': {
Expand All @@ -928,7 +943,8 @@ PHP_FUNCTION(unpack)
break;
}

add_assoc_stringl(return_value, n, &input[inputpos], len + 1);
ZVAL_STRINGL(&val, &input[inputpos], len + 1);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}
/* New option added for Z to remain in-line with the Perl implementation */
Expand All @@ -952,7 +968,8 @@ PHP_FUNCTION(unpack)
}
len = s;

add_assoc_stringl(return_value, n, &input[inputpos], len);
ZVAL_STRINGL(&val, &input[inputpos], len);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}

Expand Down Expand Up @@ -995,15 +1012,19 @@ PHP_FUNCTION(unpack)
}

ZSTR_VAL(buf)[len] = '\0';
add_assoc_str(return_value, n, buf);

ZVAL_STR(&val, buf);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}

case 'c': /* signed */
case 'C': { /* unsigned */
uint8_t x = input[inputpos];
zend_long v = (type == 'c') ? (int8_t) x : x;
add_assoc_long(return_value, n, v);

ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}

Expand All @@ -1022,15 +1043,18 @@ PHP_FUNCTION(unpack)
v = x;
}

add_assoc_long(return_value, n, v);
ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}

case 'i': /* signed integer, machine size, machine endian */
case 'I': { /* unsigned integer, machine size, machine endian */
unsigned int x = *((unaligned_uint*) &input[inputpos]);
zend_long v = (type == 'i') ? (int) x : x;
add_assoc_long(return_value, n, v);

ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}

Expand All @@ -1049,7 +1073,9 @@ PHP_FUNCTION(unpack)
v = x;
}

add_assoc_long(return_value, n, v);
ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);

break;
}

Expand All @@ -1069,7 +1095,8 @@ PHP_FUNCTION(unpack)
v = x;
}

add_assoc_long(return_value, n, v);
ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}
#endif
Expand All @@ -1088,7 +1115,8 @@ PHP_FUNCTION(unpack)
memcpy(&v, &input[inputpos], sizeof(float));
}

add_assoc_double(return_value, n, (double)v);
ZVAL_DOUBLE(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}

Expand All @@ -1105,7 +1133,9 @@ PHP_FUNCTION(unpack)
} else {
memcpy(&v, &input[inputpos], sizeof(double));
}
add_assoc_double(return_value, n, v);

ZVAL_DOUBLE(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break;
}

Expand All @@ -1116,33 +1146,35 @@ PHP_FUNCTION(unpack)
case 'X':
if (inputpos < size) {
inputpos = -size;
i = arg - 1; /* Break out of for loop */
i = repetitions - 1; /* Break out of for loop */

if (arg >= 0) {
if (repetitions >= 0) {
php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
}
}
break;

case '@':
if (arg <= inputlen) {
inputpos = arg;
if (repetitions <= inputlen) {
inputpos = repetitions;
} else {
php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
}

i = arg - 1; /* Done, break out of for loop */
i = repetitions - 1; /* Done, break out of for loop */
break;
}

zend_string_release(real_name);

inputpos += size;
if (inputpos < 0) {
if (size != -1) { /* only print warning if not working with * */
php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
}
inputpos = 0;
}
} else if (arg < 0) {
} else if (repetitions < 0) {
/* Reached end of input for '*' repeater */
break;
} else {
Expand Down
42 changes: 42 additions & 0 deletions ext/standard/tests/strings/pack_arrays.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
--TEST--
test unpack() to array with named keys
--FILE--
<?php
$str = pack('VVV', 0x00010203, 0x04050607, 0x08090a0b);
print_r(unpack('Vaa/Vbb/Vcc', $str));
print_r(unpack('V2aa/Vcc', $str));
print_r(unpack('V3aa', $str));
print_r(unpack('V*aa', $str));
print_r(unpack('V*', $str));
?>
--EXPECT--
Array
(
[aa] => 66051
[bb] => 67438087
[cc] => 134810123
)
Array
(
[aa1] => 66051
[aa2] => 67438087
[cc] => 134810123
)
Array
(
[aa1] => 66051
[aa2] => 67438087
[aa3] => 134810123
)
Array
(
[aa1] => 66051
[aa2] => 67438087
[aa3] => 134810123
)
Array
(
[1] => 66051
[2] => 67438087
[3] => 134810123
)