Skip to content

Commit b6c2308

Browse files
committed
Implement transitive comparison for SORT_REGULAR
- Add zend_compare_{long,double}_to_string_ex() plus zendi_smart_strcmp_ex() so SORT_REGULAR can invoke transitive-aware scalar comparisons without touching zend_compare() - Introduce php_array_compare_transitive() (pared-down zend_compare()) and php_array_compare_transitive_objects() (mirrors zend_std_compare_objects()) so arrays, objects, and enums recurse with transitive ordering - Route the public sort APIs and array_unique() through php_array_sort_regular() so PHP_SORT_REGULAR always uses the new comparator - Add regression tests: GH-20262 (array_unique with enums/objects/nested arrays) plus SORT_REGULAR consistency tests for sort()/ksort() on numeric-string edge cases Fixes: GH-20262
1 parent 8398038 commit b6c2308

File tree

6 files changed

+1008
-106
lines changed

6 files changed

+1008
-106
lines changed

Zend/zend_operators.c

Lines changed: 3 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -2259,47 +2259,13 @@ ZEND_API zend_result ZEND_FASTCALL compare_function(zval *result, zval *op1, zva
22592259

22602260
static int compare_long_to_string(zend_long lval, zend_string *str) /* {{{ */
22612261
{
2262-
zend_long str_lval;
2263-
double str_dval;
2264-
uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0);
2265-
2266-
if (type == IS_LONG) {
2267-
return lval > str_lval ? 1 : lval < str_lval ? -1 : 0;
2268-
}
2269-
2270-
if (type == IS_DOUBLE) {
2271-
return ZEND_THREEWAY_COMPARE((double) lval, str_dval);
2272-
}
2273-
2274-
zend_string *lval_as_str = zend_long_to_str(lval);
2275-
int cmp_result = zend_binary_strcmp(
2276-
ZSTR_VAL(lval_as_str), ZSTR_LEN(lval_as_str), ZSTR_VAL(str), ZSTR_LEN(str));
2277-
zend_string_release(lval_as_str);
2278-
return ZEND_NORMALIZE_BOOL(cmp_result);
2262+
return zend_compare_long_to_string_ex(lval, str, false);
22792263
}
22802264
/* }}} */
22812265

22822266
static int compare_double_to_string(double dval, zend_string *str) /* {{{ */
22832267
{
2284-
zend_long str_lval;
2285-
double str_dval;
2286-
uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0);
2287-
2288-
ZEND_ASSERT(!zend_isnan(dval));
2289-
2290-
if (type == IS_LONG) {
2291-
return ZEND_THREEWAY_COMPARE(dval, (double) str_lval);
2292-
}
2293-
2294-
if (type == IS_DOUBLE) {
2295-
return ZEND_THREEWAY_COMPARE(dval, str_dval);
2296-
}
2297-
2298-
zend_string *dval_as_str = zend_double_to_str(dval);
2299-
int cmp_result = zend_binary_strcmp(
2300-
ZSTR_VAL(dval_as_str), ZSTR_LEN(dval_as_str), ZSTR_VAL(str), ZSTR_LEN(str));
2301-
zend_string_release(dval_as_str);
2302-
return ZEND_NORMALIZE_BOOL(cmp_result);
2268+
return zend_compare_double_to_string_ex(dval, str, false);
23032269
}
23042270
/* }}} */
23052271

@@ -3420,52 +3386,7 @@ ZEND_API bool ZEND_FASTCALL zendi_smart_streq(zend_string *s1, zend_string *s2)
34203386

34213387
ZEND_API int ZEND_FASTCALL zendi_smart_strcmp(zend_string *s1, zend_string *s2) /* {{{ */
34223388
{
3423-
uint8_t ret1, ret2;
3424-
int oflow1, oflow2;
3425-
zend_long lval1 = 0, lval2 = 0;
3426-
double dval1 = 0.0, dval2 = 0.0;
3427-
3428-
if ((ret1 = is_numeric_string_ex(s1->val, s1->len, &lval1, &dval1, false, &oflow1, NULL)) &&
3429-
(ret2 = is_numeric_string_ex(s2->val, s2->len, &lval2, &dval2, false, &oflow2, NULL))) {
3430-
#if ZEND_ULONG_MAX == 0xFFFFFFFF
3431-
if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0. &&
3432-
((oflow1 == 1 && dval1 > 9007199254740991. /*0x1FFFFFFFFFFFFF*/)
3433-
|| (oflow1 == -1 && dval1 < -9007199254740991.))) {
3434-
#else
3435-
if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0.) {
3436-
#endif
3437-
/* both values are integers overflowed to the same side, and the
3438-
* double comparison may have resulted in crucial accuracy lost */
3439-
goto string_cmp;
3440-
}
3441-
if ((ret1 == IS_DOUBLE) || (ret2 == IS_DOUBLE)) {
3442-
if (ret1 != IS_DOUBLE) {
3443-
if (oflow2) {
3444-
/* 2nd operand is integer > LONG_MAX (oflow2==1) or < LONG_MIN (-1) */
3445-
return -1 * oflow2;
3446-
}
3447-
dval1 = (double) lval1;
3448-
} else if (ret2 != IS_DOUBLE) {
3449-
if (oflow1) {
3450-
return oflow1;
3451-
}
3452-
dval2 = (double) lval2;
3453-
} else if (dval1 == dval2 && !zend_finite(dval1)) {
3454-
/* Both values overflowed and have the same sign,
3455-
* so a numeric comparison would be inaccurate */
3456-
goto string_cmp;
3457-
}
3458-
dval1 = dval1 - dval2;
3459-
return ZEND_NORMALIZE_BOOL(dval1);
3460-
} else { /* they both have to be long's */
3461-
return lval1 > lval2 ? 1 : (lval1 < lval2 ? -1 : 0);
3462-
}
3463-
} else {
3464-
int strcmp_ret;
3465-
string_cmp:
3466-
strcmp_ret = zend_binary_strcmp(s1->val, s1->len, s2->val, s2->len);
3467-
return ZEND_NORMALIZE_BOOL(strcmp_ret);
3468-
}
3389+
return zendi_smart_strcmp_ex(s1, s2, false);
34693390
}
34703391
/* }}} */
34713392

Zend/zend_operators.h

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,149 @@ zend_memnistr(const char *haystack, const char *needle, size_t needle_len, const
10681068
return NULL;
10691069
}
10701070

1071+
static zend_always_inline int zend_compare_non_numeric_strings(zend_string *s1, zend_string *s2)
1072+
{
1073+
size_t min_len = ZSTR_LEN(s1) < ZSTR_LEN(s2) ? ZSTR_LEN(s1) : ZSTR_LEN(s2);
1074+
int cmp = memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), min_len);
1075+
if (cmp != 0) {
1076+
return cmp < 0 ? -1 : 1;
1077+
}
1078+
return ZEND_THREEWAY_COMPARE(ZSTR_LEN(s1), ZSTR_LEN(s2));
1079+
}
1080+
1081+
static zend_always_inline int zend_compare_long_to_string_ex(zend_long lval, zend_string *str, bool transitive)
1082+
{
1083+
zend_long str_lval;
1084+
double str_dval;
1085+
uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0);
1086+
1087+
if (type == IS_LONG) {
1088+
return ZEND_THREEWAY_COMPARE(lval, str_lval);
1089+
}
1090+
1091+
if (type == IS_DOUBLE) {
1092+
return ZEND_THREEWAY_COMPARE((double) lval, str_dval);
1093+
}
1094+
1095+
if (transitive) {
1096+
if (ZSTR_LEN(str) == 0) {
1097+
return 1;
1098+
}
1099+
return -1;
1100+
}
1101+
1102+
zend_string *lval_as_str = zend_long_to_str(lval);
1103+
int cmp_result = zend_binary_strcmp(
1104+
ZSTR_VAL(lval_as_str), ZSTR_LEN(lval_as_str), ZSTR_VAL(str), ZSTR_LEN(str));
1105+
zend_string_release(lval_as_str);
1106+
return ZEND_NORMALIZE_BOOL(cmp_result);
1107+
}
1108+
1109+
static zend_always_inline int zend_compare_double_to_string_ex(double dval, zend_string *str, bool transitive)
1110+
{
1111+
zend_long str_lval;
1112+
double str_dval;
1113+
uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0);
1114+
1115+
ZEND_ASSERT(!zend_isnan(dval));
1116+
1117+
if (type == IS_LONG) {
1118+
str_dval = (double) str_lval;
1119+
return ZEND_THREEWAY_COMPARE(dval, str_dval);
1120+
}
1121+
1122+
if (type == IS_DOUBLE) {
1123+
return ZEND_THREEWAY_COMPARE(dval, str_dval);
1124+
}
1125+
1126+
if (transitive) {
1127+
if (ZSTR_LEN(str) == 0) {
1128+
return 1;
1129+
}
1130+
return -1;
1131+
}
1132+
1133+
zend_string *dval_as_str = zend_double_to_str(dval);
1134+
int cmp_result = zend_binary_strcmp(
1135+
ZSTR_VAL(dval_as_str), ZSTR_LEN(dval_as_str), ZSTR_VAL(str), ZSTR_LEN(str));
1136+
zend_string_release(dval_as_str);
1137+
return ZEND_NORMALIZE_BOOL(cmp_result);
1138+
}
1139+
1140+
static zend_always_inline int zendi_smart_strcmp_ex(zend_string *s1, zend_string *s2, bool transitive)
1141+
{
1142+
uint8_t ret1, ret2;
1143+
int oflow1, oflow2;
1144+
zend_long lval1 = 0, lval2 = 0;
1145+
double dval1 = 0.0, dval2 = 0.0;
1146+
1147+
if (UNEXPECTED(ZSTR_LEN(s1) == 0 || ZSTR_LEN(s2) == 0)) {
1148+
if (transitive) {
1149+
if (ZSTR_LEN(s1) == 0 && ZSTR_LEN(s2) == 0) {
1150+
return 0;
1151+
}
1152+
return ZSTR_LEN(s1) == 0 ? -1 : 1;
1153+
}
1154+
}
1155+
1156+
ret1 = is_numeric_string_ex(ZSTR_VAL(s1), ZSTR_LEN(s1), &lval1, &dval1, false, &oflow1, NULL);
1157+
ret2 = is_numeric_string_ex(ZSTR_VAL(s2), ZSTR_LEN(s2), &lval2, &dval2, false, &oflow2, NULL);
1158+
1159+
if (ret1 && ret2) {
1160+
#if ZEND_ULONG_MAX == 0xFFFFFFFF
1161+
if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0. &&
1162+
((oflow1 == 1 && dval1 > 9007199254740991. /*0x1FFFFFFFFFFFFF*/)
1163+
|| (oflow1 == -1 && dval1 < -9007199254740991.))) {
1164+
#else
1165+
if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0.) {
1166+
#endif
1167+
/* both values are integers overflowed to the same side, and the
1168+
* double comparison may have resulted in crucial accuracy lost */
1169+
goto string_cmp;
1170+
}
1171+
if ((ret1 == IS_DOUBLE) || (ret2 == IS_DOUBLE)) {
1172+
if (ret1 != IS_DOUBLE) {
1173+
if (oflow2) {
1174+
/* 2nd operand is integer > LONG_MAX (oflow2==1) or < LONG_MIN (-1) */
1175+
return -1 * oflow2;
1176+
}
1177+
dval1 = (double) lval1;
1178+
} else if (ret2 != IS_DOUBLE) {
1179+
if (oflow1) {
1180+
return oflow1;
1181+
}
1182+
dval2 = (double) lval2;
1183+
} else if (dval1 == dval2 && !zend_finite(dval1)) {
1184+
/* Both values overflowed and have the same sign,
1185+
* so a numeric comparison would be inaccurate */
1186+
goto string_cmp;
1187+
}
1188+
dval1 = dval1 - dval2;
1189+
return ZEND_NORMALIZE_BOOL(dval1);
1190+
} else { /* they both have to be long's */
1191+
return lval1 > lval2 ? 1 : (lval1 < lval2 ? -1 : 0);
1192+
}
1193+
} else if (ret1) {
1194+
if (transitive) {
1195+
return -1;
1196+
}
1197+
goto string_cmp;
1198+
} else if (ret2) {
1199+
if (transitive) {
1200+
return 1;
1201+
}
1202+
goto string_cmp;
1203+
}
1204+
1205+
int strcmp_ret;
1206+
string_cmp:
1207+
if (transitive) {
1208+
return zend_compare_non_numeric_strings(s1, s2);
1209+
}
1210+
1211+
strcmp_ret = zend_binary_strcmp(ZSTR_VAL(s1), ZSTR_LEN(s1), ZSTR_VAL(s2), ZSTR_LEN(s2));
1212+
return ZEND_NORMALIZE_BOOL(strcmp_ret);
1213+
}
10711214

10721215
END_EXTERN_C()
10731216

0 commit comments

Comments
 (0)