diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 2550fcbeb1cde..e26ac4f6d2ee5 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -2259,47 +2259,13 @@ ZEND_API zend_result ZEND_FASTCALL compare_function(zval *result, zval *op1, zva static int compare_long_to_string(zend_long lval, zend_string *str) /* {{{ */ { - zend_long str_lval; - double str_dval; - uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0); - - if (type == IS_LONG) { - return lval > str_lval ? 1 : lval < str_lval ? -1 : 0; - } - - if (type == IS_DOUBLE) { - return ZEND_THREEWAY_COMPARE((double) lval, str_dval); - } - - zend_string *lval_as_str = zend_long_to_str(lval); - int cmp_result = zend_binary_strcmp( - ZSTR_VAL(lval_as_str), ZSTR_LEN(lval_as_str), ZSTR_VAL(str), ZSTR_LEN(str)); - zend_string_release(lval_as_str); - return ZEND_NORMALIZE_BOOL(cmp_result); + return zend_compare_long_to_string_ex(lval, str, false); } /* }}} */ static int compare_double_to_string(double dval, zend_string *str) /* {{{ */ { - zend_long str_lval; - double str_dval; - uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0); - - ZEND_ASSERT(!zend_isnan(dval)); - - if (type == IS_LONG) { - return ZEND_THREEWAY_COMPARE(dval, (double) str_lval); - } - - if (type == IS_DOUBLE) { - return ZEND_THREEWAY_COMPARE(dval, str_dval); - } - - zend_string *dval_as_str = zend_double_to_str(dval); - int cmp_result = zend_binary_strcmp( - ZSTR_VAL(dval_as_str), ZSTR_LEN(dval_as_str), ZSTR_VAL(str), ZSTR_LEN(str)); - zend_string_release(dval_as_str); - return ZEND_NORMALIZE_BOOL(cmp_result); + return zend_compare_double_to_string_ex(dval, str, false); } /* }}} */ @@ -3420,52 +3386,7 @@ ZEND_API bool ZEND_FASTCALL zendi_smart_streq(zend_string *s1, zend_string *s2) ZEND_API int ZEND_FASTCALL zendi_smart_strcmp(zend_string *s1, zend_string *s2) /* {{{ */ { - uint8_t ret1, ret2; - int oflow1, oflow2; - zend_long lval1 = 0, lval2 = 0; - double dval1 = 0.0, dval2 = 0.0; - - if ((ret1 = is_numeric_string_ex(s1->val, s1->len, &lval1, &dval1, false, &oflow1, NULL)) && - (ret2 = is_numeric_string_ex(s2->val, s2->len, &lval2, &dval2, false, &oflow2, NULL))) { -#if ZEND_ULONG_MAX == 0xFFFFFFFF - if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0. && - ((oflow1 == 1 && dval1 > 9007199254740991. /*0x1FFFFFFFFFFFFF*/) - || (oflow1 == -1 && dval1 < -9007199254740991.))) { -#else - if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0.) { -#endif - /* both values are integers overflowed to the same side, and the - * double comparison may have resulted in crucial accuracy lost */ - goto string_cmp; - } - if ((ret1 == IS_DOUBLE) || (ret2 == IS_DOUBLE)) { - if (ret1 != IS_DOUBLE) { - if (oflow2) { - /* 2nd operand is integer > LONG_MAX (oflow2==1) or < LONG_MIN (-1) */ - return -1 * oflow2; - } - dval1 = (double) lval1; - } else if (ret2 != IS_DOUBLE) { - if (oflow1) { - return oflow1; - } - dval2 = (double) lval2; - } else if (dval1 == dval2 && !zend_finite(dval1)) { - /* Both values overflowed and have the same sign, - * so a numeric comparison would be inaccurate */ - goto string_cmp; - } - dval1 = dval1 - dval2; - return ZEND_NORMALIZE_BOOL(dval1); - } else { /* they both have to be long's */ - return lval1 > lval2 ? 1 : (lval1 < lval2 ? -1 : 0); - } - } else { - int strcmp_ret; -string_cmp: - strcmp_ret = zend_binary_strcmp(s1->val, s1->len, s2->val, s2->len); - return ZEND_NORMALIZE_BOOL(strcmp_ret); - } + return zendi_smart_strcmp_ex(s1, s2, false); } /* }}} */ diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h index ff31c84c41e5e..4c32d16e8145e 100644 --- a/Zend/zend_operators.h +++ b/Zend/zend_operators.h @@ -1068,6 +1068,149 @@ zend_memnistr(const char *haystack, const char *needle, size_t needle_len, const return NULL; } +static zend_always_inline int zend_compare_non_numeric_strings(zend_string *s1, zend_string *s2) +{ + size_t min_len = ZSTR_LEN(s1) < ZSTR_LEN(s2) ? ZSTR_LEN(s1) : ZSTR_LEN(s2); + int cmp = memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), min_len); + if (cmp != 0) { + return cmp < 0 ? -1 : 1; + } + return ZEND_THREEWAY_COMPARE(ZSTR_LEN(s1), ZSTR_LEN(s2)); +} + +static zend_always_inline int zend_compare_long_to_string_ex(zend_long lval, zend_string *str, bool transitive) +{ + zend_long str_lval; + double str_dval; + uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0); + + if (type == IS_LONG) { + return ZEND_THREEWAY_COMPARE(lval, str_lval); + } + + if (type == IS_DOUBLE) { + return ZEND_THREEWAY_COMPARE((double) lval, str_dval); + } + + if (transitive) { + if (ZSTR_LEN(str) == 0) { + return 1; + } + return -1; + } + + zend_string *lval_as_str = zend_long_to_str(lval); + int cmp_result = zend_binary_strcmp( + ZSTR_VAL(lval_as_str), ZSTR_LEN(lval_as_str), ZSTR_VAL(str), ZSTR_LEN(str)); + zend_string_release(lval_as_str); + return ZEND_NORMALIZE_BOOL(cmp_result); +} + +static zend_always_inline int zend_compare_double_to_string_ex(double dval, zend_string *str, bool transitive) +{ + zend_long str_lval; + double str_dval; + uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0); + + ZEND_ASSERT(!zend_isnan(dval)); + + if (type == IS_LONG) { + str_dval = (double) str_lval; + return ZEND_THREEWAY_COMPARE(dval, str_dval); + } + + if (type == IS_DOUBLE) { + return ZEND_THREEWAY_COMPARE(dval, str_dval); + } + + if (transitive) { + if (ZSTR_LEN(str) == 0) { + return 1; + } + return -1; + } + + zend_string *dval_as_str = zend_double_to_str(dval); + int cmp_result = zend_binary_strcmp( + ZSTR_VAL(dval_as_str), ZSTR_LEN(dval_as_str), ZSTR_VAL(str), ZSTR_LEN(str)); + zend_string_release(dval_as_str); + return ZEND_NORMALIZE_BOOL(cmp_result); +} + +static zend_always_inline int zendi_smart_strcmp_ex(zend_string *s1, zend_string *s2, bool transitive) +{ + uint8_t ret1, ret2; + int oflow1, oflow2; + zend_long lval1 = 0, lval2 = 0; + double dval1 = 0.0, dval2 = 0.0; + + if (UNEXPECTED(ZSTR_LEN(s1) == 0 || ZSTR_LEN(s2) == 0)) { + if (transitive) { + if (ZSTR_LEN(s1) == 0 && ZSTR_LEN(s2) == 0) { + return 0; + } + return ZSTR_LEN(s1) == 0 ? -1 : 1; + } + } + + ret1 = is_numeric_string_ex(ZSTR_VAL(s1), ZSTR_LEN(s1), &lval1, &dval1, false, &oflow1, NULL); + ret2 = is_numeric_string_ex(ZSTR_VAL(s2), ZSTR_LEN(s2), &lval2, &dval2, false, &oflow2, NULL); + + if (ret1 && ret2) { +#if ZEND_ULONG_MAX == 0xFFFFFFFF + if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0. && + ((oflow1 == 1 && dval1 > 9007199254740991. /*0x1FFFFFFFFFFFFF*/) + || (oflow1 == -1 && dval1 < -9007199254740991.))) { +#else + if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0.) { +#endif + /* both values are integers overflowed to the same side, and the + * double comparison may have resulted in crucial accuracy lost */ + goto string_cmp; + } + if ((ret1 == IS_DOUBLE) || (ret2 == IS_DOUBLE)) { + if (ret1 != IS_DOUBLE) { + if (oflow2) { + /* 2nd operand is integer > LONG_MAX (oflow2==1) or < LONG_MIN (-1) */ + return -1 * oflow2; + } + dval1 = (double) lval1; + } else if (ret2 != IS_DOUBLE) { + if (oflow1) { + return oflow1; + } + dval2 = (double) lval2; + } else if (dval1 == dval2 && !zend_finite(dval1)) { + /* Both values overflowed and have the same sign, + * so a numeric comparison would be inaccurate */ + goto string_cmp; + } + dval1 = dval1 - dval2; + return ZEND_NORMALIZE_BOOL(dval1); + } else { /* they both have to be long's */ + return lval1 > lval2 ? 1 : (lval1 < lval2 ? -1 : 0); + } + } else if (ret1) { + if (transitive) { + return -1; + } + goto string_cmp; + } else if (ret2) { + if (transitive) { + return 1; + } + goto string_cmp; + } + + int strcmp_ret; +string_cmp: + if (transitive) { + return zend_compare_non_numeric_strings(s1, s2); + } + + strcmp_ret = zend_binary_strcmp(ZSTR_VAL(s1), ZSTR_LEN(s1), ZSTR_VAL(s2), ZSTR_LEN(s2)); + return ZEND_NORMALIZE_BOOL(strcmp_ret); +} END_EXTERN_C() diff --git a/ext/standard/array.c b/ext/standard/array.c index f1b25387db060..3b577181a0b8b 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -28,6 +28,7 @@ #include #include "zend_globals.h" #include "zend_interfaces.h" +#include "zend_enum.h" #include "php_array.h" #include "basic_functions.h" #include "php_string.h" @@ -104,6 +105,10 @@ static zend_never_inline ZEND_COLD int stable_sort_fallback(Bucket *a, Bucket *b return stable_sort_fallback((a), (b)); \ } while (0) +#define PHP_ARRAY_TYPE_PAIR(t1,t2) (((t1) << 4) | (t2)) + +static int php_array_compare_transitive(zval *op1, zval *op2); + /* Generate inlined unstable and stable variants, and non-inlined reversed variants. */ #define DEFINE_SORT_VARIANTS(name) \ static zend_never_inline int php_array_##name##_unstable(Bucket *a, Bucket *b) { \ @@ -117,31 +122,7 @@ static zend_never_inline ZEND_COLD int stable_sort_fallback(Bucket *a, Bucket *b } \ static zend_never_inline int php_array_reverse_##name(Bucket *a, Bucket *b) { \ RETURN_STABLE_SORT(a, b, php_array_reverse_##name##_unstable(a, b)); \ - } \ - -static zend_always_inline int php_array_key_compare_unstable_i(Bucket *f, Bucket *s) /* {{{ */ -{ - zval first; - zval second; - - if (f->key == NULL && s->key == NULL) { - return (zend_long)f->h > (zend_long)s->h ? 1 : -1; - } else if (f->key && s->key) { - return zendi_smart_strcmp(f->key, s->key); } - if (f->key) { - ZVAL_STR(&first, f->key); - } else { - ZVAL_LONG(&first, f->h); - } - if (s->key) { - ZVAL_STR(&second, s->key); - } else { - ZVAL_LONG(&second, s->h); - } - return zend_compare(&first, &second); -} -/* }}} */ static zend_always_inline int php_array_key_compare_numeric_unstable_i(Bucket *f, Bucket *s) /* {{{ */ { @@ -283,32 +264,6 @@ static zend_always_inline int php_array_key_compare_string_locale_unstable_i(Buc } /* }}} */ -static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucket *s) /* {{{ */ -{ - int result = zend_compare(&f->val, &s->val); - /* Special enums handling for array_unique. We don't want to add this logic to zend_compare as - * that would be observable via comparison operators. */ - zval *rhs = &s->val; - ZVAL_DEREF(rhs); - if (UNEXPECTED(Z_TYPE_P(rhs) == IS_OBJECT) - && result == ZEND_UNCOMPARABLE - && (Z_OBJCE_P(rhs)->ce_flags & ZEND_ACC_ENUM)) { - zval *lhs = &f->val; - ZVAL_DEREF(lhs); - if (Z_TYPE_P(lhs) == IS_OBJECT && (Z_OBJCE_P(lhs)->ce_flags & ZEND_ACC_ENUM)) { - // Order doesn't matter, we just need to group the same enum values - uintptr_t lhs_uintptr = (uintptr_t)Z_OBJ_P(lhs); - uintptr_t rhs_uintptr = (uintptr_t)Z_OBJ_P(rhs); - return lhs_uintptr == rhs_uintptr ? 0 : (lhs_uintptr < rhs_uintptr ? -1 : 1); - } else { - // Shift enums to the end of the array - return -1; - } - } - return result; -} -/* }}} */ - static zend_always_inline int php_array_data_compare_numeric_unstable_i(Bucket *f, Bucket *s) /* {{{ */ { return numeric_compare_function(&f->val, &s->val); @@ -359,12 +314,10 @@ static int php_array_data_compare_string_locale_unstable_i(Bucket *f, Bucket *s) } /* }}} */ -DEFINE_SORT_VARIANTS(key_compare); DEFINE_SORT_VARIANTS(key_compare_numeric); DEFINE_SORT_VARIANTS(key_compare_string_case); DEFINE_SORT_VARIANTS(key_compare_string); DEFINE_SORT_VARIANTS(key_compare_string_locale); -DEFINE_SORT_VARIANTS(data_compare); DEFINE_SORT_VARIANTS(data_compare_numeric); DEFINE_SORT_VARIANTS(data_compare_string_case); DEFINE_SORT_VARIANTS(data_compare_string); @@ -372,6 +325,378 @@ DEFINE_SORT_VARIANTS(data_compare_string_locale); DEFINE_SORT_VARIANTS(natural_compare); DEFINE_SORT_VARIANTS(natural_case_compare); +static zend_always_inline bool php_array_is_enum_zval(zval *zv) +{ + return Z_TYPE_P(zv) == IS_OBJECT && (Z_OBJCE_P(zv)->ce_flags & ZEND_ACC_ENUM); +} + +static zend_always_inline int php_array_compare_enum_zvals(zval *lhs, zval *rhs) +{ + const bool lhs_enum = php_array_is_enum_zval(lhs); + const bool rhs_enum = php_array_is_enum_zval(rhs); + + if (lhs_enum && rhs_enum) { + zend_object *lhs_obj = Z_OBJ_P(lhs); + zend_object *rhs_obj = Z_OBJ_P(rhs); + + if (lhs_obj == rhs_obj) { + return 0; + } + + if (lhs_obj->ce != rhs_obj->ce) { + return zend_compare_non_numeric_strings(lhs_obj->ce->name, rhs_obj->ce->name); + } + + if (lhs_obj->ce->enum_backing_type != IS_UNDEF) { + zval *lhs_value = zend_enum_fetch_case_value(lhs_obj); + zval *rhs_value = zend_enum_fetch_case_value(rhs_obj); + + if (lhs_obj->ce->enum_backing_type == IS_LONG) { + zend_long lhs_long = Z_LVAL_P(lhs_value); + zend_long rhs_long = Z_LVAL_P(rhs_value); + if (lhs_long != rhs_long) { + return lhs_long < rhs_long ? -1 : 1; + } + } else { + ZEND_ASSERT(lhs_obj->ce->enum_backing_type == IS_STRING); + zend_string *lhs_str = Z_STR_P(lhs_value); + zend_string *rhs_str = Z_STR_P(rhs_value); + if (lhs_str != rhs_str) { + zend_ulong lhs_hash = ZSTR_HASH(lhs_str); + zend_ulong rhs_hash = ZSTR_HASH(rhs_str); + if (lhs_hash != rhs_hash) { + return lhs_hash < rhs_hash ? -1 : 1; + } + int cmp = zend_compare_non_numeric_strings(lhs_str, rhs_str); + if (cmp != 0) { + return cmp; + } + } + } + } + + zend_string *lhs_case = Z_STR_P(zend_enum_fetch_case_name(lhs_obj)); + zend_string *rhs_case = Z_STR_P(zend_enum_fetch_case_name(rhs_obj)); + if (lhs_case != rhs_case) { + zend_ulong lhs_hash = ZSTR_HASH(lhs_case); + zend_ulong rhs_hash = ZSTR_HASH(rhs_case); + if (lhs_hash != rhs_hash) { + return lhs_hash < rhs_hash ? -1 : 1; + } + int cmp = zend_compare_non_numeric_strings(lhs_case, rhs_case); + if (cmp != 0) { + return cmp; + } + } + + /* Should not happen for userland enums, but keep ordering deterministic for transitivity. */ + return lhs_obj->handle < rhs_obj->handle ? -1 : 1; + } + + return lhs_enum ? 1 : -1; +} + +static int php_array_hash_compare_transitive(zval *zv1, zval *zv2) /* {{{ */ +{ + return php_array_compare_transitive(zv1, zv2); +} +/* }}} */ + +static int php_array_compare_transitive_symbol_tables(HashTable *ht1, HashTable *ht2) /* {{{ */ +{ + if (ht1 == ht2) { + return 0; + } + + GC_TRY_ADDREF(ht1); + GC_TRY_ADDREF(ht2); + + int ret = zend_hash_compare(ht1, ht2, (compare_func_t) php_array_hash_compare_transitive, 0); + + GC_TRY_DTOR_NO_REF(ht1); + GC_TRY_DTOR_NO_REF(ht2); + + return ret; +} +/* }}} */ + +static int php_array_compare_transitive_arrays(zval *a1, zval *a2) /* {{{ */ +{ + return php_array_compare_transitive_symbol_tables(Z_ARRVAL_P(a1), Z_ARRVAL_P(a2)); +} +/* }}} */ + +/* Mirrors zend_std_compare_objects(), but recurses via php_array_compare_transitive() + * so nested properties obey SORT_REGULAR's transitive ordering. */ +static int php_array_compare_transitive_objects(zval *o1, zval *o2) /* {{{ */ +{ + if (Z_TYPE_P(o1) != IS_OBJECT || Z_TYPE_P(o2) != IS_OBJECT) { + return zend_compare(o1, o2); + } + + if (Z_OBJ_HT_P(o1)->compare && Z_OBJ_HT_P(o1)->compare != zend_std_compare_objects) { + return Z_OBJ_HT_P(o1)->compare(o1, o2); + } + + zend_object *zobj1 = Z_OBJ_P(o1); + zend_object *zobj2 = Z_OBJ_P(o2); + + if (zobj1 == zobj2) { + return 0; /* the same object */ + } + if (zobj1->ce != zobj2->ce) { + return ZEND_UNCOMPARABLE; /* different classes */ + } + + if (!zobj1->properties && !zobj2->properties + && !zend_object_is_lazy(zobj1) && !zend_object_is_lazy(zobj2)) { + zend_property_info *info; + int i; + + if (!zobj1->ce->default_properties_count) { + return 0; + } + + /* It's enough to protect only one of the objects. + * The second one may be referenced from the first and this may cause + * false recursion detection. + */ + /* use bitwise OR to make only one conditional jump */ + if (UNEXPECTED(Z_IS_RECURSIVE_P(o1))) { + zend_throw_error(NULL, "Nesting level too deep - recursive dependency?"); + return ZEND_UNCOMPARABLE; + } + Z_PROTECT_RECURSION_P(o1); + + GC_ADDREF(zobj1); + GC_ADDREF(zobj2); + int ret; + + for (i = 0; i < zobj1->ce->default_properties_count; i++) { + zval *p1, *p2; + + info = zobj1->ce->properties_info_table[i]; + + if (!info) { + continue; + } + + p1 = OBJ_PROP(zobj1, info->offset); + p2 = OBJ_PROP(zobj2, info->offset); + + if (Z_TYPE_P(p1) != IS_UNDEF) { + if (Z_TYPE_P(p2) != IS_UNDEF) { + ret = php_array_compare_transitive(p1, p2); + if (ret != 0) { + Z_UNPROTECT_RECURSION_P(o1); + goto done; + } + } else { + Z_UNPROTECT_RECURSION_P(o1); + ret = 1; + goto done; + } + } else { + if (Z_TYPE_P(p2) != IS_UNDEF) { + Z_UNPROTECT_RECURSION_P(o1); + ret = 1; + goto done; + } + } + } + + Z_UNPROTECT_RECURSION_P(o1); + ret = 0; + +done: + OBJ_RELEASE(zobj1); + OBJ_RELEASE(zobj2); + + return ret; + } else { + GC_ADDREF(zobj1); + GC_ADDREF(zobj2); + + int ret = php_array_compare_transitive_symbol_tables( + zend_std_get_properties_ex(zobj1), + zend_std_get_properties_ex(zobj2)); + + OBJ_RELEASE(zobj1); + OBJ_RELEASE(zobj2); + + return ret; + } +} +/* }}} */ + +/* pared-down version of zend_compare() required for SORT_REGULAR transitivity */ +static int php_array_compare_transitive(zval *op1, zval *op2) +{ + ZVAL_DEREF(op1); + ZVAL_DEREF(op2); + + if (UNEXPECTED(php_array_is_enum_zval(op1) || php_array_is_enum_zval(op2))) { + return php_array_compare_enum_zvals(op1, op2); + } + + switch (PHP_ARRAY_TYPE_PAIR(Z_TYPE_P(op1), Z_TYPE_P(op2))) { + case PHP_ARRAY_TYPE_PAIR(IS_LONG, IS_LONG): + return Z_LVAL_P(op1) > Z_LVAL_P(op2) ? 1 : (Z_LVAL_P(op1) < Z_LVAL_P(op2) ? -1 : 0); + + case PHP_ARRAY_TYPE_PAIR(IS_DOUBLE, IS_LONG): + if (UNEXPECTED(zend_isnan(Z_DVAL_P(op1)))) { + return 1; /* NaN sorts after all integers (totalOrder) */ + } + return ZEND_THREEWAY_COMPARE(Z_DVAL_P(op1), (double) Z_LVAL_P(op2)); + + case PHP_ARRAY_TYPE_PAIR(IS_LONG, IS_DOUBLE): + if (UNEXPECTED(zend_isnan(Z_DVAL_P(op2)))) { + return -1; /* integers sort before NaN (totalOrder) */ + } + return ZEND_THREEWAY_COMPARE((double) Z_LVAL_P(op1), Z_DVAL_P(op2)); + + case PHP_ARRAY_TYPE_PAIR(IS_DOUBLE, IS_DOUBLE): + if (UNEXPECTED(zend_isnan(Z_DVAL_P(op1)))) { + return zend_isnan(Z_DVAL_P(op2)) ? 0 : 1; /* NaN sorts last among doubles (totalOrder) */ + } + if (UNEXPECTED(zend_isnan(Z_DVAL_P(op2)))) { + return -1; /* NaN sorts last among doubles (totalOrder) */ + } + return ZEND_THREEWAY_COMPARE(Z_DVAL_P(op1), Z_DVAL_P(op2)); + + case PHP_ARRAY_TYPE_PAIR(IS_ARRAY, IS_ARRAY): + return php_array_compare_transitive_arrays(op1, op2); + + case PHP_ARRAY_TYPE_PAIR(IS_NULL, IS_NULL): + case PHP_ARRAY_TYPE_PAIR(IS_NULL, IS_FALSE): + case PHP_ARRAY_TYPE_PAIR(IS_FALSE, IS_NULL): + case PHP_ARRAY_TYPE_PAIR(IS_FALSE, IS_FALSE): + case PHP_ARRAY_TYPE_PAIR(IS_TRUE, IS_TRUE): + return 0; + + case PHP_ARRAY_TYPE_PAIR(IS_NULL, IS_TRUE): + return -1; + + case PHP_ARRAY_TYPE_PAIR(IS_TRUE, IS_NULL): + return 1; + + case PHP_ARRAY_TYPE_PAIR(IS_STRING, IS_STRING): + if (Z_STR_P(op1) == Z_STR_P(op2)) { + return 0; + } + return zendi_smart_strcmp_ex(Z_STR_P(op1), Z_STR_P(op2), true); + + case PHP_ARRAY_TYPE_PAIR(IS_NULL, IS_STRING): + return Z_STRLEN_P(op2) == 0 ? 0 : -1; + + case PHP_ARRAY_TYPE_PAIR(IS_STRING, IS_NULL): + return Z_STRLEN_P(op1) == 0 ? 0 : 1; + + case PHP_ARRAY_TYPE_PAIR(IS_LONG, IS_STRING): + return zend_compare_long_to_string_ex(Z_LVAL_P(op1), Z_STR_P(op2), true); + + case PHP_ARRAY_TYPE_PAIR(IS_STRING, IS_LONG): + return -zend_compare_long_to_string_ex(Z_LVAL_P(op2), Z_STR_P(op1), true); + + case PHP_ARRAY_TYPE_PAIR(IS_DOUBLE, IS_STRING): + if (UNEXPECTED(zend_isnan(Z_DVAL_P(op1)))) { + return -1; /* NaN sorts before non-numeric strings (totalOrder) */ + } + return zend_compare_double_to_string_ex(Z_DVAL_P(op1), Z_STR_P(op2), true); + + case PHP_ARRAY_TYPE_PAIR(IS_STRING, IS_DOUBLE): + if (UNEXPECTED(zend_isnan(Z_DVAL_P(op2)))) { + return 1; /* non-numeric strings sort after NaN (totalOrder) */ + } + return -zend_compare_double_to_string_ex(Z_DVAL_P(op2), Z_STR_P(op1), true); + + case PHP_ARRAY_TYPE_PAIR(IS_OBJECT, IS_NULL): + return 1; + + case PHP_ARRAY_TYPE_PAIR(IS_NULL, IS_OBJECT): + return -1; + + default: + if (Z_TYPE_P(op1) == IS_OBJECT + || Z_TYPE_P(op2) == IS_OBJECT) { + return php_array_compare_transitive_objects(op1, op2); + } + + return zend_compare(op1, op2); + } +} +static int php_array_compare_regular(zval *op1, zval *op2) +{ + return php_array_compare_transitive(op1, op2); +} + +static zend_always_inline int php_array_key_compare_regular_unstable_i(Bucket *f, Bucket *s) +{ + zval first; + zval second; + + if (f->key == NULL && s->key == NULL) { + return (zend_long)f->h > (zend_long)s->h ? 1 : -1; + } else if (f->key && s->key) { + if ((unsigned char)f->key->val[0] > '9' + && (unsigned char)s->key->val[0] > '9') { + return zend_compare_non_numeric_strings(f->key, s->key); + } + return zendi_smart_strcmp_ex(f->key, s->key, true); + } + if (f->key) { + ZVAL_STR(&first, f->key); + } else { + ZVAL_LONG(&first, f->h); + } + if (s->key) { + ZVAL_STR(&second, s->key); + } else { + ZVAL_LONG(&second, s->h); + } + return php_array_compare_regular(&first, &second); +} + +static zend_always_inline int php_array_data_compare_regular_unstable_i(Bucket *f, Bucket *s) +{ + zval *op1 = &f->val; + zval *op2 = &s->val; + + ZVAL_DEREF(op1); + ZVAL_DEREF(op2); + + if (EXPECTED(Z_TYPE_P(op1) == IS_LONG && Z_TYPE_P(op2) == IS_LONG)) { + return ZEND_THREEWAY_COMPARE(Z_LVAL_P(op1), Z_LVAL_P(op2)); + } + + if (EXPECTED(Z_TYPE_P(op1) == IS_DOUBLE && Z_TYPE_P(op2) == IS_DOUBLE)) { + if (UNEXPECTED(zend_isnan(Z_DVAL_P(op1)))) { + return zend_isnan(Z_DVAL_P(op2)) ? 0 : 1; /* NaN sorts last among doubles (totalOrder) */ + } + if (UNEXPECTED(zend_isnan(Z_DVAL_P(op2)))) { + return -1; /* NaN sorts last among doubles (totalOrder) */ + } + return ZEND_THREEWAY_COMPARE(Z_DVAL_P(op1), Z_DVAL_P(op2)); + } + + if (EXPECTED(Z_TYPE_P(op1) == IS_STRING && Z_TYPE_P(op2) == IS_STRING)) { + zend_string *str1 = Z_STR_P(op1); + zend_string *str2 = Z_STR_P(op2); + + if ((unsigned char)str1->val[0] > '9' + && (unsigned char)str2->val[0] > '9') { + return zend_compare_non_numeric_strings(str1, str2); + } + + return zendi_smart_strcmp_ex(str1, str2, true); + } + + return php_array_compare_regular(op1, op2); +} + +DEFINE_SORT_VARIANTS(key_compare_regular); +DEFINE_SORT_VARIANTS(data_compare_regular); + static bucket_compare_func_t php_get_key_compare_func(zend_long sort_type) { switch (sort_type & ~PHP_SORT_FLAG_CASE) { @@ -397,7 +722,7 @@ static bucket_compare_func_t php_get_key_compare_func(zend_long sort_type) case PHP_SORT_REGULAR: default: - return php_array_key_compare; + return php_array_key_compare_regular; } return NULL; } @@ -427,7 +752,7 @@ static bucket_compare_func_t php_get_key_reverse_compare_func(zend_long sort_typ case PHP_SORT_REGULAR: default: - return php_array_reverse_key_compare; + return php_array_reverse_key_compare_regular; } return NULL; } @@ -457,7 +782,7 @@ static bucket_compare_func_t php_get_data_compare_func(zend_long sort_type) /* { case PHP_SORT_REGULAR: default: - return php_array_data_compare; + return php_array_data_compare_regular; } return NULL; } @@ -487,7 +812,7 @@ static bucket_compare_func_t php_get_data_reverse_compare_func(zend_long sort_ty case PHP_SORT_REGULAR: default: - return php_array_reverse_data_compare; + return php_array_reverse_data_compare_regular; } return NULL; } @@ -546,9 +871,9 @@ static bucket_compare_func_t php_get_data_compare_func_unstable(zend_long sort_t case PHP_SORT_REGULAR: default: if (reverse) { - return php_array_reverse_data_compare_unstable; + return php_array_reverse_data_compare_regular_unstable; } else { - return php_array_data_compare_unstable; + return php_array_data_compare_regular_unstable; } break; } diff --git a/ext/standard/tests/array/gh20262.phpt b/ext/standard/tests/array/gh20262.phpt new file mode 100644 index 0000000000000..4a64c8d8ce357 --- /dev/null +++ b/ext/standard/tests/array/gh20262.phpt @@ -0,0 +1,93 @@ +--TEST-- +GH-20262 (array_unique() with SORT_REGULAR returns duplicate values) +--FILE-- +streetNumber; +} +echo "\n"; + +echo "\nTest 4: Nested arrays\n"; +$addresses = [ + ['streetNumber' => '5', 'streetName' => 'Main St'], + ['streetNumber' => '10', 'streetName' => 'Main St'], + ['streetNumber' => '10', 'streetName' => 'Main St'], + ['streetNumber' => '3A', 'streetName' => 'Main St'], + ['streetNumber' => '5', 'streetName' => 'Main St'], +]; + +$unique = array_unique($addresses, SORT_REGULAR); +echo "Unique count: " . count($unique) . " (expected 3)\n"; +echo "Street numbers:"; +foreach ($unique as $addr) { + echo " " . $addr['streetNumber']; +} +echo "\n"; + +echo "\nTest 5: sort() consistency with SORT_REGULAR\n"; +$arr1 = ["5", "10", "3A"]; +$arr2 = ["3A", "10", "5"]; +sort($arr1, SORT_REGULAR); +sort($arr2, SORT_REGULAR); +echo "arr1 sorted: ['" . implode("', '", $arr1) . "']\n"; +echo "arr2 sorted: ['" . implode("', '", $arr2) . "']\n"; +echo "Results match: " . ($arr1 === $arr2 ? "yes" : "no") . "\n"; + +?> +--EXPECT-- +Test 1: Scalar array (original bug report) +Array +( + [0] => 5 + [1] => 10 + [3] => 3A +) + +Test 2: Same array with SORT_STRING +Array +( + [0] => 5 + [1] => 10 + [3] => 3A +) + +Test 3: Objects +Unique count: 3 (expected 3) +Street numbers: 5 10 3A + +Test 4: Nested arrays +Unique count: 3 (expected 3) +Street numbers: 5 10 3A + +Test 5: sort() consistency with SORT_REGULAR +arr1 sorted: ['5', '10', '3A'] +arr2 sorted: ['5', '10', '3A'] +Results match: yes diff --git a/ext/standard/tests/array/sort/ksort_variation_numeric_strings.phpt b/ext/standard/tests/array/sort/ksort_variation_numeric_strings.phpt new file mode 100644 index 0000000000000..fb35f953fc1c1 --- /dev/null +++ b/ext/standard/tests/array/sort/ksort_variation_numeric_strings.phpt @@ -0,0 +1,90 @@ +--TEST-- +Test ksort() function: SORT_REGULAR consistency with sort() for numeric string keys +--FILE-- + "a", "16" => "b", "0b10000" => "c"]; + +sort($values, SORT_REGULAR); +ksort($keyed, SORT_REGULAR); + +echo "sort() result: "; +var_dump($values); +echo "ksort() result: "; +var_dump(array_keys($keyed)); + +echo "\n-- Test 2: Mixed integers and numeric strings (from sort test) --\n"; +// Note: This uses actual integer keys mixed with string keys +$values = [10, "3A", 5, "10", ""]; +$keyed = [10 => "a", "3A" => "b", 5 => "c", "10" => "d", "" => "e"]; + +sort($values, SORT_REGULAR); +ksort($keyed, SORT_REGULAR); + +echo "sort() result: "; +var_dump($values); +echo "ksort() result: "; +var_dump(array_keys($keyed)); + +echo "\n-- Test 3: Consistency check (multiple runs) --\n"; +$results = []; +for ($i = 0; $i < 3; $i++) { + $keyed = ["5" => 1, "3A" => 2, "10" => 3]; + ksort($keyed, SORT_REGULAR); + $results[] = implode(",", array_keys($keyed)); +} +echo "All runs produce same result: " . (count(array_unique($results)) === 1 ? "yes" : "no") . "\n"; + +echo "Done\n"; +?> +--EXPECT-- +*** Testing ksort() : SORT_REGULAR consistency with sort() *** + +-- Test 1: Hexadecimal, binary and decimal strings -- +sort() result: array(3) { + [0]=> + string(2) "16" + [1]=> + string(7) "0b10000" + [2]=> + string(4) "0x10" +} +ksort() result: array(3) { + [0]=> + int(16) + [1]=> + string(7) "0b10000" + [2]=> + string(4) "0x10" +} + +-- Test 2: Mixed integers and numeric strings (from sort test) -- +sort() result: array(5) { + [0]=> + string(0) "" + [1]=> + int(5) + [2]=> + int(10) + [3]=> + string(2) "10" + [4]=> + string(2) "3A" +} +ksort() result: array(4) { + [0]=> + string(0) "" + [1]=> + int(5) + [2]=> + int(10) + [3]=> + string(2) "3A" +} + +-- Test 3: Consistency check (multiple runs) -- +All runs produce same result: yes +Done diff --git a/ext/standard/tests/array/sort/sort_enum_stability.phpt b/ext/standard/tests/array/sort/sort_enum_stability.phpt new file mode 100644 index 0000000000000..c0b8c1910a193 --- /dev/null +++ b/ext/standard/tests/array/sort/sort_enum_stability.phpt @@ -0,0 +1,68 @@ +--TEST-- +SORT_REGULAR produces stable ordering for enums regardless of access order +--FILE-- + $c->name, $cases); +} + +function sorted_backed(array $order): array { + $cases = build_cases(BackedEnumExample::class, $order); + sort($cases, SORT_REGULAR); + return array_map(fn(BackedEnumExample $c) => $c->value, $cases); +} + +$unitOrders = [ + ['Hearts', 'Spades', 'Clubs', 'Diamonds'], + ['Diamonds', 'Clubs', 'Spades', 'Hearts'], + ['Spades', 'Hearts', 'Diamonds', 'Clubs'], +]; + +$unitBaseline = sorted_unit($unitOrders[0]); +foreach ($unitOrders as $idx => $order) { + if (sorted_unit($order) !== $unitBaseline) { + echo "Unit enum order mismatch for permutation $idx\n"; + } +} + +$backedOrders = [ + ['Alpha', 'Beta', 'Gamma', 'Delta'], + ['Delta', 'Gamma', 'Beta', 'Alpha'], + ['Beta', 'Alpha', 'Delta', 'Gamma'], +]; + +$backedBaseline = sorted_backed($backedOrders[0]); +foreach ($backedOrders as $idx => $order) { + if (sorted_backed($order) !== $backedBaseline) { + echo "Backed enum order mismatch for permutation $idx\n"; + } +} + +echo "done\n"; +?> +--EXPECT-- +done diff --git a/ext/standard/tests/array/sort/sort_variation_numeric_strings.phpt b/ext/standard/tests/array/sort/sort_variation_numeric_strings.phpt new file mode 100644 index 0000000000000..4322e513ce7f6 --- /dev/null +++ b/ext/standard/tests/array/sort/sort_variation_numeric_strings.phpt @@ -0,0 +1,295 @@ +--TEST-- +Test sort() function: SORT_REGULAR with numeric string edge cases +--FILE-- + +--EXPECTF-- +*** Testing sort() : SORT_REGULAR with numeric edge cases *** + +-- Test 1: Empty string and zero variations -- +array(4) { + [0]=> + string(0) "" + [1]=> + string(1) "0" + [2]=> + string(2) "00" + [3]=> + string(1) "A" +} + +-- Test 2: Numeric strings with whitespace and signs -- +array(5) { + [0]=> + string(2) "-0" + [1]=> + string(1) "0" + [2]=> + string(2) " 5" + [3]=> + string(2) "+5" + [4]=> + string(1) "A" +} + +-- Test 3: Scientific notation and special floats -- +array(5) { + [0]=> + string(3) "5e2" + [1]=> + string(3) "500" + [2]=> + string(4) "-INF" + [3]=> + string(3) "INF" + [4]=> + string(3) "NAN" +} + +-- Test 4: Hexadecimal, binary and decimal strings -- +array(3) { + [0]=> + string(2) "16" + [1]=> + string(7) "0b10000" + [2]=> + string(4) "0x10" +} + +-- Test 5: Mixed integers and numeric strings -- +array(5) { + [0]=> + string(0) "" + [1]=> + int(5) + [2]=> + int(10) + [3]=> + string(2) "10" + [4]=> + string(2) "3A" +} + +-- Test 6: LONG_MAX boundary -- +array(3) { + [0]=> + string(19) "9223372036854775807" + [1]=> + string(19) "9223372036854775808" + [2]=> + %r(int\(9223372036854775807\)|float\(9\.22337203685477[0-9]E\+18\))%r +} + +-- Test 7: Leading/trailing whitespace -- +array(5) { + [0]=> + string(1) "5" + [1]=> + string(2) " 5" + [2]=> + string(2) "5 " + [3]=> + string(3) " 5 " + [4]=> + string(1) "A" +} + +-- Test 8: Zero variations with signs -- +array(5) { + [0]=> + string(1) "0" + [1]=> + string(2) "-0" + [2]=> + string(2) "+0" + [3]=> + string(3) "0.0" + [4]=> + string(4) "-0.0" +} + +-- Test 9: Multiple plus/minus signs -- +array(5) { + [0]=> + string(1) "5" + [1]=> + string(3) "++5" + [2]=> + string(3) "+-5" + [3]=> + string(3) "-+5" + [4]=> + string(3) "--5" +} + +-- Test 10: Decimal point variations -- +array(5) { + [0]=> + string(2) "0." + [1]=> + string(2) ".0" + [2]=> + string(3) "0.0" + [3]=> + string(1) "." + [4]=> + string(1) "A" +} + +-- Test 11: Leading zeros with different values -- +array(5) { + [0]=> + string(2) "00" + [1]=> + string(1) "0" + [2]=> + string(2) "01" + [3]=> + string(3) "001" + [4]=> + string(1) "1" +} + +-- Test 12: Scientific notation variations -- +array(5) { + [0]=> + string(4) "1e-2" + [1]=> + string(3) "1e2" + [2]=> + string(3) "1E2" + [3]=> + string(4) "1e+2" + [4]=> + string(3) "100" +} + +-- Test 13: NaN and INF float values (totalOrder) -- +array(9) { + [0]=> + float(-INF) + [1]=> + int(1) + [2]=> + float(2) + [3]=> + float(3.5) + [4]=> + float(INF) + [5]=> + float(NAN) + [6]=> + float(NAN) + [7]=> + string(2) "10" + [8]=> + string(5) "apple" +} + +-- Test 14: NaN and INF float values reversed -- +array(9) { + [0]=> + string(5) "apple" + [1]=> + string(2) "10" + [2]=> + float(NAN) + [3]=> + float(NAN) + [4]=> + float(INF) + [5]=> + float(3.5) + [6]=> + float(2) + [7]=> + int(1) + [8]=> + float(-INF) +} + +-- Test 15: Consistency check -- +All runs produce same result: yes +Done