Skip to content

Commit 6d580e0

Browse files
committed
Mark strings as IS_STR_VALID_UTF8 on-the-fly
Alternative to GH-10870. Use atomic writes for adding the IS_STR_VALID_UTF8 flag to UTF-8-verified interned strings in ext-mbstring. x86 and other architectures guarantee atomic writes/reads for aligned variables up to size_t, which we already rely on, particularly for zend_op.handler being swapped out in the JIT. The atomic write is only needed here to not drop any other newly written bits (which there currently aren't any of). We use GCC and sync atomics because they don't require annotating the modified variable with the C11 _Atomic keyword.
1 parent 779ae6f commit 6d580e0

File tree

3 files changed

+30
-8
lines changed

3 files changed

+30
-8
lines changed

ext/mbstring/mbstring.c

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,28 @@
6767

6868
#include "zend_simd.h"
6969

70+
#include "ext/opcache/zend_shared_alloc.h"
71+
#include "ext/opcache/ZendAccelerator.h"
72+
73+
static void mark_zstr_as_utf8(zend_string *s)
74+
{
75+
if (!ZSTR_IS_INTERNED(s)) {
76+
GC_ADD_FLAGS(s, IS_STR_VALID_UTF8);
77+
return;
78+
}
79+
80+
/* We don't use zend_atomic.h as we're writing to a non-_Atomic field. */
81+
#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 7) || (__GNUC__ > 4)
82+
SHM_UNPROTECT();
83+
__atomic_or_fetch(&GC_TYPE_INFO(s), IS_STR_VALID_UTF8 << GC_FLAGS_SHIFT, __ATOMIC_SEQ_CST);
84+
SHM_PROTECT();
85+
#elif defined(__GNUC__)
86+
SHM_UNPROTECT();
87+
__sync_or_and_fetch(&GC_TYPE_INFO(s), IS_STR_VALID_UTF8 << GC_FLAGS_SHIFT);
88+
SHM_PROTECT();
89+
#endif
90+
}
91+
7092
/* }}} */
7193

7294
/* {{{ prototypes */
@@ -2263,8 +2285,8 @@ PHP_FUNCTION(mb_substr_count)
22632285
} else {
22642286
unsigned int num_errors = 0;
22652287
haystack_u8 = mb_fast_convert((unsigned char*)ZSTR_VAL(haystack), ZSTR_LEN(haystack), enc, &mbfl_encoding_utf8, 0, MBFL_OUTPUTFILTER_ILLEGAL_MODE_BADUTF8, &num_errors);
2266-
if (!num_errors && !ZSTR_IS_INTERNED(haystack)) {
2267-
GC_ADD_FLAGS(haystack, IS_STR_VALID_UTF8);
2288+
if (!num_errors) {
2289+
mark_zstr_as_utf8(haystack);
22682290
}
22692291
}
22702292

@@ -2273,8 +2295,8 @@ PHP_FUNCTION(mb_substr_count)
22732295
} else {
22742296
unsigned int num_errors = 0;
22752297
needle_u8 = mb_fast_convert((unsigned char*)ZSTR_VAL(needle), ZSTR_LEN(needle), enc, &mbfl_encoding_utf8, 0, MBFL_OUTPUTFILTER_ILLEGAL_MODE_BADUTF8, &num_errors);
2276-
if (!num_errors && !ZSTR_IS_INTERNED(needle)) {
2277-
GC_ADD_FLAGS(needle, IS_STR_VALID_UTF8);
2298+
if (!num_errors) {
2299+
mark_zstr_as_utf8(needle);
22782300
}
22792301
}
22802302
} else {
@@ -5567,8 +5589,8 @@ static bool mb_check_str_encoding(zend_string *str, const mbfl_encoding *encodin
55675589
return true;
55685590
}
55695591
bool result = mb_fast_check_utf8(str);
5570-
if (result && !ZSTR_IS_INTERNED(str)) {
5571-
GC_ADD_FLAGS(str, IS_STR_VALID_UTF8);
5592+
if (result) {
5593+
mark_zstr_as_utf8(str);
55725594
}
55735595
return result;
55745596
} else {

ext/opcache/zend_shared_alloc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@ const char *zend_accel_get_shared_model(void)
628628
return g_shared_model;
629629
}
630630

631-
void zend_accel_shared_protect(bool protected)
631+
ZEND_API void zend_accel_shared_protect(bool protected)
632632
{
633633
#ifdef HAVE_MPROTECT
634634
int i;

ext/opcache/zend_shared_alloc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ const char *zend_accel_get_shared_model(void);
200200
* @param protected true to protect shared memory (read-only), false
201201
* to unprotect shared memory (writable)
202202
*/
203-
void zend_accel_shared_protect(bool protected);
203+
ZEND_API void zend_accel_shared_protect(bool protected);
204204

205205
#ifdef USE_MMAP
206206
extern const zend_shared_memory_handlers zend_alloc_mmap_handlers;

0 commit comments

Comments
 (0)