From a55bf78b2cedae4356c1e9425c4bd773d057128e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Wer=C5=82os?= Date: Thu, 21 Aug 2025 21:57:18 +0200 Subject: [PATCH 1/5] Add test showing current behaviour --- ext/tokenizer/tests/bug19507.phpt | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 ext/tokenizer/tests/bug19507.phpt diff --git a/ext/tokenizer/tests/bug19507.phpt b/ext/tokenizer/tests/bug19507.phpt new file mode 100644 index 0000000000000..fc67d603fb6a5 --- /dev/null +++ b/ext/tokenizer/tests/bug19507.phpt @@ -0,0 +1,27 @@ +--TEST-- +Bug #19507: Recursive tokenization during `token_get_all` result on error +--EXTENSIONS-- +tokenizer +--FILE-- +getTokenName(), "\n"; +} + +--EXPECT-- +error handler called: Non-canonical cast (double) is deprecated, use the (float) cast instead +T_OPEN_TAG +T_LNUMBER +; +T_DOUBLE_CAST +T_WHITESPACE +T_VARIABLE +; From 43a7a9a4fdb97398eb47783e1dfd0265bcfeb70c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Wer=C5=82os?= Date: Thu, 21 Aug 2025 22:43:01 +0200 Subject: [PATCH 2/5] Update test and add fix by `arnaud-lb` --- Zend/zend_language_scanner.l | 17 +++++++++++++++++ .../tests/{bug19507.phpt => gh19507.phpt} | 4 +--- 2 files changed, 18 insertions(+), 3 deletions(-) rename ext/tokenizer/tests/{bug19507.phpt => gh19507.phpt} (97%) diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index d298ae8b9eac4..aa8b352272f74 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -210,6 +210,7 @@ void shutdown_scanner(void) zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); SCNG(heredoc_scan_ahead) = 0; SCNG(on_event) = NULL; + SCNG(on_event_context) = NULL; } ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state) @@ -581,6 +582,8 @@ ZEND_API zend_result open_file_for_scanning(zend_file_handle *file_handle) zend_set_compiled_filename(compiled_filename); zend_string_release_ex(compiled_filename, 0); + SCNG(on_event) = NULL; + SCNG(on_event_context) = NULL; RESET_DOC_COMMENT(); CG(zend_lineno) = 1; CG(increment_lineno) = 0; @@ -766,6 +769,8 @@ ZEND_API void zend_prepare_string_for_scanning(zval *str, zend_string *filename) zend_set_compiled_filename(filename); CG(zend_lineno) = 1; CG(increment_lineno) = 0; + SCNG(on_event) = NULL; + SCNG(on_event_context) = NULL; RESET_DOC_COMMENT(); } @@ -1636,6 +1641,9 @@ OPTIONAL_WHITESPACE_OR_COMMENTS ({WHITESPACE}|{MULTI_LINE_COMMENT}|{SINGLE_LINE_ "("{TABS_AND_SPACES}("integer"){TABS_AND_SPACES}")" { if (PARSER_MODE()) { zend_error(E_DEPRECATED, "Non-canonical cast (integer) is deprecated, use the (int) cast instead"); + if (PARSER_MODE() && EG(exception)) { + RETURN_TOKEN(T_ERROR); + } } RETURN_TOKEN(T_INT_CAST); } @@ -1647,6 +1655,9 @@ OPTIONAL_WHITESPACE_OR_COMMENTS ({WHITESPACE}|{MULTI_LINE_COMMENT}|{SINGLE_LINE_ "("{TABS_AND_SPACES}("double"){TABS_AND_SPACES}")" { if (PARSER_MODE()) { zend_error(E_DEPRECATED, "Non-canonical cast (double) is deprecated, use the (float) cast instead"); + if (PARSER_MODE() && EG(exception)) { + RETURN_TOKEN(T_ERROR); + } } RETURN_TOKEN(T_DOUBLE_CAST); } @@ -1666,6 +1677,9 @@ OPTIONAL_WHITESPACE_OR_COMMENTS ({WHITESPACE}|{MULTI_LINE_COMMENT}|{SINGLE_LINE_ "("{TABS_AND_SPACES}("binary"){TABS_AND_SPACES}")" { if (PARSER_MODE()) { zend_error(E_DEPRECATED, "Non-canonical cast (binary) is deprecated, use the (string) cast instead"); + if (PARSER_MODE() && EG(exception)) { + RETURN_TOKEN(T_ERROR); + } } RETURN_TOKEN(T_STRING_CAST); } @@ -1685,6 +1699,9 @@ OPTIONAL_WHITESPACE_OR_COMMENTS ({WHITESPACE}|{MULTI_LINE_COMMENT}|{SINGLE_LINE_ "("{TABS_AND_SPACES}("boolean"){TABS_AND_SPACES}")" { if (PARSER_MODE()) { zend_error(E_DEPRECATED, "Non-canonical cast (boolean) is deprecated, use the (bool) cast instead"); + if (PARSER_MODE() && EG(exception)) { + RETURN_TOKEN(T_ERROR); + } } RETURN_TOKEN(T_BOOL_CAST); } diff --git a/ext/tokenizer/tests/bug19507.phpt b/ext/tokenizer/tests/gh19507.phpt similarity index 97% rename from ext/tokenizer/tests/bug19507.phpt rename to ext/tokenizer/tests/gh19507.phpt index fc67d603fb6a5..4b147487c8572 100644 --- a/ext/tokenizer/tests/bug19507.phpt +++ b/ext/tokenizer/tests/gh19507.phpt @@ -15,12 +15,10 @@ $tokens = PhpToken::tokenize('getTokenName(), "\n"; } - +?> --EXPECT-- error handler called: Non-canonical cast (double) is deprecated, use the (float) cast instead T_OPEN_TAG -T_LNUMBER -; T_DOUBLE_CAST T_WHITESPACE T_VARIABLE From 101cb54286d4800530eb0cf6764152db127697d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Wer=C5=82os?= Date: Fri, 22 Aug 2025 16:39:18 +0200 Subject: [PATCH 3/5] Update ext/tokenizer/tests/gh19507.phpt Co-authored-by: Arnaud Le Blanc <365207+arnaud-lb@users.noreply.github.com> --- ext/tokenizer/tests/gh19507.phpt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/tokenizer/tests/gh19507.phpt b/ext/tokenizer/tests/gh19507.phpt index 4b147487c8572..63694698f8d11 100644 --- a/ext/tokenizer/tests/gh19507.phpt +++ b/ext/tokenizer/tests/gh19507.phpt @@ -1,5 +1,5 @@ --TEST-- -Bug #19507: Recursive tokenization during `token_get_all` result on error +GH-19507: Corrupted result after recursive tokenization during token_get_all() --EXTENSIONS-- tokenizer --FILE-- From 11263378f4aa7c3c4169336b90b9ca659715ca8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Wer=C5=82os?= Date: Fri, 22 Aug 2025 17:28:32 +0200 Subject: [PATCH 4/5] Review updates --- Zend/zend_language_scanner.l | 1 + .../tests/{gh19507.phpt => gh19507_eval.phpt} | 2 +- ext/tokenizer/tests/gh19507_throw.phpt | 24 +++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) rename ext/tokenizer/tests/{gh19507.phpt => gh19507_eval.phpt} (92%) create mode 100644 ext/tokenizer/tests/gh19507_throw.phpt diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index aa8b352272f74..a968555b38028 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -2758,6 +2758,7 @@ skip_escape_conversion: SCNG(heredoc_scan_ahead) = 1; SCNG(heredoc_indentation) = 0; SCNG(heredoc_indentation_uses_spaces) = 0; + SCNG(on_event_context); LANG_SCNG(on_event) = NULL; CG(doc_comment) = NULL; diff --git a/ext/tokenizer/tests/gh19507.phpt b/ext/tokenizer/tests/gh19507_eval.phpt similarity index 92% rename from ext/tokenizer/tests/gh19507.phpt rename to ext/tokenizer/tests/gh19507_eval.phpt index 63694698f8d11..e731cf818afbd 100644 --- a/ext/tokenizer/tests/gh19507.phpt +++ b/ext/tokenizer/tests/gh19507_eval.phpt @@ -1,5 +1,5 @@ --TEST-- -GH-19507: Corrupted result after recursive tokenization during token_get_all() +GH-19507: Corrupted result after recursive tokenization during token_get_all() (error handler with eval) --EXTENSIONS-- tokenizer --FILE-- diff --git a/ext/tokenizer/tests/gh19507_throw.phpt b/ext/tokenizer/tests/gh19507_throw.phpt new file mode 100644 index 0000000000000..4449fe430538d --- /dev/null +++ b/ext/tokenizer/tests/gh19507_throw.phpt @@ -0,0 +1,24 @@ +--TEST-- +GH-19507: Corrupted result after recursive tokenization during token_get_all() (error handler with throw) +--EXTENSIONS-- +tokenizer +--FILE-- +getTokenName(), "\n"; +} +?> +--EXPECTF-- +Fatal error: Uncaught RuntimeException: error handler called: Non-canonical cast (double) is deprecated, use the (float) cast instead in %s:%d +Stack trace: +#0 [internal function]: {closure:%s:%d}(%d, 'Non-canonical c...', '', 1) +#1 %s(%d): PhpToken::tokenize(' Date: Fri, 22 Aug 2025 17:45:11 +0200 Subject: [PATCH 5/5] Fix `Zend/zend_language_scanner.l` --- Zend/zend_language_scanner.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index a968555b38028..73dc48d07ff9d 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -2758,7 +2758,7 @@ skip_escape_conversion: SCNG(heredoc_scan_ahead) = 1; SCNG(heredoc_indentation) = 0; SCNG(heredoc_indentation_uses_spaces) = 0; - SCNG(on_event_context); + SCNG(on_event_context) = NULL; LANG_SCNG(on_event) = NULL; CG(doc_comment) = NULL;