From d0078741b414b9ac80726c3efdf2d2207eb5a8e1 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sun, 11 May 2025 09:57:20 +0200 Subject: [PATCH] Correctly rewrite bounded repetitions following an explicit class Character classes were never unstacked, so a bounded repetition following a class would never be rewritten. This yields no time savings on the bench run; maximum RSS and peak memory do seem to go down by a few MiB (we're talking 129 to 126 or so), but it's not 100% reliable. --- ua-parser/src/lib.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ua-parser/src/lib.rs b/ua-parser/src/lib.rs index b28a880..34fc2c9 100644 --- a/ua-parser/src/lib.rs +++ b/ua-parser/src/lib.rs @@ -664,7 +664,7 @@ fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> { inclass += 1; } ']' if !escape => { - inclass += 1; + inclass -= 1; } // no need for special cases because regex allows nesting // character classes, whereas js or python don't \o/ @@ -721,6 +721,18 @@ mod test_rewrite_regex { assert_eq!(rewrite(".{1,300}x"), ".+x"); } + #[test] + fn rewrite_all_repetitions() { + assert_eq!( + rewrite("; {0,2}(T-(?:07|[^0][0-9])[^;/]{1,100}?)(?: Build|\\) AppleWebKit)"), + "; {0,2}(T-(?:07|[^0][0-9])[^;/]+?)(?: Build|\\) AppleWebKit)", + ); + assert_eq!( + rewrite("; {0,2}(SH\\-?[0-9][0-9][^;/]{1,100}|SBM[0-9][^;/]{1,100}?)(?: Build|\\) AppleWebKit)"), + "; {0,2}(SH\\-?[0-9][0-9][^;/]+|SBM[0-9][^;/]+?)(?: Build|\\) AppleWebKit)", + ) + } + #[test] fn ignore_non_repetitions() { assert_eq!(