Skip to content

Commit a50fbc5

Browse files
mrzasanobu
authored and committed
Use mbuf instead of bitset for character class for small UTF. Fixes #16145
1 parent 025832c commit a50fbc5

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

regparse.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5669,7 +5669,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
56695669
if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
56705670
(is_in == 0 && IS_NCCLASS_NOT(cc))) {
56715671
if (add_flag) {
5672-
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5672+
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= 0x80) {
56735673
r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
56745674
if (r < 0) return r;
56755675
}
@@ -5681,7 +5681,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
56815681
#else
56825682
if (is_in != 0) {
56835683
if (add_flag) {
5684-
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5684+
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= 0x80) {
56855685
if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
56865686
r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
56875687
if (r < 0) return r;

test/ruby/test_regexp.rb

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2114,4 +2114,17 @@ def test_linear_performance
21142114
re =~ s
21152115
end
21162116
end
2117+
2118+
def test_bug_16145_caseinsensitive_small_utf # [Bug#16145]
2119+
o_acute_lower = 243.chr('UTF-8')
2120+
o_acute_upper = 211.chr('UTF-8')
2121+
# [xó] =~ "abcÓ"
2122+
assert(/[x#{o_acute_lower}]/i.match?("abc#{o_acute_upper}"), "should match o acute case insensitive")
2123+
2124+
2125+
e_acute_lower = 233.chr('UTF-8')
2126+
e_acute_upper = 201.chr('UTF-8')
2127+
# [xé] =~ 'CAFÉ'
2128+
assert(/[x#{e_acute_lower}]/i.match?("CAF#{e_acute_upper}"), "should match e acute case insensitive")
2129+
end
21172130
end

0 commit comments

Comments
 (0)