Skip to content

Commit 5c804e4

Browse files
committed
bugfix: char class casefold for certain chars
When a character's code point fits in a single byte (at most 0xff) but the character takes more than one byte in the current encoding, the case-folding code incorrectly put it in the bitset instead of a code range. As a result, with UTF-8 encoding, case folding inside character classes works incorrectly for characters in the range \u0080 to \u00ff (Latin-1 Supplement). Before the fix, when matching with ignore-case: * `"\u00c2"` against `[\u00e0-\u00e5]` returns false * `"\u00c2"` against `[\u00e2]` returns false * `"\u00c2"` against `\u00e2` returns true
1 parent a0910bb commit 5c804e4

File tree

2 files changed

+56
-1
lines changed

2 files changed

+56
-1
lines changed

src/org/joni/ApplyCaseFold.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public void apply(int from, int[]to, int length, Object o) {
4141

4242
if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) {
4343
if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) {
44-
if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
44+
if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE || enc.codeToMbcLength(to[0]) > 1) {
4545
cc.addCodeRange(env, to[0], to[0]);
4646
} else {
4747
/* /(?i:[^A-C])/.match("a") ==> fail. */

test/org/joni/test/TestJava.java

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* Permission is hereby granted, free of charge, to any person obtaining a copy of
3+
* this software and associated documentation files (the "Software"), to deal in
4+
* the Software without restriction, including without limitation the rights to
5+
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6+
* of the Software, and to permit persons to whom the Software is furnished to do
7+
* so, subject to the following conditions:
8+
*
9+
* The above copyright notice and this permission notice shall be included in all
10+
* copies or substantial portions of the Software.
11+
*
12+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18+
* SOFTWARE.
19+
*/
20+
package org.joni.test;
21+
22+
import org.jcodings.Encoding;
23+
import org.jcodings.specific.UTF8Encoding;
24+
import org.joni.Option;
25+
import org.joni.Syntax;
26+
27+
/**
 * Regression test for case-insensitive matching of Latin-1 Supplement
 * characters, configured with the UTF-8 encoding and the Java regex syntax.
 */
public class TestJava extends Test {
28+
29+
/** Returns {@code Option.DEFAULT}; presumably the base option set used by the Test harness (confirm against Test). */
public int option() {
30+
return Option.DEFAULT;
31+
}
32+
33+
/** Returns the UTF-8 encoding instance this test class runs with. */
public Encoding encoding() {
34+
return UTF8Encoding.INSTANCE;
35+
}
36+
37+
/** Returns the encoding name as a string; presumably used by Test to encode the Java strings (confirm against Test). */
public String testEncoding() {
38+
return "utf-8";
39+
}
40+
41+
/** Returns the regex syntax this test class compiles patterns with: {@code Syntax.Java}. */
public Syntax syntax() {
42+
return Syntax.Java;
43+
}
44+
45+
/**
 * Checks that U+00C2 matches U+00E2 under IGNORECASE, both inside a
 * character class (range and single member) and as a plain literal.
 */
public void test() throws InterruptedException {
46+
// Ignore-case matching for Latin-1 Supplement characters.
// The patterns use a doubled backslash so the \\uXXXX escape is passed through
// to the regex compiler; the subject string is the literal character U+00C2.
// NOTE(review): x2s is defined outside this file; the arguments look like
// (pattern, subject, expected start, expected end, options). U+00C2 is two
// bytes in UTF-8, which is consistent with an expected end of 2. Confirm against Test.
47+
x2s("[\\u00e0-\\u00e5]", "\u00c2", 0, 2, Option.IGNORECASE);
48+
x2s("[\\u00e2]", "\u00c2", 0, 2, Option.IGNORECASE);
49+
x2s("\\u00e2", "\u00c2", 0, 2, Option.IGNORECASE);
50+
}
51+
52+
/** Command-line entry point: creates a TestJava and calls its run() method (defined outside this file). */
public static void main(String[] args) throws Throwable {
53+
new TestJava().run();
54+
}
55+
}

0 commit comments

Comments
 (0)