Skip to content

Commit ddc4318

Browse files
committed
8352628: Refine Grapheme test
Reviewed-by: jlu, joehw
1 parent d8c2f59 commit ddc4318

File tree

1 file changed

+80
-83
lines changed

1 file changed

+80
-83
lines changed

test/jdk/java/util/regex/RegExTest.java

Lines changed: 80 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -49,7 +49,6 @@
4949
import java.math.BigInteger;
5050
import java.nio.CharBuffer;
5151
import java.nio.file.Files;
52-
import java.nio.file.Path;
5352
import java.nio.file.Paths;
5453
import java.util.*;
5554
import java.util.function.Function;
@@ -62,8 +61,8 @@
6261
import java.util.stream.IntStream;
6362
import java.util.stream.Stream;
6463

64+
import org.testng.annotations.DataProvider;
6565
import org.testng.annotations.Test;
66-
import org.testng.Assert;
6766

6867

6968
import jdk.test.lib.RandomFactory;
@@ -4148,87 +4147,85 @@ public static void embeddedFlags() {
41484147
Pattern.compile("(?imsducxU).(?-imsducxU).");
41494148
}
41504149

4151-
@Test
4152-
public static void grapheme() throws Exception {
4153-
final int[] lineNumber = new int[1];
4154-
Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST),
4150+
@DataProvider
4151+
private static String[] graphemeTestCases() throws Exception {
4152+
return Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST),
41554153
Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt")))
4156-
.forEach( ln -> {
4157-
lineNumber[0]++;
4158-
if (ln.length() == 0 || ln.startsWith("#")) {
4159-
return;
4160-
}
4161-
ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4162-
// System.out.println(str);
4163-
String[] strs = ln.split("\u00f7|\u00d7");
4164-
StringBuilder src = new StringBuilder();
4165-
ArrayList<String> graphemes = new ArrayList<>();
4166-
StringBuilder buf = new StringBuilder();
4167-
int offBk = 0;
4168-
for (String str : strs) {
4169-
if (str.length() == 0) // first empty str
4170-
continue;
4171-
int cp = Integer.parseInt(str, 16);
4172-
src.appendCodePoint(cp);
4173-
buf.appendCodePoint(cp);
4174-
offBk += (str.length() + 1);
4175-
if (ln.charAt(offBk) == '\u00f7') { // DIV
4176-
graphemes.add(buf.toString());
4177-
buf = new StringBuilder();
4178-
}
4179-
}
4180-
Pattern p = Pattern.compile("\\X");
4181-
// (1) test \X directly
4182-
Matcher m = p.matcher(src.toString());
4183-
for (String g : graphemes) {
4184-
// System.out.printf(" grapheme:=[%s]%n", g);
4185-
String group = null;
4186-
if (!m.find() || !(group = m.group()).equals(g)) {
4187-
fail("Failed pattern \\X [" + ln + "] : "
4188-
+ "expected: " + g + " - actual: " + group
4189-
+ "(line " + lineNumber[0] + ")");
4190-
}
4191-
}
4192-
assertFalse(m.find());
4193-
// test \b{g} without \X via Pattern
4194-
Pattern pbg = Pattern.compile("\\b{g}");
4195-
m = pbg.matcher(src.toString());
4196-
m.find();
4197-
int prev = m.end();
4198-
for (String g : graphemes) {
4199-
String group = null;
4200-
if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) {
4201-
fail("Failed pattern \\b{g} [" + ln + "] : "
4202-
+ "expected: " + g + " - actual: " + group
4203-
+ "(line " + lineNumber[0] + ")");
4204-
}
4205-
assertEquals("", m.group());
4206-
prev = m.end();
4207-
}
4208-
assertFalse(m.find());
4209-
// (2) test \b{g} + \X via Scanner
4210-
Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4211-
for (String g : graphemes) {
4212-
String next = null;
4213-
if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) {
4214-
fail("Failed \\b{g} [" + ln + "] : "
4215-
+ "expected: " + g + " - actual: " + next
4216-
+ " (line " + lineNumber[0] + ")");
4217-
}
4218-
}
4219-
assertFalse(s.hasNext(p));
4220-
// test \b{g} without \X via Scanner
4221-
s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4222-
for (String g : graphemes) {
4223-
String next = null;
4224-
if (!s.hasNext() || !(next = s.next()).equals(g)) {
4225-
fail("Failed \\b{g} [" + ln + "] : "
4226-
+ "expected: " + g + " - actual: " + next
4227-
+ " (line " + lineNumber[0] + ")");
4228-
}
4229-
}
4230-
assertFalse(s.hasNext());
4231-
});
4154+
.filter(line -> !line.isEmpty() && !line.startsWith("#"))
4155+
.toArray(String[]::new);
4156+
}
4157+
4158+
/**
 * Checks one grapheme-break test case against {@code \X} and {@code \b{g}}.
 *
 * The line is reduced to a sequence of hexadecimal code points separated by
 * U+00F7 / U+00D7 marker characters. The code points are concatenated into
 * the subject string, and the expected graphemes are the runs of code points
 * that end at a U+00F7 marker. The expected graphemes are then compared, in
 * order, with what four different matching strategies produce:
 * {@code \X} via Matcher, {@code \b{g}} via Matcher, {@code \b{g}} as a
 * Scanner delimiter combined with {@code \X}, and {@code \b{g}} as a Scanner
 * delimiter alone.
 *
 * @param line one test case supplied by the "graphemeTestCases" data provider
 * @throws Exception declared by the signature; nothing in this body throws a
 *         checked exception explicitly
 */
@Test(dataProvider = "graphemeTestCases")
4159+
public static void grapheme(String line) throws Exception {
4160+
// Remove whitespace, parenthesised letter runs, the bracketed-letter
// alternative and everything from '#' onward; presumably only the hex
// code points and the break markers remain afterwards.
String tc = line.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+]|#.*", "");
4161+
// Split on either marker character to obtain the hex code point strings.
String[] strs = tc.split("\u00f7|\u00d7");
4162+
// src: the whole subject string built from every code point in the case.
StringBuilder src = new StringBuilder();
4163+
// graphemes: the expected clusters, in order of appearance.
ArrayList<String> graphemes = new ArrayList<>();
4164+
// buf: code points of the cluster currently being accumulated.
StringBuilder buf = new StringBuilder();
4165+
// offBk: index in tc of the marker following the code point just consumed
// (assumes tc begins with a single marker character - TODO confirm).
int offBk = 0;
4166+
for (String str : strs) {
4167+
if (str.length() == 0) // first empty str
4168+
continue;
4169+
int cp = Integer.parseInt(str, 16);
4170+
src.appendCodePoint(cp);
4171+
buf.appendCodePoint(cp);
4172+
// Advance past this code point's hex digits plus one marker character.
offBk += (str.length() + 1);
4173+
// A U+00F7 marker after the code point closes the current cluster; any
// other marker leaves buf accumulating into the same cluster.
if (tc.charAt(offBk) == '\u00f7') { // DIV
4174+
graphemes.add(buf.toString());
4175+
buf = new StringBuilder();
4176+
}
4177+
}
4178+
Pattern p = Pattern.compile("\\X");
4179+
// (1) test \X directly
4180+
Matcher m = p.matcher(src.toString());
4181+
for (String g : graphemes) {
4182+
// System.out.printf(" grapheme:=[%s]%n", g);
4183+
String group = null;
4184+
// Each successive find() must yield exactly the next expected cluster.
if (!m.find() || !(group = m.group()).equals(g)) {
4185+
fail("Failed pattern \\X [" + tc + "] : "
4186+
+ "expected: " + g + " - actual: " + group);
4187+
}
4188+
}
4189+
// No further match may remain once all expected clusters are consumed.
assertFalse(m.find());
4190+
// test \b{g} without \X via Pattern
4191+
Pattern pbg = Pattern.compile("\\b{g}");
4192+
m = pbg.matcher(src.toString());
4193+
// Consume the first boundary match (presumably the one at the start of the
// input) so that prev marks where the first expected cluster begins.
m.find();
4194+
int prev = m.end();
4195+
for (String g : graphemes) {
4196+
String group = null;
4197+
// The text between the previous boundary and the next one must equal the
// expected cluster.
if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) {
4198+
fail("Failed pattern \\b{g} [" + tc + "] : "
4199+
+ "expected: " + g + " - actual: " + group);
4200+
}
4201+
// The boundary match itself is expected to be empty.
assertEquals("", m.group());
4202+
prev = m.end();
4203+
}
4204+
assertFalse(m.find());
4205+
// (2) test \b{g} + \X via Scanner
4206+
Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4207+
for (String g : graphemes) {
4208+
String next = null;
4209+
// Each token delimited by \b{g} must match \X and equal the expected cluster.
if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) {
4210+
fail("Failed \\b{g} [" + tc + "] : "
4211+
+ "expected: " + g + " - actual: " + next);
4212+
}
4213+
}
4214+
assertFalse(s.hasNext(p));
4215+
// test \b{g} without \X via Scanner
4216+
// A fresh Scanner is created because the previous one has been consumed.
s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4217+
for (String g : graphemes) {
4218+
String next = null;
4219+
if (!s.hasNext() || !(next = s.next()).equals(g)) {
4220+
fail("Failed \\b{g} [" + tc + "] : "
4221+
+ "expected: " + g + " - actual: " + next);
4222+
}
4223+
}
4224+
assertFalse(s.hasNext());
4225+
}
4226+
4227+
@Test
4228+
public static void graphemeSanity() {
42324229
// some sanity checks
42334230
assertTrue(Pattern.compile("\\X{10}").matcher("abcdefghij").matches() &&
42344231
Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() &&

0 commit comments

Comments
 (0)