|
1 | 1 | /*
|
2 |
| - * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. |
| 2 | + * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. |
3 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
4 | 4 | *
|
5 | 5 | * This code is free software; you can redistribute it and/or modify it
|
|
49 | 49 | import java.math.BigInteger;
|
50 | 50 | import java.nio.CharBuffer;
|
51 | 51 | import java.nio.file.Files;
|
52 |
| -import java.nio.file.Path; |
53 | 52 | import java.nio.file.Paths;
|
54 | 53 | import java.util.*;
|
55 | 54 | import java.util.function.Function;
|
|
62 | 61 | import java.util.stream.IntStream;
|
63 | 62 | import java.util.stream.Stream;
|
64 | 63 |
|
| 64 | +import org.testng.annotations.DataProvider; |
65 | 65 | import org.testng.annotations.Test;
|
66 |
| -import org.testng.Assert; |
67 | 66 |
|
68 | 67 |
|
69 | 68 | import jdk.test.lib.RandomFactory;
|
@@ -4148,87 +4147,85 @@ public static void embeddedFlags() {
|
4148 | 4147 | Pattern.compile("(?imsducxU).(?-imsducxU).");
|
4149 | 4148 | }
|
4150 | 4149 |
|
4151 |
| - @Test |
4152 |
| - public static void grapheme() throws Exception { |
4153 |
| - final int[] lineNumber = new int[1]; |
4154 |
| - Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST), |
| 4150 | + @DataProvider |
| 4151 | + private static String[] graphemeTestCases() throws Exception { |
| 4152 | + return Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST), |
4155 | 4153 | Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt")))
|
4156 |
| - .forEach( ln -> { |
4157 |
| - lineNumber[0]++; |
4158 |
| - if (ln.length() == 0 || ln.startsWith("#")) { |
4159 |
| - return; |
4160 |
| - } |
4161 |
| - ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); |
4162 |
| - // System.out.println(str); |
4163 |
| - String[] strs = ln.split("\u00f7|\u00d7"); |
4164 |
| - StringBuilder src = new StringBuilder(); |
4165 |
| - ArrayList<String> graphemes = new ArrayList<>(); |
4166 |
| - StringBuilder buf = new StringBuilder(); |
4167 |
| - int offBk = 0; |
4168 |
| - for (String str : strs) { |
4169 |
| - if (str.length() == 0) // first empty str |
4170 |
| - continue; |
4171 |
| - int cp = Integer.parseInt(str, 16); |
4172 |
| - src.appendCodePoint(cp); |
4173 |
| - buf.appendCodePoint(cp); |
4174 |
| - offBk += (str.length() + 1); |
4175 |
| - if (ln.charAt(offBk) == '\u00f7') { // DIV |
4176 |
| - graphemes.add(buf.toString()); |
4177 |
| - buf = new StringBuilder(); |
4178 |
| - } |
4179 |
| - } |
4180 |
| - Pattern p = Pattern.compile("\\X"); |
4181 |
| - // (1) test \X directly |
4182 |
| - Matcher m = p.matcher(src.toString()); |
4183 |
| - for (String g : graphemes) { |
4184 |
| - // System.out.printf(" grapheme:=[%s]%n", g); |
4185 |
| - String group = null; |
4186 |
| - if (!m.find() || !(group = m.group()).equals(g)) { |
4187 |
| - fail("Failed pattern \\X [" + ln + "] : " |
4188 |
| - + "expected: " + g + " - actual: " + group |
4189 |
| - + "(line " + lineNumber[0] + ")"); |
4190 |
| - } |
4191 |
| - } |
4192 |
| - assertFalse(m.find()); |
4193 |
| - // test \b{g} without \X via Pattern |
4194 |
| - Pattern pbg = Pattern.compile("\\b{g}"); |
4195 |
| - m = pbg.matcher(src.toString()); |
4196 |
| - m.find(); |
4197 |
| - int prev = m.end(); |
4198 |
| - for (String g : graphemes) { |
4199 |
| - String group = null; |
4200 |
| - if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) { |
4201 |
| - fail("Failed pattern \\b{g} [" + ln + "] : " |
4202 |
| - + "expected: " + g + " - actual: " + group |
4203 |
| - + "(line " + lineNumber[0] + ")"); |
4204 |
| - } |
4205 |
| - assertEquals("", m.group()); |
4206 |
| - prev = m.end(); |
4207 |
| - } |
4208 |
| - assertFalse(m.find()); |
4209 |
| - // (2) test \b{g} + \X via Scanner |
4210 |
| - Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); |
4211 |
| - for (String g : graphemes) { |
4212 |
| - String next = null; |
4213 |
| - if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) { |
4214 |
| - fail("Failed \\b{g} [" + ln + "] : " |
4215 |
| - + "expected: " + g + " - actual: " + next |
4216 |
| - + " (line " + lineNumber[0] + ")"); |
4217 |
| - } |
4218 |
| - } |
4219 |
| - assertFalse(s.hasNext(p)); |
4220 |
| - // test \b{g} without \X via Scanner |
4221 |
| - s = new Scanner(src.toString()).useDelimiter("\\b{g}"); |
4222 |
| - for (String g : graphemes) { |
4223 |
| - String next = null; |
4224 |
| - if (!s.hasNext() || !(next = s.next()).equals(g)) { |
4225 |
| - fail("Failed \\b{g} [" + ln + "] : " |
4226 |
| - + "expected: " + g + " - actual: " + next |
4227 |
| - + " (line " + lineNumber[0] + ")"); |
4228 |
| - } |
4229 |
| - } |
4230 |
| - assertFalse(s.hasNext()); |
4231 |
| - }); |
| 4154 | + .filter(line -> !line.isEmpty() && !line.startsWith("#")) |
| 4155 | + .toArray(String[]::new); |
| 4156 | + } |
| 4157 | + |
| 4158 | + @Test(dataProvider = "graphemeTestCases") |
| 4159 | + public static void grapheme(String line) throws Exception { |
| 4160 | + String tc = line.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+]|#.*", ""); |
| 4161 | + String[] strs = tc.split("\u00f7|\u00d7"); |
| 4162 | + StringBuilder src = new StringBuilder(); |
| 4163 | + ArrayList<String> graphemes = new ArrayList<>(); |
| 4164 | + StringBuilder buf = new StringBuilder(); |
| 4165 | + int offBk = 0; |
| 4166 | + for (String str : strs) { |
| 4167 | + if (str.length() == 0) // first empty str |
| 4168 | + continue; |
| 4169 | + int cp = Integer.parseInt(str, 16); |
| 4170 | + src.appendCodePoint(cp); |
| 4171 | + buf.appendCodePoint(cp); |
| 4172 | + offBk += (str.length() + 1); |
| 4173 | + if (tc.charAt(offBk) == '\u00f7') { // DIV |
| 4174 | + graphemes.add(buf.toString()); |
| 4175 | + buf = new StringBuilder(); |
| 4176 | + } |
| 4177 | + } |
| 4178 | + Pattern p = Pattern.compile("\\X"); |
| 4179 | + // (1) test \X directly |
| 4180 | + Matcher m = p.matcher(src.toString()); |
| 4181 | + for (String g : graphemes) { |
| 4182 | + // System.out.printf(" grapheme:=[%s]%n", g); |
| 4183 | + String group = null; |
| 4184 | + if (!m.find() || !(group = m.group()).equals(g)) { |
| 4185 | + fail("Failed pattern \\X [" + tc + "] : " |
| 4186 | + + "expected: " + g + " - actual: " + group); |
| 4187 | + } |
| 4188 | + } |
| 4189 | + assertFalse(m.find()); |
| 4190 | + // test \b{g} without \X via Pattern |
| 4191 | + Pattern pbg = Pattern.compile("\\b{g}"); |
| 4192 | + m = pbg.matcher(src.toString()); |
| 4193 | + m.find(); |
| 4194 | + int prev = m.end(); |
| 4195 | + for (String g : graphemes) { |
| 4196 | + String group = null; |
| 4197 | + if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) { |
| 4198 | + fail("Failed pattern \\b{g} [" + tc + "] : " |
| 4199 | + + "expected: " + g + " - actual: " + group); |
| 4200 | + } |
| 4201 | + assertEquals("", m.group()); |
| 4202 | + prev = m.end(); |
| 4203 | + } |
| 4204 | + assertFalse(m.find()); |
| 4205 | + // (2) test \b{g} + \X via Scanner |
| 4206 | + Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); |
| 4207 | + for (String g : graphemes) { |
| 4208 | + String next = null; |
| 4209 | + if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) { |
| 4210 | + fail("Failed \\b{g} [" + tc + "] : " |
| 4211 | + + "expected: " + g + " - actual: " + next); |
| 4212 | + } |
| 4213 | + } |
| 4214 | + assertFalse(s.hasNext(p)); |
| 4215 | + // test \b{g} without \X via Scanner |
| 4216 | + s = new Scanner(src.toString()).useDelimiter("\\b{g}"); |
| 4217 | + for (String g : graphemes) { |
| 4218 | + String next = null; |
| 4219 | + if (!s.hasNext() || !(next = s.next()).equals(g)) { |
| 4220 | + fail("Failed \\b{g} [" + tc + "] : " |
| 4221 | + + "expected: " + g + " - actual: " + next); |
| 4222 | + } |
| 4223 | + } |
| 4224 | + assertFalse(s.hasNext()); |
| 4225 | + } |
| 4226 | + |
| 4227 | + @Test |
| 4228 | + public static void graphemeSanity() { |
4232 | 4229 | // some sanity checks
|
4233 | 4230 | assertTrue(Pattern.compile("\\X{10}").matcher("abcdefghij").matches() &&
|
4234 | 4231 | Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() &&
|
|
0 commit comments