Skip to content

Commit df6ddce

Browse files
author
Craig Cornelius
authored
Fix collation test gen. Also resolve all Java and CPP executor test failures. (#495)
* Fix problems from unescaping test strings * Update collation generator to create correct rules * Fixing collation generation to handle \x20 and \xA0 characters. ICU4J handles caseFirst and caseLevel * Fixing all but 6 ICU4C collation test failures * Fixing all but 6 ICU4C collation test failures * Fix all test failures in CPP collation
1 parent eada155 commit df6ddce

File tree

5 files changed

+163
-38
lines changed

5 files changed

+163
-38
lines changed

executors/cpp/coll.cpp

Lines changed: 81 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -227,11 +227,24 @@ auto TestCollator(json_object *json_in) -> string {
227227
reorder_codes_v = BuildReorderList(reorder_string, debug_level);
228228
}
229229

230+
json_object *alternate_obj = json_object_object_get(json_in, "alternate");
231+
UColAttributeValue alternate_value;
232+
if (alternate_obj) {
233+
string alternate = json_object_get_string(alternate_obj);
234+
if (alternate == "shifted") {
235+
alternate_value = UCOL_SHIFTED;
236+
} else
237+
if (alternate == "non-ignorable") {
238+
alternate_value = UCOL_NON_IGNORABLE;
239+
}
240+
}
241+
230242
// Check for rule-based collation
231243
json_object *rules_obj = json_object_object_get(json_in, "rules");
232244
int rules_len = json_object_get_string_len(rules_obj);
233245
string_view rules_string(json_object_get_string(rules_obj), rules_len);
234246
UnicodeString uni_rules = UnicodeString::fromUTF8(rules_string).unescape();
247+
string actual_rules = "";
235248

236249
// Handle some options
237250
json_object *ignore_obj =
@@ -262,16 +275,57 @@ auto TestCollator(json_object *json_in) -> string {
262275
return json_object_to_json_string(return_json);
263276
}
264277

278+
// Get the rules as seen by the collator.
279+
UnicodeString gotten_rules = rb_coll->getRules();
280+
gotten_rules.toUTF8String(actual_rules);
281+
282+
// Make sure that attributes and options are set for rule based collator, too.
265283
if (reorder_obj) {
266-
if (debug_level > 0) {
267-
cout << "# RB_COLL: reorder codes: " << reorder_string << "(" << reorder_codes_v.size() << ")" << endl;
268-
}
269284
rb_coll->setReorderCodes(reorder_codes_v.data(), reorder_codes_v.size(), status);
270285
if (check_icu_error(status, return_json, "rb_coll with reorder")) {
271286
return json_object_to_json_string(return_json);
272287
}
273288
}
274289

290+
if (alternate_obj) {
291+
rb_coll->setAttribute(UCOL_ALTERNATE_HANDLING, alternate_value, status);
292+
if (check_icu_error(status, return_json, "alternate")) {
293+
json_object_object_add(
294+
return_json, "actual_options",
295+
json_object_new_string(json_object_get_string(json_in)));
296+
297+
return json_object_to_json_string(return_json);
298+
}
299+
}
300+
301+
json_object *case_first_obj = json_object_object_get(json_in, "caseFirst");
302+
if (case_first_obj) {
303+
// TODO: Check status
304+
string case_first = json_object_get_string(case_first_obj);
305+
if (case_first == "lower") {
306+
rb_coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
307+
} else
308+
if (case_first == "upper") {
309+
rb_coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
310+
}
311+
}
312+
313+
json_object *case_level_obj = json_object_object_get(json_in, "caseLevel");
314+
if (case_level_obj) {
315+
// TODO: Check status
316+
string case_level = json_object_get_string(case_level_obj);
317+
if (case_level == "off") {
318+
rb_coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status);
319+
} else
320+
if (case_level == "on") {
321+
rb_coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
322+
}
323+
}
324+
325+
if (strength_obj != nullptr) {
326+
rb_coll->setStrength(strength_type);
327+
}
328+
275329
uni_result = rb_coll->compare(us1, us2, status);
276330
if (check_icu_error(status, return_json, "rb_coll->compare")) {
277331
return json_object_to_json_string(return_json);
@@ -292,12 +346,28 @@ auto TestCollator(json_object *json_in) -> string {
292346
this_locale = Locale(locale_string);
293347
}
294348
uni_coll = Collator::createInstance(this_locale, status);
349+
if (check_icu_error(status, return_json,
350+
"Collator:createInstance")) {
351+
json_object_object_add(
352+
return_json, "actual_options",
353+
json_object_new_string(json_object_get_string(json_in)));
354+
355+
return json_object_to_json_string(return_json);
356+
}
295357
}
296358

297-
if (reorder_obj) {
298-
if (debug_level > 0) {
299-
cout << "# UNI_COLL: reorder codes: " << reorder_string << "(" << reorder_codes_v.size() << ")" << endl;
359+
if (alternate_obj) {
360+
uni_coll->setAttribute(UCOL_ALTERNATE_HANDLING, alternate_value, status);
361+
if (check_icu_error(status, return_json, "alternate")) {
362+
json_object_object_add(
363+
return_json, "actual_options",
364+
json_object_new_string(json_object_get_string(json_in)));
365+
366+
return json_object_to_json_string(return_json);
300367
}
368+
}
369+
370+
if (reorder_obj) {
301371
uni_coll->setReorderCodes(reorder_codes_v.data(), reorder_codes_v.size(), status);
302372
if (check_icu_error(status, return_json, "uni_coll->setReorderCodes")) {
303373
return json_object_to_json_string(return_json);
@@ -344,9 +414,6 @@ auto TestCollator(json_object *json_in) -> string {
344414
values_it = values_map.find(test_value);
345415
if (values_it != values_map.end()) {
346416
// This is the value that we can set
347-
if (debug_level > 0) {
348-
cout << "# SETTING attribute " << key << " to " << test_value << " == " << values_it->second << endl;
349-
}
350417
uni_coll->setAttribute(ucol_attribute, values_it->second, status);
351418
if (check_icu_error(
352419
status, return_json,
@@ -365,29 +432,14 @@ auto TestCollator(json_object *json_in) -> string {
365432
values_it = val_attribute_map.find(test_value);
366433
if (values_it != val_attribute_map.end()) {
367434
uni_coll->setMaxVariable(values_it->second, status);
368-
if (debug_level > 0) {
369-
cout << "# SETTING maxVariable to " << values_it->second << " = " << test_value << endl;
370-
}
371435
}
372436
}
373437

374-
// Just to check the result.
375-
uni_coll->getAttribute(UCOL_ALTERNATE_HANDLING, status); // ignore return
376-
if (check_icu_error(
377-
status, return_json,
378-
"getet UCOL_ALTERNATE_HANDLING")) {
379-
return json_object_to_json_string(return_json);
380-
}
381-
382438
// Perform the string comparison
383439
uni_result = uni_coll->compare(us1, us2, status);
384-
if (check_icu_error( status, return_json, "uni_coll_compare")) {
440+
if (check_icu_error( status, return_json, "uni_coll->compare")) {
385441
return json_object_to_json_string(return_json);
386442
}
387-
388-
if (uni_coll != nullptr) {
389-
uni_coll->getAttribute(UCOL_ALTERNATE_HANDLING, status); // ignore result
390-
}
391443
delete uni_coll;
392444
if (check_icu_error( status, return_json, "uni_coll->getATTRIBUTE")) {
393445
return json_object_to_json_string(return_json);
@@ -408,7 +460,7 @@ auto TestCollator(json_object *json_in) -> string {
408460

409461
if (!coll_result) {
410462
// Test did not succeed!
411-
// Include data compared in the failing test
463+
// Include the data compared in the failing test
412464
json_object* actual_values = json_object_new_object();
413465

414466
json_object_object_add(
@@ -424,14 +476,12 @@ auto TestCollator(json_object *json_in) -> string {
424476
return_json, "actual_options",
425477
actual_values);
426478

427-
428479
if (rules_len > 0) {
480+
// Show the rules that were actually found.
429481
json_object_object_add(
430482
actual_values,
431483
"rules_actual",
432-
json_object_new_string_len(
433-
rules_string.data(),
434-
rules_string.size())
484+
json_object_new_string(actual_rules.c_str())
435485
);
436486
}
437487

@@ -440,6 +490,7 @@ auto TestCollator(json_object *json_in) -> string {
440490
return_json, "compare", json_object_new_int64(uni_result));
441491
}
442492

493+
// The output
443494
json_object_object_add(
444495
return_json, "result", json_object_new_boolean(static_cast<json_bool>(coll_result)));
445496

executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/collator/CollatorInputJson.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ public class CollatorInputJson implements ITestTypeInputJson {
3333
public int[] reorder_codes;
3434
public String unrecognized_script_codes;
3535

36-
public String reorder;
36+
public String caseFirst;
3737

38-
public String case_first;
38+
public String caseLevel;
3939

4040
public String backwards;
4141

executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/collator/CollatorTester.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,19 @@ public ITestTypeInputJson inputMapToJson(Map<String, Object> inputMapData) {
4444
script_tags_map.put("Hani", UScript.HAN);
4545
script_tags_map.put("Hira", UScript.HIRAGANA);
4646
script_tags_map.put("Zzzz", UScript.UNKNOWN);
47-
48-
result.s1 = Utility.unescape((String) inputMapData.get("s1", null));
49-
result.s2 = Utility.unescape((String) inputMapData.get("s2", null));
47+
48+
result.s1 = (String) inputMapData.get("s1", null);
49+
result.s2 = (String) inputMapData.get("s2", null);
5050

5151
result.locale = (String) inputMapData.get("locale", null);
5252
result.strength = (String) inputMapData.get("strength", null);
5353

5454
result.ignorePunctuation = (boolean) inputMapData.get("ignorePunctuation", false);
5555
result.line = (int) ((double) inputMapData.get("line", 0.0));
5656

57+
result.caseFirst = (String) inputMapData.get("caseFirst", null);
58+
result.caseLevel = (String) inputMapData.get("caseLevel", null);
59+
5760
// Resolve "&lt;"
5861
result.compare_type = (String) inputMapData.get("compare_type", null);
5962
if (result.compare_type != null && ! result.compare_type.equals("") && result.compare_type.length() > 4) {
@@ -238,6 +241,17 @@ public Collator getCollatorForInput(CollatorInputJson input) {
238241
} catch (Exception e) {
239242
return null;
240243
}
244+
if (input.caseFirst != null) {
245+
if (input.caseFirst.equals("lower")) {
246+
collator.setLowerCaseFirst(true);
247+
} else if (input.caseFirst.equals("upper")) {
248+
collator.setUpperCaseFirst(true);
249+
250+
}
251+
}
252+
if (input.caseLevel != null && input.caseLevel.equals("on")) {
253+
collator.setCaseLevel(true);
254+
}
241255
}
242256

243257
// ensure that ICU performs decomposition before collation in order to get proper collators,

executors/icu4j/74/executor-icu4j/src/test/java/org/unicode/conformance/collator/icu74/CollatorTest.java

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public void testAttributesAsArrayList() {
4545
public void testRule004() {
4646
// in ICU 76.1 data
4747
String testInput =
48-
"{\"test_type\": \"collation\", \"compare_type\":\"&lt;3\",\"s1\":\"\u0002\",\"s2\":\"\u0300\",\"source_file\":\"collationtest.txt\",\"line\":43,\"label\":\"00002\",\"test_description\":\"simple CEs &amp; expansions\",\"rules\":\"&\\u0001<<<\\u0300&9<\\u0000&\\uA00A\\uA00B=\\uA002&\\uA00A\\uA00B\\u00050005=\\uA003\",\"hexhash\":\"7d3d23fab7f34c1cd44e90b40f7ed33c5bb317ba\"}";
48+
"{\"test_type\": \"collation\", \"compare_type\":\"&lt;3\",\"s1\":\"\u0002\",\"s2\":\"\u0300\",\"source_file\":\"collationtest.txt\",\"line\":43,\"label\":\"00002\",\"test_description\":\"simple CEs & expansions\",\"rules\":\"&\\u0001<<<\\u0300&9<\\u0000&\\uA00A\\uA00B=\\uA002&\\uA00A\\uA00B\\u00050005=\\uA003\",\"hexhash\":\"7d3d23fab7f34c1cd44e90b40f7ed33c5bb317ba\"}";
4949

5050
CollatorOutputJson output =
5151
(CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
@@ -175,5 +175,59 @@ public void testSHIFTED_SHORT() {
175175

176176
assertTrue(output.result);
177177
}
178+
179+
@Test
180+
public void testNON_IGNORABLE_0235457() {
181+
// Unexpected test failure
182+
String testInput =
183+
"{\"test_type\": \"collation\", \"label\": \"0235457\", \"s1\": \"\\ufe68A\", \"s2\": \"\\\\b\", \"line\": 8263, \"source_file\": \"CollationTest_NON_IGNORABLE_SHORT.txt\", \"hexhash\": \"c5dbf2d2a1b6eba940b76b840f2d991fde8f29c7\" }";
184+
CollatorOutputJson output = (CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
185+
186+
assertTrue(output.result);
187+
}
188+
189+
@Test
190+
public void test_01195_rules() {
191+
// Unexpected test failure
192+
String testInput =
193+
"{\"test_type\": \"collation\",\"compare_type\": \"<1\", \"s1\": \"opd\", \"s2\": \"op\\u0109\", \"source_file\": \"collationtest.txt\", \"line\": 2104, \"label\": \"01195\", \"test_description\": \"fall back to mappings with shorter prefixes, not immediately to ones with no prefixes\", \"rules\": \"&x=op|\\u0109&y=p|\\u00e7\", \"hexhash\": \"bc322b1e989cd75f5956b758dbf770b94f4011ff\" }";
194+
195+
CollatorOutputJson output = (CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
196+
197+
assertTrue(output.result);
198+
}
199+
200+
@Test
201+
public void test_00312_rules() {
202+
// Unexpected test failure
203+
String testInput =
204+
"{\"test_type\": \"collation\", \"compare_type\":\"<3\",\"s1\":\"aAt\",\"s2\":\"aa\",\"source_file\":\"collationtest.txt\",\"line\":658,\"label\":\"00312\",\"test_description\":\"tertiary CEs, tertiary, caseLevel=off, caseFirst=upper\",\"rules\":\"&\\\\u0001<<<t<<<T\",\"caseFirst\":\"upper\",\"hexhash\":\"453fdaeaaaf99825d778f1573de2612aafda84b2\"}";
205+
206+
CollatorOutputJson output = (CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
207+
208+
assertTrue(output.result);
209+
}
210+
211+
@Test
212+
public void test_00536_rules() {
213+
// Unexpected test failure
214+
String testInput =
215+
"{\"test_type\": \"collation\", \"compare_type\":\"<1\",\"s1\":\"z\",\"s2\":\" \",\"source_file\":\"collationtest.txt\",\"line\":1047,\"label\":\"00536\",\"test_description\":\"adjust special reset positions according to previous rules, CLDR ticket 6070\",\"rules\":\"&[last primary ignorable]<<x<<<y&[last primary ignorable]<<z\",\"hexhash\":\"49f1099a0032106e6a3861bb5d228568c6a74d47\"}";
216+
217+
CollatorOutputJson output = (CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
218+
219+
assertTrue(output.result);
220+
}
221+
222+
@Test
223+
public void test_00543_rules() {
224+
// Unexpected test failure
225+
String testInput =
226+
"{\"test_type\": \"collation\", \"compare_type\":\"<1\",\"s1\":\"\uFDD1\u00A0\",\"s2\":\"t\",\"source_file\":\"collationtest.txt\",\"line\":1070,\"label\":\"00543\",\"test_description\":\"adjust special reset positions according to previous rules, CLDR ticket 6070\",\"rules\":\"&[before 2][first variable]<<z&[before 2][first variable]<<y&[before 3][first variable]<<<x&[before 3][first variable]<<<w&[before 1][first variable]<v&[before 2][first variable]<<u&[before 3][first variable]<<<t&[before 2]\\\\uFDD1\\\\xA0<<s\",\"hexhash\":\"295bcd43ae62d58b89137aa45401f386881fc189\"}";
227+
228+
CollatorOutputJson output = (CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
229+
230+
assertTrue(output.result);
231+
}
178232
}
179233

testgen/generators/collation.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ def set_patterns(self):
4545
self.root_locale,
4646
self.test_line,
4747
self.attribute_test,
48-
self.reorder_test
48+
self.reorder_test,
49+
self.rule_header_pattern
4950
]
5051

5152
self.rule_breakout_patterns = [
@@ -172,6 +173,7 @@ def check_parse_compare(self, line_index, lines, filename):
172173
if is_comparison_match := self.comparison_line.match(line_in):
173174
compare_type = is_comparison_match.group(1)
174175
raw_string2 = is_comparison_match.group(3)
176+
# TODO: Handle raw_string.replace('\\x', '\u00')
175177
string2 = ''
176178
try:
177179
string2 = raw_string2.rstrip() # Don't do any unescaping
@@ -182,9 +184,13 @@ def check_parse_compare(self, line_index, lines, filename):
182184
# Special cases for comparing only with \u0020 or \u000a
183185
if string2 == '' and raw_line.find('\\u0020') > 0:
184186
string2 = '\u0020'
187+
if string2 == '' and raw_line.find('\\x20') > 0:
188+
string2 = '\u0020'
185189
if string2 == '' and raw_line.find('\\u000A') > 0:
186190
string2 = '\u000a'
187-
191+
# A very special case. Generalize to handle \xYZ?
192+
if raw_line.find('\\uFDD1\\xA0') > 0:
193+
string2 = '\ufdd1\u00a0'
188194
new_test = {
189195
'compare_type': compare_type,
190196
's1': string1,

0 commit comments

Comments
 (0)