Skip to content

Commit aec3bcb

Browse files
author
Craig Cornelius
authored
Collation data generation fixes, reducing test failures (#472)
* Collation data generation fixes, reducing test failures
* Fixing some issues
* NodeJS collator fixed!
* fixing temporary workaround
* Fix regex
* NodeJS collation: change to 'best fit'
* Fix testgen for spaces in test data. Small improvement
* Updating code to add actual input when collation fails.
* Update collation result schema to handle actual options
* Fix problems with locale in nodejs collation.
1 parent 22ef3d9 commit aec3bcb

File tree

8 files changed

+217
-57
lines changed

8 files changed

+217
-57
lines changed

executors/cpp/coll.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ auto TestCollator(json_object *json_in) -> string {
5555
json_object *str2 = json_object_object_get(json_in, "s2");
5656

5757
// Unescape the input strings?
58-
string string1 = json_object_get_string(str1);
58+
string string1 = json_object_get_string(str1) ;
5959
string string2 = json_object_get_string(str2);
6060

6161
// Does this conversion preserve the data?
@@ -139,7 +139,7 @@ auto TestCollator(json_object *json_in) -> string {
139139
if (check_icu_error(status, return_json, "create RuleBasedCollator")) {
140140
// Put json_in as the actual input received.
141141
json_object_object_add(
142-
return_json, "actual_input",
142+
return_json, "actual_options",
143143
json_object_new_string(json_object_get_string(json_in)));
144144

145145
return json_object_to_json_string(return_json);
@@ -233,15 +233,20 @@ auto TestCollator(json_object *json_in) -> string {
233233
if (!coll_result) {
234234
// Test did not succeed!
235235
// Include data compared in the failing test
236+
json_object* actual_values = json_object_new_object();
237+
236238
json_object_object_add(
237-
return_json, "s1", json_object_new_string(string1.c_str()));
239+
actual_values, "s1_actual", json_object_new_string(string1.c_str()));
238240
json_object_object_add(
239-
return_json, "s2", json_object_new_string(string2.c_str()));
240-
241+
actual_values, "s2_actual", json_object_new_string(string2.c_str()));
241242
json_object_object_add(
242-
return_json, "actual_input",
243+
actual_values, "input",
243244
json_object_new_string(json_object_get_string(json_in)));
244245

246+
json_object_object_add(
247+
return_json, "actual_options",
248+
actual_values);
249+
245250
// Record the actual returned value
246251
json_object_object_add(
247252
return_json, "compare", json_object_new_int64(uni_result));

executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/collator/CollatorTester.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,25 @@ public ITestTypeOutputJson execute(ITestTypeInputJson inputJson) {
9797
}
9898
}
9999

100+
// Use the compare_type field to set the strength of collation test.
101+
if (input.compare_type != null){
102+
if (input.compare_type.equals("=")) {
103+
coll.setStrength(Collator.IDENTICAL);
104+
} else
105+
if (input.compare_type.equals("<1")) {
106+
coll.setStrength(Collator.PRIMARY);
107+
} else
108+
if (input.compare_type.equals("<2")) {
109+
coll.setStrength(Collator.SECONDARY);
110+
} else
111+
if (input.compare_type.equals("<3")) {
112+
coll.setStrength(Collator.TERTIARY);
113+
} else
114+
if (input.compare_type.equals("<4")) {
115+
coll.setStrength(Collator.QUATERNARY);
116+
}
117+
}
118+
100119
try {
101120
int collResult = coll.compare(input.s1, input.s2);
102121
// TODO! Use compare_type to check for <= or ==.
@@ -158,7 +177,7 @@ public Collator getCollatorForInput(CollatorInputJson input) {
158177
}
159178
}
160179
} else {
161-
ULocale locale = ULocale.forLanguageTag(input.locale);
180+
ULocale locale = new ULocale(input.locale);
162181
result = (RuleBasedCollator) Collator.getInstance(locale);
163182
if (input.rules != null) {
164183
String defaultRules = result.getRules();

executors/icu4j/74/executor-icu4j/src/test/java/org/unicode/conformance/collator/icu74/CollatorTest.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import static org.junit.Assert.assertTrue;
44

5+
import org.junit.Ignore;
56
import org.junit.Test;
67
import org.unicode.conformance.testtype.collator.CollatorOutputJson;
78
import org.unicode.conformance.testtype.collator.CollatorTester;
@@ -41,6 +42,54 @@ public void testAttributesAsArrayList() {
4142
assertTrue(output.result);
4243
}
4344

45+
@Test
46+
public void testRule004() {
47+
// in ICU 76.1 data
48+
String testInput =
49+
"{\"test_type\": \"collation\", \"compare_type\":\"&lt;3\",\"s1\":\"\u0002\",\"s2\":\"\u0300\",\"source_file\":\"collationtest.txt\",\"line\":43,\"label\":\"00002\",\"test_description\":\"simple CEs &amp; expansions\",\"rules\":\"&\\u0001<<<\\u0300&9<\\u0000&\\uA00A\\uA00B=\\uA002&\\uA00A\\uA00B\\u00050005=\\uA003\",\"hexhash\":\"7d3d23fab7f34c1cd44e90b40f7ed33c5bb317ba\"}";
50+
51+
CollatorOutputJson output =
52+
(CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
53+
54+
assertTrue(output.result);
55+
}
56+
57+
@Test
58+
public void test1362() {
59+
// in ICU 76.1 data
60+
String testInput =
61+
"{\"test_type\": \"collation\", \"compare_type\":\"<2\",\"s1\":\"ae\",\"s2\":\"ä\",\"source_file\":\"collationtest.txt\",\"line\":2426,\"label\":\"01362\",\"localetag\":\"de-u-co-phonebk\", \"locale\": \"de@collation=PhoneBook\",\"test_description\":\"locale @collation=type should be case-insensitive\",\"hexhash\":\"893bdab906f7bc0e918ce388c7e78799d5913eaa\"}";
62+
63+
CollatorOutputJson output =
64+
(CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
65+
66+
assertTrue(output.result);
67+
}
68+
69+
@Ignore
70+
@Test
71+
public void test00144() {
72+
// in ICU 76.1 data
73+
String testInput =
74+
"{\"test_type\": \"collation\", \"compare_type\":\"&lt;2\",\"s1\":\"cote\",\"s2\":\"coté\",\"source_file\":\"collationtest.txt\",\"line\":329,\"label\":\"00144\",\"locale\":\"root\",\"test_description\":\"côté with forwards secondary\",\"hexhash\":\"9a83942120095cac5793c15daebdf05cf30994ab\"}";
75+
76+
CollatorOutputJson output =
77+
(CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
78+
79+
assertTrue(output.result);
80+
}
81+
82+
@Ignore
83+
@Test
84+
public void testIdentical() {
85+
// In ICU 76.1 data
86+
String testInput = "{\"test_type\": \"collation\", \"compare_type\": \"=\", \"s1\": \"a\u0327\", \"s2\": \"\u00e2\u0093\u0090\u00e2\u009d\u00ba\", \"source_file\": \"collationtest.txt\", \"line\": 136, \"label\": \"00032\", \"test_description\": \"simple contractions\", \"rules\": \"&a=\\u00e2\\u0093\\u0090&b<bz=\\u00e2\\u0093\\u0091&d<dz\\u0301=\\u00e2\\u0093\\u0093&z<a\\u0301=\\u00e2\\u0092\\u00b6<a\\u0301\\u0301=\\u00e2\\u0092\\u00b7<a\\u0301\\u0301\\u0358=\\u00e2\\u0092\\u00b8<a\\u030a=\\u00e2\\u0092\\u00b9<a\\u0323=\\u00e2\\u0092\\u00ba<a\\u0323\\u0358=\\u00e2\\u0092\\u00bb<a\\u0327\\u0323\\u030a=\\u00e2\\u0092\\u00bc<a\\u0327\\u0323bz=\\u00e2\\u0092\\u00bd&\\ud834\\udd58=\\u00e2\\u0081\\u00b0<\\ud834\\udd58\\ud834\\udd65=\\u00c2\\u00bc&\\u0001<<<\\ud834\\udd65=\\u00c2\\u00b9<<<\\ud834\\udd6d=\\u00c2\\u00b2<<<\\ud834\\udd65\\ud834\\udd6d=\\u00c2\\u00b3&\\u0301=\\u00e2\\u009d\\u00b6&\\u030a=\\u00e2\\u009d\\u00b7&\\u0308=\\u00e2\\u009d\\u00b8<<\\u0308\\u0301=\\u00e2\\u009d\\u00b9&\\u0327=\\u00e2\\u009d\\u00ba&\\u0323=\\u00e2\\u009d\\u00bb&\\u0331=\\u00e2\\u009d\\u00bc<<\\u0331\\u0358=\\u00e2\\u009d\\u00bd&\\u0334=\\u00e2\\u009d\\u00be&\\u0358=\\u00e2\\u009d\\u00bf&\\u0f71=\\u00e2\\u0091&\\u0f72=\\u00e2\\u0091\\u00a1&\\u0f73=\\u00e2\\u0091\\u00a2\", \"hexhash\": \"b43889a20872ad4d242f9e94c942cc56cbb89b75\"}";
87+
88+
CollatorOutputJson output =
89+
(CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
90+
91+
assertTrue(output.result);
92+
}
4493
/* @Test
4594
public void testCompareLT2() {
4695
String testInput =
@@ -50,4 +99,15 @@ public void testCompareLT2() {
5099
51100
assertTrue(output.result);
52101
}*/
102+
103+
@Test
104+
public void test00002() {
105+
String testInput =
106+
"{\"test_type\": \"collation\", \"compare_type\":\"<3\",\"s1\":\"\u0002\",\"s2\":\"\u0300\",\"source_file\":\"collationtest.txt\",\"line\":43,\"label\":\"00002\",\"test_description\":\"simple CEs & expansions\",\"rules\":\"&\\\\u0001<<<\\u0300&9<\\u0000&\\uA00A\\uA00B=\\uA002&\\uA00A\\uA00B\\u00050005=\\uA003\",\"hexhash\":\"87be5cda089d675543eb91b948e2d7f74227ff0d\"}";
107+
108+
CollatorOutputJson output =
109+
(CollatorOutputJson) CollatorTester.INSTANCE.getStructuredOutputFromInputStr(testInput);
110+
111+
assertTrue(output.result);
112+
}
53113
}

executors/node/collator.js

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,52 @@ module.exports = {
55
testCollationShort: function(json) {
66
// Global default locale
77

8+
let outputLine = {'label':json['label']};
9+
810
// Locale if provided in the test data.
9-
let testLocale = undefined;
11+
let testLocale = 'en'; // default
1012
if ('locale' in json) {
1113
testLocale = json['locale'];
1214
}
15+
16+
if (testLocale == 'root') {
17+
outputLine = {'label': json['label'],
18+
'error_message': "root locale",
19+
'unsupported': 'root locale',
20+
'error_detail': testLocale,
21+
'error': 'Unsupported locale'
22+
};
23+
return outputLine;
24+
}
25+
26+
// Check if this locale is actually supported
27+
try {
28+
const supported_locales =
29+
Intl.Collator.supportedLocalesOf([testLocale], {localeMatcher: "best fit"});
30+
31+
if (supported_locales.length == 1 && supported_locales[0] != testLocale) {
32+
testLocale = supported_locales[0];
33+
outputLine['substituted_locale'] = testLocale;;
34+
}
35+
else if (supported_locales.length <= 0 ||
36+
!supported_locales.includes(testLocale)) {
37+
// Report as unsupported
38+
outputLine['error_message'] = "unsupported locale";
39+
outputLine['unsupported'] = testLocale;
40+
outputLine['error_detail'] = supported_locales;
41+
outputLine['error'] = "unsupported locale";
42+
return outputLine;
43+
}
44+
} catch (error) {
45+
console.log("ERROR @ 44 ", error.name, " ", error.message);
46+
console.log(" testLocale = ", testLocale);
47+
outputLine['unsupported'] = "supportedLocalsOf";
48+
outputLine['error_message'] = error.message;
49+
outputLine['error_detail'] = testLocale;
50+
outputLine['error'] = error.name;
51+
return outputLine;
52+
}
53+
1354
let testCollOptions = {};
1455
if ('ignorePunctuation' in json) {
1556
testCollOptions['ignorePunctuation'] = json['ignorePunctuation'];
@@ -30,11 +71,10 @@ module.exports = {
3071
testCollOptions['sensitivity'] = 'accent';
3172
} else
3273
if (strength == 'tertiary') {
33-
testCollOptions['sensitivity'] = 'case';
74+
testCollOptions['sensitivity'] = 'variant';
3475
}
3576
}
3677

37-
let outputLine = {'label':json['label']};
3878
// Get other fields if provided
3979
let rules = undefined;
4080
if ('rules' in json) {
@@ -86,27 +126,34 @@ module.exports = {
86126
outputLine['compare_result'] = compared;
87127
} else {
88128
// Additional info for the comparison
89-
outputLine['actual_options'] = JSON.stringify(coll.resolvedOptions());
129+
outputLine['actual_options'] = {
130+
'compared_result': compared,
131+
's1': d1,
132+
's2': d2,
133+
'options': JSON.stringify(coll.resolvedOptions())
134+
};
90135
outputLine['compare_result'] = compared;
91136
outputLine['result'] = result;
92137
}
93138

94139
} catch (error) {
95140
const error_message = error.message;
96-
if (testLocale == "root" ||
97-
error_message == "Incorrect locale information provided") {
141+
console.log('ERROR @ 135: ', error);
142+
if (error_message == "Incorrect locale information provided") {
98143
outputLine = {'label': json['label'],
99144
'error_message': error.message,
100-
'unsupported': 'root locale',
145+
'unsupported': 'UNSUPPORTED',
101146
'error_detail': error_message + ': ' + testLocale,
102-
'error': 'Unsupported locale'
147+
'actual_options': JSON.stringify(coll.resolvedOptions()),
103148
};
104149
} else {
150+
console.log("ERROR @ 144 ", error.name, " ", error.message);
105151
// Another kind of error.
106152
outputLine = {'label': json['label'],
107153
'error_message': error.message,
108154
'error_detail': testLocale,
109-
'error': error.name
155+
'error': error.name,
156+
'actual_options': JSON.stringify(coll.resolvedOptions()),
110157
};
111158
}
112159
}

schema/collation/result_schema.json

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,19 @@
8484
"type": "string"
8585
},
8686
"actual_options": {
87+
"type": ["object", "string"],
88+
"additionalProperties": true,
8789
"description": "Options used by collation, either as a string or as an object holding the actual inputs",
88-
"type": "string"
90+
"properties": {
91+
"s1_actual": {
92+
"type": "string",
93+
"description": "actual input string1"
94+
},
95+
"s2_actual": {
96+
"type": "string",
97+
"description": "actual input string2"
98+
}
99+
}
89100
},
90101
"input_data": {
91102
"type": "string",

testdriver/testplan.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -470,10 +470,9 @@ def run_multitest_mode(self):
470470
def open_json_test_data(self):
471471
# Read JSON file with results.
472472
try:
473-
input_file = open(self.inputFilePath,
474-
encoding='utf-8', mode='r')
475-
file_raw = input_file.read()
476-
input_file.close()
473+
with open(self.inputFilePath,
474+
encoding='utf-8', mode='r') as input_file:
475+
file_raw = input_file.read()
477476
try:
478477
self.jsonData = json.loads(file_raw)
479478
except json.JSONDecodeError as error:

testgen/generators/base.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22
from abc import ABC, abstractmethod
3+
import codecs
34
import copy
45
import hashlib
56
import json
@@ -82,9 +83,10 @@ def saveJsonFile(self, filename, data, indent=None):
8283
filename)
8384

8485
output_path = os.path.join(self.icu_version, filename)
85-
output_file = open(output_path, "w", encoding="utf-8")
86-
json.dump(data, output_file, indent=indent)
87-
output_file.close()
86+
# output_file = open(output_path, "w", encoding="utf8")
87+
88+
with open(output_path, "w", encoding="utf8") as output_file:
89+
json.dump(data, output_file, indent=indent)
8890

8991
def getTestDataFromGitHub(self, datafile_name, version):
9092
# Path for fetching test data from ICU repository
@@ -132,7 +134,7 @@ def readFile(self, filename, version="", filetype="txt"):
132134
if version:
133135
path = os.path.join(version, filename)
134136
try:
135-
with open(path, "r", encoding="utf-8") as testdata:
137+
with codecs.open(path, "r", encoding="utf-8") as testdata:
136138
return json.load(testdata) if filetype == "json" else testdata.read()
137139
except BaseException as err:
138140
logging.warning("** readFile: %s", err)

0 commit comments

Comments
 (0)