Skip to content

Commit 89334c7

Browse files
author
Craig Cornelius
authored
Update known issue handling for DateTime Format (#518)
* Updating number format data generation and NodeJS number_fmt
* Updated C++ number format, fixing errors and failing tests
* Fix number format in ICU4J
* Remove cout
* Add halfOdd to possible test results
* Add dateTimeFormatType to datagen, schema, and CPP version
* Updating regex for unsupported test types.
* Updates as suggested
* Classify some datetime_fmt errors as known issues, fixing some ICU4C and ICU4J failures.
* Handle Node known issue for timezone only format
* responding to review suggestions
* DateTime checking known issues
* Removing schema change
* Removing change in cpp file
* Undoing unrelated change
* Remove extra check for comma
* Remove copy
* Reply to suggestions
* Restore check_list_differences
* Fix when result is missing
* Fixing types of comparisons for known issues
1 parent 31ef9fd commit 89334c7

File tree

2 files changed

+98
-120
lines changed

2 files changed

+98
-120
lines changed

verifier/check_known_issues.py

Lines changed: 64 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ class knownIssueType(Enum):
5757
# Datetime format
5858
datetime_fmt_at_inserted = 'Alternate formatting with "at" between time and date'
5959
datetime_fmt_arabic_comma = 'Arabic comma vs. ASCII comma'
60-
datetime_unexpected_comma = 'Unexpected comma'
6160
datetime_inserted_comma = 'inserted comma'
6261

6362
datetime_semantic_Z = 'NodeJS always includes date or time'
6463

64+
datetime_GMT_UTC = 'UTC instead of GMT'
6565

6666
# Likely Subtags
6767
likely_subtags_sr_latn = "sr_latin becoming en"
@@ -108,9 +108,9 @@ def diff_ascii_space_vs_nbsp(actual, expected_value):
108108
if not expected_value or not actual:
109109
return None
110110

111-
# If replacing all the NBSP characters in expected gives the actual result,
112-
# then the only differences were with this type of space in formatted output.
113-
if expected_value.replace(NBSP, SP) == actual:
111+
# Normalizing all NBSP spaces to ASCII in both to check if the type of space
112+
# is the only difference in formatted output.
113+
if actual.replace(NBSP, SP) == expected_value.replace(NBSP, SP):
114114
return knownIssueType.known_issue_sp_nbsp
115115
else:
116116
return None
@@ -164,41 +164,35 @@ def unsupported_unit_quarter(test):
164164
return None
165165

166166

167-
def dt_check_for_alternate_long_form(test, actual, expected):
167+
def dt_check_for_alternate_long_form(actual, expected):
168168
# For datetime_fmt, is the format type "standard"?
169169
if actual == expected:
170170
return None
171-
if actual.replace(' at', ',') == expected:
172-
return knownIssueType.datetime_fmt_at_inserted
173-
174-
# Thai language difference with "time" inserted
175-
if actual.replace('เวลา ', '') == expected:
176-
return knownIssueType.datetime_fmt_at_inserted
177-
# Arabic
178-
if actual.replace(' في', '،') == expected:
179-
return knownIssueType.datetime_fmt_at_inserted
180-
# Vietnamese
181-
if actual.replace('lúc ', '') == expected:
182-
return knownIssueType.datetime_fmt_at_inserted
183-
# Bengali
184-
if actual.replace('এ ', '') == expected:
185-
return knownIssueType.datetime_fmt_at_inserted
171+
new_expected = expected.replace(NBSP, SP)
172+
new_actual = actual.replace(NBSP, SP)
173+
174+
# TODO: Make this an array of replacements
175+
replacements_to_try = [
176+
(' at', ','), # English
177+
('เวลา ', ''), # Thai
178+
(' في', '،'), # Arabic
179+
('lúc ', ''), # Vietnamese
180+
('এ ', ''), # Bengali
181+
]
182+
183+
for replacement in replacements_to_try:
184+
if new_actual.replace(replacement[0], replacement[1]) == new_expected:
185+
return knownIssueType.datetime_fmt_at_inserted
186186
return None
187187

188188

189-
def dt_check_arabic_comma(test, actual, expected):
189+
def dt_check_arabic_comma(actual, expected):
190190
if expected.replace('\u002c', '\u060c') == actual:
191191
return knownIssueType.datetime_fmt_arabic_comma
192192
return None
193193

194194

195-
def dt_unexpected_comma(test, actual, expected):
196-
if actual.replace('\u002c', '') == expected:
197-
return knownIssueType.datetime_unexpected_comma
198-
else:
199-
return None
200-
201-
def dt_inserted_comma(test, actual, expected):
195+
def dt_inserted_comma(actual, expected):
202196
sm = SequenceMatcher(None, expected, actual)
203197
sm_opcodes = sm.get_opcodes()
204198
# Look for one additional comma
@@ -211,10 +205,23 @@ def dt_inserted_comma(test, actual, expected):
211205
return None
212206

213207

214-
def check_datetime_known_issues(test):
208+
def dt_gmt_utc(actual, expected):
209+
# The difference may also include NBSP vs ASCII space
210+
new_expected = expected.replace(NBSP, SP)
211+
new_actual = actual.replace(NBSP, SP)
212+
213+
if new_actual.replace('UTC', 'GMT') == new_expected or \
214+
new_actual.replace('Coordinated Universal', 'Greenwich Mean') == new_expected:
215+
return knownIssueType.datetime_GMT_UTC
216+
return None
217+
218+
def check_datetime_known_issues(test, platform_info):
215219
# Examine a single test for date/time issues
216220
# Returns known issues identified for this test in this category
217221
remove_this_one = False
222+
223+
all_matching_issues = []
224+
218225
try:
219226
try:
220227
result = test['result']
@@ -224,50 +231,32 @@ def check_datetime_known_issues(test):
224231
expected = test['expected']
225232
input_data = test.get('input_data')
226233

227-
is_ki = diff_nbsp_vs_ascii_space(result, expected)
228-
if is_ki:
229-
# Mark the test with this issue
230-
test['known_issue'] = is_ki.value
231-
remove_this_one = True
232-
233-
is_ki = diff_ascii_space_vs_nbsp(result, expected)
234-
if is_ki:
235-
# Mark the test with this issue
236-
test['known_issue'] = is_ki.value
237-
remove_this_one = True
238-
239-
is_ki = numerals_replaced_by_another_numbering_system(result, expected)
240-
if is_ki:
241-
test['known_issue_id'] = is_ki.value
242-
remove_this_one = True
243-
244-
is_ki = dt_check_for_alternate_long_form(test, result, expected)
245-
if is_ki:
246-
test['known_issue_id'] = is_ki.value
247-
remove_this_one = True
234+
# Perform each test, computing matches with known issues by means of the functions in this list
235+
check_fns = [dt_gmt_utc, diff_nbsp_vs_ascii_space, diff_ascii_space_vs_nbsp, numerals_replaced_by_another_numbering_system,
236+
dt_check_arabic_comma, dt_inserted_comma, dt_check_for_alternate_long_form]
237+
for check_fn in check_fns:
238+
is_ki = check_fn(result, expected)
239+
if is_ki:
240+
test['known_issue_id'] = is_ki.value
241+
remove_this_one = True
242+
all_matching_issues.append(is_ki.value)
243+
244+
# Check if the semantic skeleton has "Z" for NodeJS
245+
if platform_info['platform'] == 'NodeJS' and result:
246+
if input_data and 'semanticSkeleton' in input_data['options'] and \
247+
input_data['options']['semanticSkeleton'] == 'Z':
248+
test['known_issue_id'] = knownIssueType.datetime_semantic_Z.value
249+
remove_this_one = True
250+
all_matching_issues.append(knownIssueType.datetime_semantic_Z.value)
248251

249-
is_ki = dt_check_arabic_comma(test, result, expected)
250-
if is_ki:
251-
test['known_issue_id'] = is_ki.value
252-
remove_this_one = True
253-
254-
is_ki = dt_unexpected_comma(test, result, expected)
255-
if is_ki:
256-
test['known_issue_id'] = is_ki.value
257-
remove_this_one = True
258-
259-
is_ki = dt_inserted_comma(test, result, expected)
260-
if is_ki:
261-
test['known_issue_id'] = is_ki.value
262-
remove_this_one = True
263252

264253
except BaseException as err:
265254
# Can't get the info
266255
pass
267256

268257
return remove_this_one
269258

270-
def check_rdt_known_issues(test):
259+
def check_rdt_known_issues(test, platform_info=None):
271260
# ??? Do we need platform ID and/or ICU version?
272261
remove_this_one = False
273262
try:
@@ -292,7 +281,7 @@ def check_rdt_known_issues(test):
292281
return remove_this_one
293282

294283

295-
def check_likely_subtags_issues(test):
284+
def check_likely_subtags_issues(test, platform_info=None):
296285
remove_this_one = False
297286
try:
298287
result = test['result']
@@ -315,7 +304,7 @@ def sr_latin_likely_subtag(test):
315304
return None
316305

317306
# Language names
318-
def check_langnames_issues(test):
307+
def check_langnames_issues(test, platform_info=None):
319308
remove_this_one = False
320309
try:
321310
result = test['result']
@@ -391,7 +380,7 @@ def check_number_fmt_issues(test, platform_info):
391380
return None
392381

393382

394-
def check_plural_rules_issues(test):
383+
def check_plural_rules_issues(test, platform_info=None):
395384
try:
396385
input_data = test['input_data']
397386
sample_string = input_data['sample']
@@ -406,7 +395,7 @@ def check_plural_rules_issues(test):
406395
return None
407396

408397

409-
def check_collation_issues(test):
398+
def check_collation_issues(test, platform_info=None):
410399
input_data = test.get('input_data', {})
411400

412401
# Check for jsonc bug with surrogates
@@ -436,19 +425,19 @@ def compute_known_issues_for_single_test(test_type, test, platform_info):
436425
# Returns True if this single test is an example of one or more known issues,
437426
known_issue_found = False
438427
if test_type == ddt_data.testType.collation.value:
439-
known_issue_found = check_collation_issues(test)
428+
known_issue_found = check_collation_issues(test, platform_info)
440429
if test_type == ddt_data.testType.datetime_fmt.value:
441-
known_issue_found = check_datetime_known_issues(test)
430+
known_issue_found = check_datetime_known_issues(test, platform_info)
442431
elif test_type == ddt_data.testType.rdt_fmt.value:
443-
known_issue_found = check_rdt_known_issues(test)
432+
known_issue_found = check_rdt_known_issues(test, platform_info)
444433
elif test_type == ddt_data.testType.likely_subtags.value:
445-
known_issue_found = check_likely_subtags_issues(test)
434+
known_issue_found = check_likely_subtags_issues(test, platform_info)
446435
elif test_type == ddt_data.testType.lang_names.value:
447-
known_issue_found = check_langnames_issues(test)
436+
known_issue_found = check_langnames_issues(test, platform_info)
448437
elif test_type == ddt_data.testType.number_fmt.value:
449438
known_issue_found = check_number_fmt_issues(test, platform_info)
450439
elif test_type == ddt_data.testType.plural_rules.value:
451-
known_issue_found = check_plural_rules_issues(test)
440+
known_issue_found = check_plural_rules_issues(test, platform_info)
452441

453442
# TODO: Add checks here for known issues in other test types
454443

0 commit comments

Comments
 (0)