Skip to content

Commit e34e4b6

Browse files
committed
[GR-27153] Get test_difflib to pass.
PullRequest: graalpython/1377
2 parents 79032be + f7a93b4 commit e34e4b6

File tree

7 files changed

+133
-13
lines changed

7 files changed

+133
-13
lines changed

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_difflib.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
*graalpython.lib-python.3.test.test_difflib.TestAutojunk.test_one_insert_homogenous_sequence
22
*graalpython.lib-python.3.test.test_difflib.TestBytes.test_byte_content
3+
*graalpython.lib-python.3.test.test_difflib.TestBytes.test_byte_filenames
34
*graalpython.lib-python.3.test.test_difflib.TestBytes.test_mixed_types_content
45
*graalpython.lib-python.3.test.test_difflib.TestBytes.test_mixed_types_dates
56
*graalpython.lib-python.3.test.test_difflib.TestBytes.test_mixed_types_filenames
@@ -8,6 +9,7 @@
89
*graalpython.lib-python.3.test.test_difflib.TestJunkAPIs.test_is_line_junk_REDOS
910
*graalpython.lib-python.3.test.test_difflib.TestJunkAPIs.test_is_line_junk_false
1011
*graalpython.lib-python.3.test.test_difflib.TestJunkAPIs.test_is_line_junk_true
12+
*graalpython.lib-python.3.test.test_difflib.TestMain.test_main
1113
*graalpython.lib-python.3.test.test_difflib.TestOutputFormat.test_no_trailing_tab_on_empty_filedate
1214
*graalpython.lib-python.3.test.test_difflib.TestOutputFormat.test_range_format_context
1315
*graalpython.lib-python.3.test.test_difflib.TestOutputFormat.test_range_format_unified
@@ -21,6 +23,7 @@
2123
*graalpython.lib-python.3.test.test_difflib.TestSFpatches.test_html_diff
2224
*graalpython.lib-python.3.test.test_difflib.TestSFpatches.test_make_file_default_charset
2325
*graalpython.lib-python.3.test.test_difflib.TestSFpatches.test_make_file_iso88591_charset
26+
*graalpython.lib-python.3.test.test_difflib.TestSFpatches.test_make_file_usascii_charset_with_nonascii_input
2427
*graalpython.lib-python.3.test.test_difflib.TestSFpatches.test_recursion_limit
2528
*graalpython.lib-python.3.test.test_difflib.TestWithAscii.test_bjunk
2629
*graalpython.lib-python.3.test.test_difflib.TestWithAscii.test_one_delete

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 116 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,8 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
143143
@Cached ConditionProfile strictProfile,
144144
@Cached ConditionProfile backslashreplaceProfile,
145145
@Cached ConditionProfile surrogatepassProfile,
146+
@Cached ConditionProfile surrogateescapeProfile,
147+
@Cached ConditionProfile xmlcharrefreplaceProfile,
146148
@Cached RaiseEncodingErrorNode raiseEncodingErrorNode,
147149
@Cached PRaiseNode raiseNode) {
148150
boolean fixed;
@@ -154,6 +156,10 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
154156
fixed = backslashreplace(encoder);
155157
} else if (surrogatepassProfile.profile(SURROGATEPASS.equals(errorAction))) {
156158
fixed = surrogatepass(encoder);
159+
} else if (surrogateescapeProfile.profile(SURROGATEESCAPE.equals(errorAction))) {
160+
fixed = surrogateescape(encoder);
161+
} else if (xmlcharrefreplaceProfile.profile(XMLCHARREFREPLACE.equals(errorAction))) {
162+
fixed = xmlcharrefreplace(encoder);
157163
} else {
158164
throw raiseNode.raise(LookupError, ErrorMessages.UNKNOWN_ERROR_HANDLER, errorAction);
159165
}
@@ -213,6 +219,90 @@ private static boolean surrogatepass(TruffleEncoder encoder) {
213219
return false;
214220
}
215221

222+
@TruffleBoundary
223+
private static boolean surrogateescape(TruffleEncoder encoder) {
224+
String p = new String(encoder.getInputChars(encoder.getErrorLength()));
225+
byte[] replacement = new byte[p.length()];
226+
int outp = 0;
227+
for (int i = 0; i < p.length();) {
228+
int ch = p.codePointAt(i);
229+
if (!(0xDC80 <= ch && ch <= 0xDCFF)) {
230+
// Not a surrogate
231+
return false;
232+
}
233+
replacement[outp++] = (byte) (ch - 0xdc00);
234+
i += Character.charCount(ch);
235+
}
236+
encoder.replace(encoder.getErrorLength(), replacement, 0, outp);
237+
return true;
238+
}
239+
240+
@TruffleBoundary
241+
private static boolean xmlcharrefreplace(TruffleEncoder encoder) {
242+
String p = new String(encoder.getInputChars(encoder.getErrorLength()));
243+
int size = 0;
244+
for (int i = 0; i < encoder.getErrorLength(); ++i) {
245+
// object is guaranteed to be "ready"
246+
int ch = p.codePointAt(i);
247+
if (ch < 10) {
248+
size += 2 + 1 + 1;
249+
} else if (ch < 100) {
250+
size += 2 + 2 + 1;
251+
} else if (ch < 1000) {
252+
size += 2 + 3 + 1;
253+
} else if (ch < 10000) {
254+
size += 2 + 4 + 1;
255+
} else if (ch < 100000) {
256+
size += 2 + 5 + 1;
257+
} else if (ch < 1000000) {
258+
size += 2 + 6 + 1;
259+
} else {
260+
size += 2 + 7 + 1;
261+
}
262+
}
263+
264+
byte[] replacement = new byte[size];
265+
int consumed = 0;
266+
// generate replacement
267+
for (int i = 0; i < p.length(); ++i) {
268+
int digits;
269+
int base;
270+
int ch = p.codePointAt(i);
271+
replacement[consumed++] = '&';
272+
replacement[consumed++] = '#';
273+
if (ch < 10) {
274+
digits = 1;
275+
base = 1;
276+
} else if (ch < 100) {
277+
digits = 2;
278+
base = 10;
279+
} else if (ch < 1000) {
280+
digits = 3;
281+
base = 100;
282+
} else if (ch < 10000) {
283+
digits = 4;
284+
base = 1000;
285+
} else if (ch < 100000) {
286+
digits = 5;
287+
base = 10000;
288+
} else if (ch < 1000000) {
289+
digits = 6;
290+
base = 100000;
291+
} else {
292+
digits = 7;
293+
base = 1000000;
294+
}
295+
while (digits-- > 0) {
296+
replacement[consumed++] = (byte) ('0' + ch / base);
297+
ch %= base;
298+
base /= 10;
299+
}
300+
replacement[consumed++] = ';';
301+
}
302+
encoder.replace(encoder.getErrorLength(), replacement, 0, consumed);
303+
return true;
304+
}
305+
216306
public static HandleEncodingErrorNode create() {
217307
return CodecsModuleBuiltinsFactory.HandleEncodingErrorNodeGen.create();
218308
}
@@ -250,6 +340,7 @@ static void doStrict(TruffleDecoder decoder, String errorAction, Object inputObj
250340
@Cached ConditionProfile strictProfile,
251341
@Cached ConditionProfile backslashreplaceProfile,
252342
@Cached ConditionProfile surrogatepassProfile,
343+
@Cached ConditionProfile surrogateescapeProfile,
253344
@Cached RaiseDecodingErrorNode raiseDecodingErrorNode,
254345
@Cached PRaiseNode raiseNode) {
255346
boolean fixed;
@@ -261,6 +352,8 @@ static void doStrict(TruffleDecoder decoder, String errorAction, Object inputObj
261352
fixed = backslashreplace(decoder);
262353
} else if (surrogatepassProfile.profile(SURROGATEPASS.equals(errorAction))) {
263354
fixed = surrogatepass(decoder);
355+
} else if (surrogateescapeProfile.profile(SURROGATEESCAPE.equals(errorAction))) {
356+
fixed = surrogateescape(decoder);
264357
} else {
265358
throw raiseNode.raise(LookupError, ErrorMessages.UNKNOWN_ERROR_HANDLER, errorAction);
266359
}
@@ -308,6 +401,27 @@ private static boolean surrogatepass(TruffleDecoder decoder) {
308401
return false;
309402
}
310403

404+
@TruffleBoundary
405+
private static boolean surrogateescape(TruffleDecoder decoder) {
406+
int errorLength = decoder.getErrorLength();
407+
// decode up to 4 bytes
408+
int consumed = 0;
409+
boolean replaced = false;
410+
byte[] inputBytes = decoder.getInputBytes(errorLength);
411+
while (consumed < 4 && consumed < errorLength) {
412+
int b = inputBytes[consumed] & 0xff;
413+
// Refuse to escape ASCII bytes.
414+
if (b < 128) {
415+
break;
416+
}
417+
int codePoint = 0xdc00 + b;
418+
decoder.replace(1, Character.toChars(codePoint));
419+
replaced = true;
420+
consumed += 1;
421+
}
422+
return replaced;
423+
}
424+
311425
public static HandleDecodingErrorNode create() {
312426
return CodecsModuleBuiltinsFactory.HandleDecodingErrorNodeGen.create();
313427
}
@@ -323,14 +437,14 @@ protected static CodingErrorAction convertCodingErrorAction(String errors) {
323437
errorAction = CodingErrorAction.IGNORE;
324438
break;
325439
case REPLACE:
326-
case SURROGATEESCAPE:
327440
case NAMEREPLACE:
328-
case XMLCHARREFREPLACE:
329441
errorAction = CodingErrorAction.REPLACE;
330442
break;
331443
case STRICT:
332444
case BACKSLASHREPLACE:
333445
case SURROGATEPASS:
446+
case SURROGATEESCAPE:
447+
case XMLCHARREFREPLACE:
334448
default:
335449
// Everything else will be handled by our Handle nodes
336450
errorAction = CodingErrorAction.REPORT;

graalpython/lib-python/3/test/test_imp.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -443,6 +443,7 @@ def test_source_from_cache(self):
443443

444444

445445
class NullImporterTests(unittest.TestCase):
446+
@support.impl_detail("[GR-27024] [GR-23324] posix NFI support", graalvm=False)
446447
@unittest.skipIf(support.TESTFN_UNENCODABLE is None,
447448
"Need an undecodeable filename")
448449
def test_unencodeable(self):

graalpython/lib-python/3/test/test_import/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1267,6 +1267,7 @@ def exec_module(*args):
12671267
else:
12681268
importlib.SourceLoader.exec_module = old_exec_module
12691269

1270+
@impl_detail("[GR-27024] [GR-23324] posix NFI support", graalvm=False)
12701271
@unittest.skipUnless(TESTFN_UNENCODABLE, 'need TESTFN_UNENCODABLE')
12711272
def test_unencodable_filename(self):
12721273
# Issue #11619: The Python parser and the import machinery must not

graalpython/lib-python/3/test/test_unicode_file.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import unicodedata
66

77
import unittest
8-
from test.support import (run_unittest, rmtree, change_cwd,
8+
from test.support import (run_unittest, rmtree, change_cwd, impl_detail,
99
TESTFN_ENCODING, TESTFN_UNICODE, TESTFN_UNENCODABLE, create_empty_file)
1010

1111
if not os.path.supports_unicode_filenames:
@@ -117,11 +117,13 @@ def _test_single(self, filename):
117117

118118
# The 'test' functions are unittest entry points, and simply call our
119119
# _test functions with each of the filename combinations we wish to test
120+
@impl_detail("[GR-27024] [GR-23324] posix NFI support", graalvm=False)
120121
def test_single_files(self):
121122
self._test_single(TESTFN_UNICODE)
122123
if TESTFN_UNENCODABLE is not None:
123124
self._test_single(TESTFN_UNENCODABLE)
124125

126+
@impl_detail("[GR-27024] [GR-23324] posix NFI support", graalvm=False)
125127
def test_directories(self):
126128
# For all 'equivalent' combinations:
127129
# Make dir with encoded, chdir with unicode, checkdir with encoded

graalpython/lib-python/3/test/test_zipimport.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -644,6 +644,7 @@ def testTraceback(self):
644644
files = {TESTMOD + ".py": (NOW, raise_src)}
645645
self.doTest(None, files, TESTMOD, call=self.doTraceback)
646646

647+
@support.impl_detail("[GR-27024] [GR-23324] posix NFI support", graalvm=False)
647648
@unittest.skipIf(support.TESTFN_UNENCODABLE is None,
648649
"need an unencodable filename")
649650
def testUnencodable(self):

graalpython/lib-python/3/unittest/case.py

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1090,16 +1090,14 @@ def assertSequenceEqual(self, seq1, seq2, msg=None, seq_type=None):
10901090
except (TypeError, IndexError, NotImplementedError):
10911091
differing += ('Unable to index element %d '
10921092
'of second %s\n' % (len1, seq_type_name))
1093-
# standardMsg = differing
1094-
# diffMsg = '\n' + '\n'.join(
1095-
# difflib.ndiff(pprint.pformat(seq1).splitlines(),
1096-
# pprint.pformat(seq2).splitlines()))
1097-
#
1098-
# standardMsg = self._truncateMessage(standardMsg, diffMsg)
1099-
# msg = self._formatMessage(msg, standardMsg)
1100-
# self.fail(msg)
1101-
# TODO: Truffle reneable me once pformat / difflib work (GR-9152) (GR-9153)
1102-
self.fail(differing)
1093+
standardMsg = differing
1094+
diffMsg = '\n' + '\n'.join(
1095+
difflib.ndiff(pprint.pformat(seq1).splitlines(),
1096+
pprint.pformat(seq2).splitlines()))
1097+
1098+
standardMsg = self._truncateMessage(standardMsg, diffMsg)
1099+
msg = self._formatMessage(msg, standardMsg)
1100+
self.fail(msg)
11031101

11041102
def _truncateMessage(self, message, diff):
11051103
max_diff = self.maxDiff

0 commit comments

Comments
 (0)