|
1 | 1 | # -*- coding: utf-8 -*- |
2 | | - |
3 | | -import unittest |
| 2 | +import pytest |
4 | 3 |
|
5 | 4 | from mathics_scanner import IncompleteSyntaxError, ScanError |
6 | 5 | from mathics_scanner.prescanner import Prescanner |
7 | 6 | from mathics_scanner.feed import SingleLineFeeder |
8 | 7 |
|
9 | 8 |
|
10 | | -class PrescannerTest(unittest.TestCase): |
11 | | - def prescan(self, code): |
12 | | - prescanner = Prescanner(SingleLineFeeder(code)) |
13 | | - return prescanner.scan() |
14 | | - |
15 | | - def invalid(self, code): |
16 | | - self.assertRaises(ScanError, self.prescan, code) |
17 | | - |
18 | | - def incomplete(self, code): |
19 | | - self.assertRaises(IncompleteSyntaxError, self.prescan, code) |
20 | | - |
21 | | - def equal(self, code, result): |
22 | | - assert self.prescan(code) == result |
23 | | - |
24 | | - def equal_length(self, code, length): |
25 | | - assert len(self.prescan(code)) == length |
26 | | - |
27 | | - def test_named_characters(self): |
28 | | - self.equal(r"\[Theta]", "\u03B8") |
29 | | - self.equal(r"\[CapitalPi]", "\u03A0") |
30 | | - self.equal(r"\[Fake]", r"\[Fake]") |
31 | | - self.equal("z \\[Conjugate]", "z \uF3C8") |
32 | | - self.equal("z \\[Integral]", "z \u222b") |
33 | | - self.equal("z \\\\[Integral]", "z \\\\[Integral]") |
34 | | - self.equal("z \\\\\\[Integral]", "z \\\\\u222b") |
35 | | - self.equal("abc\\\\", "abc\\\\") |
36 | | - |
37 | | - def test_lengths(self): |
38 | | - self.equal_length(r'"\[Integral]"', 3) |
39 | | - # Prescanner keep both slashes and quotes. |
40 | | - # The tokenizer brings \\ into \ if it appears |
41 | | - # inside a string. |
42 | | - self.equal_length(r'"\\[Integral]"', 14) |
43 | | - |
44 | | - def test_oct(self): |
45 | | - self.equal(r"\051", ")") |
46 | | - |
47 | | - def test_hex_dot(self): |
48 | | - self.equal(r"\.30", "0") |
49 | | - |
50 | | - def test_hex_colon(self): |
51 | | - self.equal(r"\:0030", "0") |
52 | | - self.equal(r"\:03B8", "\u03B8") |
53 | | - self.equal(r"\:03b8", "\u03B8") |
54 | | - |
55 | | - def test_hex_vbar(self): |
56 | | - self.equal(r"\|01D451", "\U0001D451") |
57 | | - |
58 | | - def test_incomplete(self): |
59 | | - self.incomplete(r"\[") |
60 | | - self.incomplete(r"\[Theta") |
61 | | - |
62 | | - def test_invalid_oct(self): |
63 | | - self.invalid(r"\093") |
64 | | - self.invalid(r"\01") |
65 | | - |
66 | | - def test_invalid_colon(self): |
67 | | - self.invalid(r"\:") |
68 | | - self.invalid(r"\:A") |
69 | | - self.invalid(r"\:01") |
70 | | - self.invalid(r"\:A1") |
71 | | - self.invalid(r"\:ak") |
72 | | - self.invalid(r"\:A10") |
73 | | - self.invalid(r"\:a1g") |
74 | | - self.invalid(r"\:A1g9") |
75 | | - self.invalid(r"\:01-2") |
76 | | - |
77 | | - def test_invalid_dot(self): |
78 | | - self.invalid(r"\.") |
79 | | - self.invalid(r"\.0") |
80 | | - |
81 | | - def test_combined(self): |
82 | | - self.equal(r"\:03B8\[Theta]\.30\052", "\u03B8\u03B80*") |
83 | | - |
84 | | - def test_nested(self): |
85 | | - self.equal(r"\[Thet\141]", r"\[Thet\141]") |
86 | | - |
87 | | - def test_trailing_backslash(self): |
88 | | - self.incomplete("x \\") |
| 9 | +def replace_escape_sequences(code): |
| 10 | + prescanner = Prescanner(SingleLineFeeder(code)) |
| 11 | + return prescanner.replace_escape_sequences() |
| 12 | + |
| 13 | + |
| 14 | +def invalid(code): |
| 15 | + with pytest.raises(ScanError): |
| 16 | + replace_escape_sequences(code) |
| 17 | + |
| 18 | + |
| 19 | +def incomplete(code): |
| 20 | + with pytest.raises(IncompleteSyntaxError): |
| 21 | + replace_escape_sequences(code) |
| 22 | + |
| 23 | + |
| 24 | +def equal(code, result): |
| 25 | + assert replace_escape_sequences(code) == result |
| 26 | + |
| 27 | + |
| 28 | +def equal_length(code, length): |
| 29 | + assert len(replace_escape_sequences(code)) == length |
| 30 | + |
| 31 | + |
| 32 | +def test_named_characters(): |
| 33 | + equal(r"\[Theta]", "\u03B8") |
| 34 | + equal(r"\[CapitalPi]", "\u03A0") |
| 35 | + equal(r"\[Fake]", r"\[Fake]") |
| 36 | + equal("z \\[Conjugate]", "z \uF3C8") |
| 37 | + equal("z \\[Integral]", "z \u222b") |
| 38 | + equal("z \\\\[Integral]", "z \\\\[Integral]") |
| 39 | + equal("z \\\\\\[Integral]", "z \\\\\u222b") |
| 40 | + equal("abc\\\\", "abc\\\\") |
| 41 | + |
| 42 | + |
| 43 | +def test_text_lengths(): |
| 44 | + equal_length(r'"\[Integral]"', 3) |
| 45 | + # The prescanner keeps both slashes and quotes. |
| 46 | + # The tokenizer converts \\ into \ if it appears |
| 47 | + # inside a string. |
| 48 | + equal_length(r'"\\[Integral]"', 14) |
| 49 | + |
| 50 | + |
| 51 | +def test_oct(): |
| 52 | + equal(r"\051", ")") |
| 53 | + |
| 54 | + |
| 55 | +def test_hex_dot(): |
| 56 | + equal(r"\.30", "0") |
| 57 | + |
| 58 | + |
| 59 | +def test_hex_colon(): |
| 60 | + equal(r"\:0030", "0") |
| 61 | + equal(r"\:03B8", "\u03B8") |
| 62 | + equal(r"\:03b8", "\u03B8") |
| 63 | + |
| 64 | + |
| 65 | +def test_hex_vbar(): |
| 66 | + equal(r"\|01D451", "\U0001D451") |
| 67 | + |
| 68 | + |
| 69 | +def test_incomplete(): |
| 70 | + incomplete(r"\[") |
| 71 | + incomplete(r"\[Theta") |
| 72 | + |
| 73 | + |
| 74 | +def test_invalid_oct(): |
| 75 | + invalid(r"\093") |
| 76 | + invalid(r"\01") |
| 77 | + |
| 78 | + |
| 79 | +def test_invalid_colon(): |
| 80 | + invalid(r"\:") |
| 81 | + invalid(r"\:A") |
| 82 | + invalid(r"\:01") |
| 83 | + invalid(r"\:A1") |
| 84 | + invalid(r"\:ak") |
| 85 | + invalid(r"\:A10") |
| 86 | + invalid(r"\:a1g") |
| 87 | + invalid(r"\:A1g9") |
| 88 | + invalid(r"\:01-2") |
| 89 | + |
| 90 | + |
| 91 | +def test_invalid_dot(): |
| 92 | + invalid(r"\.") |
| 93 | + invalid(r"\.0") |
| 94 | + |
| 95 | + |
| 96 | +def test_combined(): |
| 97 | + equal(r"\:03B8\[Theta]\.30\052", "\u03B8\u03B80*") |
| 98 | + |
| 99 | + |
| 100 | +def test_nested(): |
| 101 | + equal(r"\[Thet\141]", r"\[Thet\141]") |
| 102 | + |
| 103 | + |
| 104 | +def test_trailing_backslash(): |
| 105 | + incomplete("x \\") |
0 commit comments