Skip to content

Commit b9cf415

Browse files
mmatera and rocky authored
improves how \ is handled in the prescanner (#49)
* improves how \ is handled in the prescanner
* fixup: Format Python code with Black
* add a comment. simplifying the logic.
* comment rewording by Rocky
* adding length tests

Co-authored-by: autoblack <[email protected]>
1 parent 76727db commit b9cf415

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

mathics_scanner/prescanner.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,12 @@ def scan(self):
5959
self.stubs.append(self.code[self.start : self.pos])
6060
self.newstub(self.pos + 2)
6161
else:
62-
self.pos += 1
62+
# Two backslashes in succession indicate a single backslash character,
63+
# rather than an escape sequence which also starts with a backslash.
64+
# Advance the scanning cursor (self.pos) over both backslashes.
65+
# Also, Python's backslash escape mechanism turns the two backslashes
66+
# into one in length calculations.
67+
self.pos += 2
6368
else:
6469
self.pos += 1
6570
self.stubs.append(self.code[self.start :]) # final stub

test/test_prescanner.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,25 @@ def incomplete(self, code):
2121
def equal(self, code, result):
2222
assert self.prescan(code) == result
2323

24+
def equal_length(self, code, length):
25+
assert len(self.prescan(code)) == length
26+
2427
def test_longnames(self):
2528
self.equal(r"\[Theta]", "\u03B8")
2629
self.equal(r"\[CapitalPi]", "\u03A0")
2730
self.equal(r"\[Fake]", r"\[Fake]")
2831
self.equal("z \\[Conjugate]", "z \uF3C8")
32+
self.equal("z \\[Integral]", "z \u222b")
33+
self.equal("z \\\\[Integral]", "z \\\\[Integral]")
34+
self.equal("z \\\\\\[Integral]", "z \\\\\u222b")
35+
self.equal("abc\\\\", "abc\\\\")
36+
37+
def test_lengths(self):
38+
self.equal_length(r'"\[Integral]"', 3)
39+
# The prescanner keeps both backslashes and quotes.
40+
# The tokenizer turns \\ into \ if it appears
41+
# inside a string.
42+
self.equal_length(r'"\\[Integral]"', 14)
2943

3044
def test_oct(self):
3145
self.equal(r"\051", ")")

0 commit comments

Comments
 (0)