75 changes: 75 additions & 0 deletions Lib/test/test_tools/test_lexer.py
Member:
I think a better and more general name is test_lexer.py, and then the two short files can be merged.

Author:
done!

@@ -0,0 +1,75 @@
"""Tests for scripts in the Tools directory.

This file contains regression tests for some of the scripts found in the
Tools directory of a Python checkout or tarball.
"""

import os
import unittest
from test.support.script_helper import assert_python_ok

from test.test_tools import toolsdir, skip_if_missing

skip_if_missing()

class ReindentTests(unittest.TestCase):
    script = os.path.join(toolsdir, 'cases_generator', 'lexer.py')

    def test_multiline_comment_dedent_dedent4(self):
        input_code = """
int main() {
/*
This is a
multi-line comment.
Let's see if it de-indents correctly.
*/
return 0;
}

"""

        expected_output = """
int main() {
/*
This is a
multi-line comment.
Let's see if it de-indents correctly.
*/
return 0;
}
"""

        dedent_amount = '4'
        rc, out, err = assert_python_ok(self.script, '-c', input_code, dedent_amount)
        self.assertEqual(out, bytes(expected_output, 'utf-8')[1:], "Multi-line comment de-indentation failed")

    def test_multiline_comment_dedent_dedent40(self):
        input_code = """
int main() {
/*
This is a
multi-line comment.
Let's see if it de-indents correctly.
*/
return 0;
}

"""

        expected_output = """
int main() {
/*
This is a
multi-line comment.
Let's see if it de-indents correctly.
*/
return 0;
}
"""

        dedent_amount = '40'
        rc, out, err = assert_python_ok(self.script, '-c', input_code, dedent_amount)
        self.assertEqual(out, bytes(expected_output, 'utf-8')[1:], "Multi-line comment de-indentation failed")


if __name__ == '__main__':
    unittest.main()
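For context, assert_python_ok launches the target script in a fresh interpreter, asserts a zero exit status, and returns (rc, out, err). A rough out-of-suite equivalent of what these tests run, with the script path assumed relative to a CPython checkout, is:

import subprocess
import sys

script = "Tools/cases_generator/lexer.py"   # assumed checkout-relative path
input_code = "int main() {\n    /* a\n       multi-line comment */\n    return 0;\n}\n"

# Mirrors assert_python_ok(script, '-c', input_code, dedent_amount):
# run the lexer as a script and capture whatever it prints.
proc = subprocess.run(
    [sys.executable, script, "-c", input_code, "4"],
    capture_output=True, text=True, check=True,
)
print(proc.stdout)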
50 changes: 50 additions & 0 deletions Lib/test/test_tools/test_lexer_tokenize.py
@@ -0,0 +1,50 @@
"""Tests for scripts in the Tools directory.

This file contains regression tests for some of the scripts found in the
Tools directory of a Python checkout or tarball.
"""

import os
import unittest
from test.support.script_helper import assert_python_ok
from test.support import findfile

from test.test_tools import toolsdir, skip_if_missing

skip_if_missing()

class TokenizeTests(unittest.TestCase):
    script = os.path.join(toolsdir, 'cases_generator', 'lexer.py')

    def test_identifiers(self):
        code = "int myVariable = 123;"
        expected_out = bytes("INT('int', 1:1:4)\nIDENTIFIER('myVariable', 1:5:15)\nEQUALS('=', 1:16:17)\nNUMBER('123', 1:18:21)\nSEMI(';', 1:21:22)\n", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)

    def test_operators(self):
        code = "x = y + z;"
        expected_out = bytes("IDENTIFIER('x', 1:1:2)\nEQUALS('=', 1:3:4)\nIDENTIFIER('y', 1:5:6)\nPLUS('+', 1:7:8)\nIDENTIFIER('z', 1:9:10)\nSEMI(';', 1:10:11)\n", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)

    def test_numbers(self):
        code = "int num = 42;"
        expected_out = bytes("INT('int', 1:1:4)\nIDENTIFIER('num', 1:5:8)\nEQUALS('=', 1:9:10)\nNUMBER('42', 1:11:13)\nSEMI(';', 1:13:14)\n", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)

    def test_strings(self):
        code = 'printf("Hello, World!");'
        expected_out = bytes("""IDENTIFIER(\'printf\', 1:1:7)\nLPAREN(\'(\', 1:7:8)\nSTRING(\'"Hello, World!"\', 1:8:23)\nRPAREN(\')\', 1:23:24)\nSEMI(\';\', 1:24:25)\n""", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)

    def test_characters_with_escape_sequences(self):
        code = "char a = '\n'; char b = '\x41'; char c = '\\';"
        expected_out = bytes("""CHAR(\'char\', 1:1:5)\nIDENTIFIER(\'a\', 1:6:7)\nEQUALS(\'=\', 1:8:9)\nCHARACTER("\'\\n\'", 1:10:13)\nSEMI(\';\', 1:13:14)\nCHAR(\'char\', 1:15:19)\nIDENTIFIER(\'b\', 1:20:21)\nEQUALS(\'=\', 1:22:23)\nCHARACTER("\'A\'", 1:24:27)\nSEMI(\';\', 1:27:28)\nCHAR(\'char\', 1:29:33)\nIDENTIFIER(\'c\', 1:34:35)\nEQUALS(\'=\', 1:36:37)\nCHARACTER("\'", 1:38:39)\nBACKSLASH(\'\\\\\', 1:39:40)\nCHARACTER("\'", 1:40:41)\nSEMI(\';\', 1:41:42)\n""", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)


if __name__ == '__main__':
    unittest.main()
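The expected strings above appear to use the token repr format KIND(text, line:start-column:end-column). A minimal sketch for inspecting the same token stream directly, assuming Tools/cases_generator is on sys.path, might look like:

import sys

sys.path.insert(0, "Tools/cases_generator")  # assumed checkout-relative path

from lexer import tokenize

# Print one token per line, in the same repr form the tests compare against.
for tkn in tokenize("int num = 42;"):
    print(tkn)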
@@ -0,0 +1 @@
TODOs in the cases generator lexer: dedent > 0 case in the to_text function + escape sequence handling
16 changes: 11 additions & 5 deletions Tools/cases_generator/lexer.py
@@ -109,7 +109,7 @@ def choice(*opts: str) -> str:
 string_char = r"""([^"\\\n]|""" + escape_sequence + ")"
 str_re = '"' + string_char + '*"'
 STRING = "STRING"
-char = r"\'.\'" # TODO: escape sequence
+char = r"\'([^'\\]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|\\.|\\\\)\'"
 CHARACTER = "CHARACTER"
 
 comment_re = r"(//.*)|/\*([^*]|\*[^/])*\*/"
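To illustrate what the widened pattern accepts (plain characters, octal and hex escapes, and simple backslash escapes), a small standalone check; the regex is copied verbatim from the added line above:

import re

char = r"\'([^'\\]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|\\.|\\\\)\'"

# Each of these C character literals should now match in full.
for lit in ["'a'", r"'\n'", r"'\101'", r"'\x41'"]:
    print(lit, bool(re.fullmatch(char, lit)))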
@@ -344,7 +344,13 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
         if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text:
             if dedent < 0:
                 text = text.replace("\n", "\n" + " " * -dedent)
-            # TODO: dedent > 0
+            elif dedent > 0:
+                temp: list[str] = []
+                for line in text.split("\n"):
+                    leading_space = len(line) - len(line.lstrip(' '))
+                    line = line[min(leading_space, dedent):]
+                    temp.append(line)
+                text = "\n".join(temp)
         res.append(text)
         line, col = tkn.end
     return "".join(res)
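A standalone sketch of the dedent rule added above: each line of a multi-line comment loses at most dedent leading spaces, so a line indented less than dedent is flushed left rather than having non-space characters removed.

def dedent_comment(text: str, dedent: int) -> str:
    # Same logic as the new elif branch: strip at most `dedent`
    # leading spaces from every line of the comment text.
    out = []
    for line in text.split("\n"):
        leading_space = len(line) - len(line.lstrip(' '))
        out.append(line[min(leading_space, dedent):])
    return "\n".join(out)

comment = "/*\n        eight spaces\n    four spaces\n*/"
print(dedent_comment(comment, 4))
# -> "/*\n    eight spaces\nfour spaces\n*/"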
@@ -358,6 +364,6 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
         src = sys.argv[2]
     else:
         src = open(filename).read()
-    # print(to_text(tokenize(src)))
-    for tkn in tokenize(src, filename=filename):
-        print(tkn)
+
+    dedent = int(sys.argv[3])
+    print(to_text(tokenize(src), dedent))
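With this change the script's command line is roughly lexer.py (-c <source> | <filename>) <dedent>; as written, sys.argv[3] is read unconditionally, so the dedent argument appears to be required. The same call made in-process, assuming the module is importable, would be:

import sys

sys.path.insert(0, "Tools/cases_generator")  # assumed checkout-relative path

from lexer import to_text, tokenize

src = "int main() {\n    /*\n        a comment\n    */\n    return 0;\n}\n"

# Same call the updated __main__ block makes.
print(to_text(tokenize(src), 4))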