diff --git a/Lib/test/test_tools/test_lexer.py b/Lib/test/test_tools/test_lexer.py
new file mode 100644
index 00000000000000..88350139189b76
--- /dev/null
+++ b/Lib/test/test_tools/test_lexer.py
@@ -0,0 +1,79 @@
+"""Tests for the cases generator lexer in the Tools directory.
+
+This file contains regression tests for the dedent handling in the
+to_text() helper of Tools/cases_generator/lexer.py.
+"""
+
+import os
+import unittest
+from test.support.script_helper import assert_python_ok
+
+from test.test_tools import toolsdir, skip_if_missing
+
+skip_if_missing()
+
+
+class LexerDedentTests(unittest.TestCase):
+    script = os.path.join(toolsdir, 'cases_generator', 'lexer.py')
+
+    def test_multiline_comment_dedent_4(self):
+        input_code = """
+    int main() {
+        /*
+        This is a
+        multi-line comment.
+        Let's see if it de-indents correctly.
+        */
+        return 0;
+    }
+"""
+
+        expected_output = """
+int main() {
+    /*
+    This is a
+    multi-line comment.
+    Let's see if it de-indents correctly.
+    */
+    return 0;
+}
+"""
+
+        dedent_amount = '4'
+        rc, out, err = assert_python_ok(self.script, '-c', input_code, dedent_amount)
+        # [1:] drops the leading newline of the triple-quoted literal.
+        self.assertEqual(out, bytes(expected_output, 'utf-8')[1:],
+                         "Multi-line comment de-indentation failed")
+
+    def test_multiline_comment_dedent_40(self):
+        input_code = """
+    int main() {
+        /*
+        This is a
+        multi-line comment.
+        Let's see if it de-indents correctly.
+        */
+        return 0;
+    }
+"""
+
+        expected_output = """
+int main() {
+/*
+This is a
+multi-line comment.
+Let's see if it de-indents correctly.
+*/
+return 0;
+}
+"""
+
+        dedent_amount = '40'
+        rc, out, err = assert_python_ok(self.script, '-c', input_code, dedent_amount)
+        # [1:] drops the leading newline of the triple-quoted literal.
+        self.assertEqual(out, bytes(expected_output, 'utf-8')[1:],
+                         "Multi-line comment de-indentation failed")
+
+
+if __name__ == '__main__':
+    unittest.main()
+""" + +import os +import unittest +from test.support.script_helper import assert_python_ok +from test.support import findfile + +from test.test_tools import toolsdir, skip_if_missing + +skip_if_missing() + +class TokenizeTests(unittest.TestCase): + script = os.path.join(toolsdir, 'cases_generator', 'lexer.py') + + def test_identifiers(self): + code = "int myVariable = 123;" + expected_out = bytes("INT('int', 1:1:4)\nIDENTIFIER('myVariable', 1:5:15)\nEQUALS('=', 1:16:17)\nNUMBER('123', 1:18:21)\nSEMI(';', 1:21:22)\n", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + + def test_operators(self): + code = "x = y + z;" + expected_out = bytes("IDENTIFIER('x', 1:1:2)\nEQUALS('=', 1:3:4)\nIDENTIFIER('y', 1:5:6)\nPLUS('+', 1:7:8)\nIDENTIFIER('z', 1:9:10)\nSEMI(';', 1:10:11)\n", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + + def test_numbers(self): + code = "int num = 42;" + expected_out = bytes("INT('int', 1:1:4)\nIDENTIFIER('num', 1:5:8)\nEQUALS('=', 1:9:10)\nNUMBER('42', 1:11:13)\nSEMI(';', 1:13:14)\n", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + + def test_strings(self): + code = 'printf("Hello, World!");' + expected_out = bytes("""IDENTIFIER(\'printf\', 1:1:7)\nLPAREN(\'(\', 1:7:8)\nSTRING(\'"Hello, World!"\', 1:8:23)\nRPAREN(\')\', 1:23:24)\nSEMI(\';\', 1:24:25)\n""", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + + def test_characters_with_escape_sequences(self): + code = "char a = '\n'; char b = '\x41'; char c = '\\';" + expected_out = bytes("""CHAR(\'char\', 1:1:5)\nIDENTIFIER(\'a\', 1:6:7)\nEQUALS(\'=\', 1:8:9)\nCHARACTER("\'\\n\'", 1:10:13)\nSEMI(\';\', 1:13:14)\nCHAR(\'char\', 1:15:19)\nIDENTIFIER(\'b\', 1:20:21)\nEQUALS(\'=\', 1:22:23)\nCHARACTER("\'A\'", 1:24:27)\nSEMI(\';\', 1:27:28)\nCHAR(\'char\', 1:29:33)\nIDENTIFIER(\'c\', 1:34:35)\nEQUALS(\'=\', 1:36:37)\nCHARACTER("\'", 1:38:39)\nBACKSLASH(\'\\\\\', 1:39:40)\nCHARACTER("\'", 1:40:41)\nSEMI(\';\', 1:41:42)\n""", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/Misc/NEWS.d/next/Tools-Demos/2023-12-28-18-12-24.gh-issue-113547.TIwp20.rst b/Misc/NEWS.d/next/Tools-Demos/2023-12-28-18-12-24.gh-issue-113547.TIwp20.rst new file mode 100644 index 00000000000000..a24361024c6eb7 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2023-12-28-18-12-24.gh-issue-113547.TIwp20.rst @@ -0,0 +1 @@ +TODOs in the cases generator lexer: dedent > 0 case in the to_text function + escape sequence handling diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index c3c2954a42083f..cd1076a53fd240 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -109,7 +109,7 @@ def choice(*opts: str) -> str: string_char = r"""([^"\\\n]|""" + escape_sequence + ")" str_re = '"' + string_char + '*"' STRING = "STRING" -char = r"\'.\'" # TODO: escape sequence +char = r"\'([^'\\]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|\\.|\\\\)\'" CHARACTER = "CHARACTER" comment_re = r"(//.*)|/\*([^*]|\*[^/])*\*/" @@ -344,7 +344,13 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str: if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text: if dedent < 0: text = text.replace("\n", "\n" + " " * -dedent) - # TODO: dedent > 0 + elif dedent > 0: + temp: 
@@ -358,6 +365,11 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
         src = sys.argv[2]
     else:
         src = open(filename).read()
-    # print(to_text(tokenize(src)))
-    for tkn in tokenize(src, filename=filename):
-        print(tkn)
+    # With a third argument, run to_text() with that dedent amount;
+    # otherwise print the tokens themselves, as before.
+    if len(sys.argv) > 3:
+        dedent = int(sys.argv[3])
+        print(to_text(tokenize(src), dedent))
+    else:
+        for tkn in tokenize(src, filename=filename):
+            print(tkn)
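
Usage note (not part of the diff above): a minimal, hypothetical sketch of the
new dedent > 0 path in to_text(). It assumes a CPython checkout with this patch
applied and Tools/cases_generator importable; the sample C source is made up
for illustration only.

    import sys
    sys.path.insert(0, "Tools/cases_generator")  # assumption: run from the checkout root

    from lexer import tokenize, to_text

    src = (
        "    int x;\n"
        "    /*\n"
        "     a multi-line comment\n"
        "    */\n"
    )
    # With dedent=4, to_text() re-emits the code with up to four leading
    # columns removed per line, now including the interior lines of a
    # multi-line comment token.
    print(to_text(list(tokenize(src)), dedent=4))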