75 changes: 75 additions & 0 deletions Lib/test/test_tools/test_lexer.py
Member:
I think a better and more general name is test_lexer.py, and then the two short files can be merged.

Author:
done!

@@ -0,0 +1,75 @@
"""Tests for scripts in the Tools directory.

This file contains regression tests for some of the scripts found in the
Tools directory of a Python checkout or tarball.
"""

import os
import unittest
from test.support.script_helper import assert_python_ok

from test.test_tools import toolsdir, skip_if_missing

skip_if_missing()

class ReindentTests(unittest.TestCase):
    script = os.path.join(toolsdir, 'cases_generator', 'lexer.py')

    def test_multiline_comment_dedent_dedent4(self):
        input_code = """
int main() {
/*
This is a
multi-line comment.
Let's see if it de-indents correctly.
*/
return 0;
}

"""

        expected_output = """
int main() {
/*
This is a
multi-line comment.
Let's see if it de-indents correctly.
*/
return 0;
}
"""

        dedent_amount = '4'
        rc, out, err = assert_python_ok(self.script, '-c', input_code, dedent_amount)
        self.assertEqual(out, bytes(expected_output, 'utf-8')[1:], "Multi-line comment de-indentation failed")

    def test_multiline_comment_dedent_dedent40(self):
        input_code = """
int main() {
/*
This is a
multi-line comment.
Let's see if it de-indents correctly.
*/
return 0;
}

"""

        expected_output = """
int main() {
/*
This is a
multi-line comment.
Let's see if it de-indents correctly.
*/
return 0;
}
"""

        dedent_amount = '40'
        rc, out, err = assert_python_ok(self.script, '-c', input_code, dedent_amount)
        self.assertEqual(out, bytes(expected_output, 'utf-8')[1:], "Multi-line comment de-indentation failed")


if __name__ == '__main__':
    unittest.main()
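For context, assert_python_ok launches the target script in a fresh interpreter, asserts a zero exit status, and returns (rc, out, err). A rough out-of-suite equivalent of what these tests run, with the script path assumed relative to a CPython checkout, is:

import subprocess
import sys

script = "Tools/cases_generator/lexer.py"   # assumed checkout-relative path
input_code = "int main() {\n    /* a\n       multi-line comment */\n    return 0;\n}\n"

# Mirrors assert_python_ok(script, '-c', input_code, dedent_amount):
# run the lexer as a script and capture whatever it prints.
proc = subprocess.run(
    [sys.executable, script, "-c", input_code, "4"],
    capture_output=True, text=True, check=True,
)
print(proc.stdout)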
50 changes: 50 additions & 0 deletions Lib/test/test_tools/test_lexer_tokenize.py
@@ -0,0 +1,50 @@
"""Tests for scripts in the Tools directory.

This file contains regression tests for some of the scripts found in the
Tools directory of a Python checkout or tarball.
"""

import os
import unittest
from test.support.script_helper import assert_python_ok
from test.support import findfile

from test.test_tools import toolsdir, skip_if_missing

skip_if_missing()

class TokenizeTests(unittest.TestCase):
    script = os.path.join(toolsdir, 'cases_generator', 'lexer.py')

    def test_identifiers(self):
        code = "int myVariable = 123;"
        expected_out = bytes("INT('int', 1:1:4)\nIDENTIFIER('myVariable', 1:5:15)\nEQUALS('=', 1:16:17)\nNUMBER('123', 1:18:21)\nSEMI(';', 1:21:22)\n", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)

    def test_operators(self):
        code = "x = y + z;"
        expected_out = bytes("IDENTIFIER('x', 1:1:2)\nEQUALS('=', 1:3:4)\nIDENTIFIER('y', 1:5:6)\nPLUS('+', 1:7:8)\nIDENTIFIER('z', 1:9:10)\nSEMI(';', 1:10:11)\n", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)

    def test_numbers(self):
        code = "int num = 42;"
        expected_out = bytes("INT('int', 1:1:4)\nIDENTIFIER('num', 1:5:8)\nEQUALS('=', 1:9:10)\nNUMBER('42', 1:11:13)\nSEMI(';', 1:13:14)\n", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)

    def test_strings(self):
        code = 'printf("Hello, World!");'
        expected_out = bytes("""IDENTIFIER(\'printf\', 1:1:7)\nLPAREN(\'(\', 1:7:8)\nSTRING(\'"Hello, World!"\', 1:8:23)\nRPAREN(\')\', 1:23:24)\nSEMI(\';\', 1:24:25)\n""", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)

    def test_characters_with_escape_sequences(self):
        code = "char a = '\n'; char b = '\x41'; char c = '\\';"
        expected_out = bytes("""CHAR(\'char\', 1:1:5)\nIDENTIFIER(\'a\', 1:6:7)\nEQUALS(\'=\', 1:8:9)\nCHARACTER("\'\\n\'", 1:10:13)\nSEMI(\';\', 1:13:14)\nCHAR(\'char\', 1:15:19)\nIDENTIFIER(\'b\', 1:20:21)\nEQUALS(\'=\', 1:22:23)\nCHARACTER("\'A\'", 1:24:27)\nSEMI(\';\', 1:27:28)\nCHAR(\'char\', 1:29:33)\nIDENTIFIER(\'c\', 1:34:35)\nEQUALS(\'=\', 1:36:37)\nCHARACTER("\'", 1:38:39)\nBACKSLASH(\'\\\\\', 1:39:40)\nCHARACTER("\'", 1:40:41)\nSEMI(\';\', 1:41:42)\n""", 'utf-8')
        rc, out, err = assert_python_ok(self.script, '-c', code)
        self.assertEqual(out, expected_out)


if __name__ == '__main__':
    unittest.main()
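The expected strings above appear to use the token repr format KIND(text, line:start-column:end-column). A minimal sketch for inspecting the same token stream directly, assuming Tools/cases_generator is on sys.path, might look like:

import sys

sys.path.insert(0, "Tools/cases_generator")  # assumed checkout-relative path

from lexer import tokenize

# Print one token per line, in the same repr form the tests compare against.
for tkn in tokenize("int num = 42;"):
    print(tkn)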
@@ -0,0 +1 @@
TODOs in the cases generator lexer: dedent > 0 case in the to_text function + escape sequence handling
16 changes: 11 additions & 5 deletions Tools/cases_generator/lexer.py
@@ -109,7 +109,7 @@ def choice(*opts: str) -> str:
 string_char = r"""([^"\\\n]|""" + escape_sequence + ")"
 str_re = '"' + string_char + '*"'
 STRING = "STRING"
-char = r"\'.\'" # TODO: escape sequence
+char = r"\'([^'\\]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|\\.|\\\\)\'"
 CHARACTER = "CHARACTER"
 
 comment_re = r"(//.*)|/\*([^*]|\*[^/])*\*/"
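To illustrate what the widened pattern accepts (plain characters, octal and hex escapes, and simple backslash escapes), a small standalone check; the regex is copied verbatim from the added line above:

import re

char = r"\'([^'\\]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|\\.|\\\\)\'"

# Each of these C character literals should now match in full.
for lit in ["'a'", r"'\n'", r"'\101'", r"'\x41'"]:
    print(lit, bool(re.fullmatch(char, lit)))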
@@ -344,7 +344,13 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
         if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text:
             if dedent < 0:
                 text = text.replace("\n", "\n" + " " * -dedent)
-            # TODO: dedent > 0
+            elif dedent > 0:
+                temp: list[str] = []
+                for line in text.split("\n"):
+                    leading_space = len(line) - len(line.lstrip(' '))
+                    line = line[min(leading_space, dedent):]
+                    temp.append(line)
+                text = "\n".join(temp)
         res.append(text)
         line, col = tkn.end
     return "".join(res)
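A standalone sketch of the dedent rule added above: each line of a multi-line comment loses at most dedent leading spaces, so a line indented less than dedent is flushed left rather than having non-space characters removed.

def dedent_comment(text: str, dedent: int) -> str:
    # Same logic as the new elif branch: strip at most `dedent`
    # leading spaces from every line of the comment text.
    out = []
    for line in text.split("\n"):
        leading_space = len(line) - len(line.lstrip(' '))
        out.append(line[min(leading_space, dedent):])
    return "\n".join(out)

comment = "/*\n        eight spaces\n    four spaces\n*/"
print(dedent_comment(comment, 4))
# -> "/*\n    eight spaces\nfour spaces\n*/"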
@@ -358,6 +364,6 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
         src = sys.argv[2]
     else:
         src = open(filename).read()
-    # print(to_text(tokenize(src)))
-    for tkn in tokenize(src, filename=filename):
-        print(tkn)
+
+    dedent = int(sys.argv[3])
+    print(to_text(tokenize(src), dedent))
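With this change the script's command line is roughly lexer.py (-c <source> | <filename>) <dedent>; as written, sys.argv[3] is read unconditionally, so the dedent argument appears to be required. The same call made in-process, assuming the module is importable, would be:

import sys

sys.path.insert(0, "Tools/cases_generator")  # assumed checkout-relative path

from lexer import to_text, tokenize

src = "int main() {\n    /*\n        a comment\n    */\n    return 0;\n}\n"

# Same call the updated __main__ block makes.
print(to_text(tokenize(src), 4))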