From 92ecf9c9fcaec90ec66f3c5c91448ef4a9f650a5 Mon Sep 17 00:00:00 2001
From: ksfi
Date: Thu, 28 Dec 2023 18:26:53 +0100
Subject: [PATCH 1/6] Handle dedent > 0 and character escape sequences in the
 lexer

---
 Tools/cases_generator/lexer.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index c3c2954a42083f..90121f41c69250 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -109,7 +109,7 @@ def choice(*opts: str) -> str:
 string_char = r"""([^"\\\n]|""" + escape_sequence + ")"
 str_re = '"' + string_char + '*"'
 STRING = "STRING"
-char = r"\'.\'"  # TODO: escape sequence
+char = r"\'([^'\\]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|\\.|\\\\)\'"
 CHARACTER = "CHARACTER"
 
 comment_re = r"(//.*)|/\*([^*]|\*[^/])*\*/"
@@ -344,7 +344,16 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
         if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text:
             if dedent < 0:
                 text = text.replace("\n", "\n" + " " * -dedent)
-            # TODO: dedent > 0
+            elif dedent > 0:
+                ret = []
+                for line in text.split("\n"):
+                    leading_space = len(line) - len(line.lstrip())
+                    if leading_space > dedent:
+                        line = re.sub(r'(?m)^[ \t]{' + str(dedent) + r'}', '', line)
+                    else:
+                        line = re.sub(r'(?m)^[ \t]{' + str(leading_space) + r'}', '', line)
+                    ret.append(line)
+                text = "\n".join(ret)
         res.append(text)
         line, col = tkn.end
     return "".join(res)

From 0871ee8f1419c503cafb604fc56df0098bd11774 Mon Sep 17 00:00:00 2001
From: ksfi
Date: Thu, 28 Dec 2023 18:49:26 +0100
Subject: [PATCH 2/6] Add type annotations to the new dedent code

---
 Tools/cases_generator/lexer.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index 90121f41c69250..0f59208c479095 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -345,15 +345,15 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
             if dedent < 0:
                 text = text.replace("\n", "\n" + " " * -dedent)
             elif dedent > 0:
-                ret = []
+                temp: list[str] = []
                 for line in text.split("\n"):
-                    leading_space = len(line) - len(line.lstrip())
+                    leading_space: int = len(line) - len(line.lstrip())
                     if leading_space > dedent:
                         line = re.sub(r'(?m)^[ \t]{' + str(dedent) + r'}', '', line)
                     else:
                         line = re.sub(r'(?m)^[ \t]{' + str(leading_space) + r'}', '', line)
-                    ret.append(line)
-                text = "\n".join(ret)
+                    temp.append(line)
+                text = "\n".join(temp)
         res.append(text)
         line, col = tkn.end
     return "".join(res)

From a041e526fdcdda757ea87c3b06de0d15b9725640 Mon Sep 17 00:00:00 2001
From: ksfi
Date: Thu, 28 Dec 2023 19:01:37 +0100
Subject: [PATCH 3/6] Drop a redundant type annotation

---
 Tools/cases_generator/lexer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index 0f59208c479095..020dfcc41da6ac 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -347,7 +347,7 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
             elif dedent > 0:
                 temp: list[str] = []
                 for line in text.split("\n"):
-                    leading_space: int = len(line) - len(line.lstrip())
+                    leading_space = len(line) - len(line.lstrip())
                     if leading_space > dedent:
                         line = re.sub(r'(?m)^[ \t]{' + str(dedent) + r'}', '', line)
                     else:

From 0ad22c3bce9cbbfd80e808942c6eb61b03e119d3 Mon Sep 17 00:00:00 2001
From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com>
Date: Thu, 28 Dec 2023 18:12:25 +0000
Subject: [PATCH 4/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?=
 =?UTF-8?q?rb=5Fit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Tools-Demos/2023-12-28-18-12-24.gh-issue-113547.TIwp20.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/Tools-Demos/2023-12-28-18-12-24.gh-issue-113547.TIwp20.rst

diff --git a/Misc/NEWS.d/next/Tools-Demos/2023-12-28-18-12-24.gh-issue-113547.TIwp20.rst b/Misc/NEWS.d/next/Tools-Demos/2023-12-28-18-12-24.gh-issue-113547.TIwp20.rst
new file mode 100644
index 00000000000000..a24361024c6eb7
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2023-12-28-18-12-24.gh-issue-113547.TIwp20.rst
@@ -0,0 +1 @@
+Implement the remaining TODOs in the cases generator lexer: handle the ``dedent > 0`` case in the ``to_text`` function and escape sequences in character literals.

From 33cb7a02b47a6ef9d94e30e0eadc62eb5c81a37b Mon Sep 17 00:00:00 2001
From: ksfi
Date: Sun, 7 Jan 2024 20:54:06 +0100
Subject: [PATCH 5/6] Simplify the indent expression and add tests

---
 Lib/test/test_tools/test_lexer_to_text.py  | 75 ++++++++++++++++++++++
 Lib/test/test_tools/test_lexer_tokenize.py | 50 +++++++++++++++
 Tools/cases_generator/lexer.py             | 16 ++++++++--------
 3 files changed, 133 insertions(+), 8 deletions(-)
 create mode 100644 Lib/test/test_tools/test_lexer_to_text.py
 create mode 100644 Lib/test/test_tools/test_lexer_tokenize.py

diff --git a/Lib/test/test_tools/test_lexer_to_text.py b/Lib/test/test_tools/test_lexer_to_text.py
new file mode 100644
index 00000000000000..88350139189b76
--- /dev/null
+++ b/Lib/test/test_tools/test_lexer_to_text.py
@@ -0,0 +1,75 @@
+"""Tests for scripts in the Tools directory.
+
+This file contains regression tests for some of the scripts found in the
+Tools directory of a Python checkout or tarball.
+"""
+
+import os
+import unittest
+from test.support.script_helper import assert_python_ok
+
+from test.test_tools import toolsdir, skip_if_missing
+
+skip_if_missing()
+
+class LexerToTextTests(unittest.TestCase):
+    script = os.path.join(toolsdir, 'cases_generator', 'lexer.py')
+
+    def test_multiline_comment_dedent_dedent4(self):
+        input_code = """
+        int main() {
+            /*
+            This is a
+            multi-line comment.
+            Let's see if it de-indents correctly.
+            */
+            return 0;
+        }
+
+        """
+
+        expected_output = """
+    int main() {
+        /*
+        This is a
+        multi-line comment.
+        Let's see if it de-indents correctly.
+        */
+        return 0;
+}
+"""
+
+        dedent_amount = '4'
+        rc, out, err = assert_python_ok(self.script, '-c', input_code, dedent_amount)
+        self.assertEqual(out, bytes(expected_output, 'utf-8')[1:], "Multi-line comment de-indentation failed")
+
+    def test_multiline_comment_dedent_dedent40(self):
+        input_code = """
+        int main() {
+            /*
+            This is a
+            multi-line comment.
+            Let's see if it de-indents correctly.
+            */
+            return 0;
+        }
+
+        """
+
+        expected_output = """
+int main() {
+/*
+This is a
+multi-line comment.
+Let's see if it de-indents correctly.
+*/
+return 0;
+}
+"""
+
+        dedent_amount = '40'
+        rc, out, err = assert_python_ok(self.script, '-c', input_code, dedent_amount)
+        self.assertEqual(out, bytes(expected_output, 'utf-8')[1:], "Multi-line comment de-indentation failed")
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Lib/test/test_tools/test_lexer_tokenize.py b/Lib/test/test_tools/test_lexer_tokenize.py
new file mode 100644
index 00000000000000..5abf0783a72762
--- /dev/null
+++ b/Lib/test/test_tools/test_lexer_tokenize.py
@@ -0,0 +1,50 @@
+"""Tests for scripts in the Tools directory.
+
+This file contains regression tests for some of the scripts found in the
+Tools directory of a Python checkout or tarball.
+""" + +import os +import unittest +from test.support.script_helper import assert_python_ok +from test.support import findfile + +from test.test_tools import toolsdir, skip_if_missing + +skip_if_missing() + +class TokenizeTests(unittest.TestCase): + script = os.path.join(toolsdir, 'cases_generator', 'lexer.py') + + def test_identifiers(self): + code = "int myVariable = 123;" + expected_out = bytes("INT('int', 1:1:4)\nIDENTIFIER('myVariable', 1:5:15)\nEQUALS('=', 1:16:17)\nNUMBER('123', 1:18:21)\nSEMI(';', 1:21:22)\n", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + + def test_operators(self): + code = "x = y + z;" + expected_out = bytes("IDENTIFIER('x', 1:1:2)\nEQUALS('=', 1:3:4)\nIDENTIFIER('y', 1:5:6)\nPLUS('+', 1:7:8)\nIDENTIFIER('z', 1:9:10)\nSEMI(';', 1:10:11)\n", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + + def test_numbers(self): + code = "int num = 42;" + expected_out = bytes("INT('int', 1:1:4)\nIDENTIFIER('num', 1:5:8)\nEQUALS('=', 1:9:10)\nNUMBER('42', 1:11:13)\nSEMI(';', 1:13:14)\n", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + + def test_strings(self): + code = 'printf("Hello, World!");' + expected_out = bytes("""IDENTIFIER(\'printf\', 1:1:7)\nLPAREN(\'(\', 1:7:8)\nSTRING(\'"Hello, World!"\', 1:8:23)\nRPAREN(\')\', 1:23:24)\nSEMI(\';\', 1:24:25)\n""", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + + def test_characters_with_escape_sequences(self): + code = "char a = '\n'; char b = '\x41'; char c = '\\';" + expected_out = bytes("""CHAR(\'char\', 1:1:5)\nIDENTIFIER(\'a\', 1:6:7)\nEQUALS(\'=\', 1:8:9)\nCHARACTER("\'\\n\'", 1:10:13)\nSEMI(\';\', 1:13:14)\nCHAR(\'char\', 1:15:19)\nIDENTIFIER(\'b\', 1:20:21)\nEQUALS(\'=\', 1:22:23)\nCHARACTER("\'A\'", 1:24:27)\nSEMI(\';\', 1:27:28)\nCHAR(\'char\', 1:29:33)\nIDENTIFIER(\'c\', 1:34:35)\nEQUALS(\'=\', 1:36:37)\nCHARACTER("\'", 1:38:39)\nBACKSLASH(\'\\\\\', 1:39:40)\nCHARACTER("\'", 1:40:41)\nSEMI(\';\', 1:41:42)\n""", 'utf-8') + rc, out, err = assert_python_ok(self.script, '-c', code) + self.assertEqual(out, expected_out) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 020dfcc41da6ac..c416479fb2c1ae 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -347,11 +347,8 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str: elif dedent > 0: temp: list[str] = [] for line in text.split("\n"): - leading_space = len(line) - len(line.lstrip()) - if leading_space > dedent: - line = re.sub(r'(?m)^[ \t]{' + str(dedent) + r'}', '', line) - else: - line = re.sub(r'(?m)^[ \t]{' + str(leading_space) + r'}', '', line) + leading_space = len(line) - len(line.lstrip(' ')) + line = line[min(leading_space, dedent):] temp.append(line) text = "\n".join(temp) res.append(text) @@ -367,6 +364,9 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str: src = sys.argv[2] else: src = open(filename).read() - # print(to_text(tokenize(src))) - for tkn in tokenize(src, filename=filename): - print(tkn) + + dedent = int(sys.argv[3]) + print(to_text(tokenize(src), dedent)) + + # for tkn in tokenize(src, filename=filename): + # print(tkn) From d7e0295c1345ee4ca3f346aed8972ede4dbf0558 Mon Sep 17 00:00:00 2001 From: ffffffff Date: Fri, 3 Oct 2025 18:09:29 +0200 Subject: [PATCH 6/6] 
---
 Lib/test/test_tools/{test_lexer_to_text.py => test_lexer.py} | 0
 Tools/cases_generator/lexer.py                               | 5 +----
 2 files changed, 1 insertion(+), 4 deletions(-)
 rename Lib/test/test_tools/{test_lexer_to_text.py => test_lexer.py} (100%)

diff --git a/Lib/test/test_tools/test_lexer_to_text.py b/Lib/test/test_tools/test_lexer.py
similarity index 100%
rename from Lib/test/test_tools/test_lexer_to_text.py
rename to Lib/test/test_tools/test_lexer.py
diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index c416479fb2c1ae..cd1076a53fd240 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -366,7 +366,4 @@ def to_text(tkns: list[Token], dedent: int = 0) -> str:
         src = open(filename).read()
 
     dedent = int(sys.argv[3]) if len(sys.argv) > 3 else 0
-    print(to_text(tokenize(src), dedent))
-
-    # for tkn in tokenize(src, filename=filename):
-    #     print(tkn)
+    print(to_text(tokenize(src), dedent))
\ No newline at end of file
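
A standalone sketch of the comment-dedent rule this series settles on (an
illustration only, not part of any patch; the helper name is local to this
note). Each line of a multi-line comment loses at most `dedent` leading
spaces, so a line indented by less than `dedent` is flattened to column 0
rather than truncated:

    # Mirrors the dedent > 0 branch of to_text() as of PATCH 5/6.
    def dedent_comment(text: str, dedent: int) -> str:
        out = []
        for line in text.split("\n"):
            leading_space = len(line) - len(line.lstrip(' '))
            # Strip min(leading_space, dedent) columns; never non-space chars.
            out.append(line[min(leading_space, dedent):])
        return "\n".join(out)

    assert dedent_comment("    /* x\n      y\n  z */", 4) == "/* x\n  y\nz */"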