From 456ec98866b26920d978bfff86db86008fa0fd2b Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Sat, 15 Mar 2025 11:48:44 +0400
Subject: [PATCH 1/5] Add tests for `tokenize` command-line interface

---
 Lib/test/test_tokenize.py | 83 +++++++++++++++++++++++++++++++++++++++
 Lib/tokenize.py           |  6 +--
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 5fa4e0d922ed08..d09719eae40586 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,5 +1,8 @@
+import contextlib
+import itertools
 import os
 import re
+import tempfile
 import token
 import tokenize
 import unittest
@@ -3178,5 +3181,85 @@ def test_newline_at_the_end_of_buffer(self):
         run_test_script(file_name)
 
 
+class CommandLineTest(unittest.TestCase):
+    def setUp(self):
+        self.filename = tempfile.mktemp()
+        self.addCleanup(os_helper.unlink, self.filename)
+
+    @staticmethod
+    def text_normalize(string):
+        """Dedent *string* and strip it of its surrounding whitespace.
+
+        This method is used by the other utility functions so that any
+        string to write or to match against can be freely indented.
+        """
+        def normalize_spaces(text):
+            return re.sub(r'\s+', ' ', text).strip()
+        return normalize_spaces(dedent(string)).strip()
+
+    def set_source(self, content):
+        with open(self.filename, 'w') as fp:
+            fp.write(content)
+
+    def invoke_tokenize(self, *flags):
+        output = StringIO()
+        with contextlib.redirect_stdout(output):
+            tokenize._main(args=[*flags, self.filename])
+        return self.text_normalize(output.getvalue())
+
+    def check_output(self, source, expect, *flags):
+        with self.subTest(source=source, flags=flags):
+            self.set_source(source)
+            res = self.invoke_tokenize(*flags)
+            expect = self.text_normalize(expect)
+            self.assertListEqual(res.splitlines(), expect.splitlines())
+
+    def test_invocation(self):
+        # test various combinations of parameters
+        base_flags = ('-e', '--exact')
+
+        self.set_source('''
+            def f():
+                print(x)
+                return None
+            ''')
+
+        for flag in base_flags:
+            with self.subTest(args=flag):
+                _ = self.invoke_tokenize(flag)
+
+        with self.assertRaises(SystemExit):
+            # suppress argparse error message
+            with contextlib.redirect_stderr(StringIO()):
+                _ = self.invoke_tokenize('--unknown')
+
+    def test_without_flag(self):
+        # test 'python -m tokenize source.py'
+        source = 'a = 1\n'
+        expect = '''
+            0,0-0,0: ENCODING 'utf-8'
+            1,0-1,1: NAME 'a'
+            1,2-1,3: OP '='
+            1,4-1,5: NUMBER '1'
+            1,5-1,6: NEWLINE '\\n'
+            2,0-2,0: ENDMARKER ''
+        '''
+        self.check_output(source, expect)
+
+    def test_exact_flag(self):
+        # test 'python -m tokenize -e/--exact source.py'
+        source = 'a = 1\n'
+        expect = '''
+            0,0-0,0: ENCODING 'utf-8'
+            1,0-1,1: NAME 'a'
+            1,2-1,3: EQUAL '='
+            1,4-1,5: NUMBER '1'
+            1,5-1,6: NEWLINE '\\n'
+            2,0-2,0: ENDMARKER ''
+        '''
+        for flag in ['-e', '--exact']:
+            self.check_output(source, expect, flag)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9ce95a62d961ba..7afacff7381f1c 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -499,7 +499,7 @@ def generate_tokens(readline):
     """
     return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
 
-def main():
+def _main(args=None):
     import argparse
 
     # Helper error handling routines
@@ -524,7 +524,7 @@ def error(message, filename=None, location=None):
                         help='the file to tokenize; defaults to stdin')
     parser.add_argument('-e', '--exact', dest='exact', action='store_true',
                         help='display token names using the exact type')
-    args = parser.parse_args()
+    args = parser.parse_args(args)
 
     try:
         # Tokenize the input
@@ -589,4 +589,4 @@ def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False)
 
 
 if __name__ == "__main__":
-    main()
+    _main()

From 969a537e93b26464c910ba78ab8e7c2e7b95dcc4 Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Sat, 15 Mar 2025 13:36:54 +0400
Subject: [PATCH 2/5] remove unused import

---
 Lib/test/test_tokenize.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index d09719eae40586..5294976d28dd39 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,5 +1,4 @@
 import contextlib
-import itertools
 import os
 import re
 import tempfile

From 94ccd299b110acc6f2cd5db83e8a25123d0f6f05 Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Sat, 15 Mar 2025 13:43:35 +0400
Subject: [PATCH 3/5] Correct source and expect newline

---
 Lib/test/test_tokenize.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 5294976d28dd39..245822e1e06b9c 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3198,7 +3198,7 @@ def normalize_spaces(text):
 
     def set_source(self, content):
         with open(self.filename, 'w') as fp:
-            fp.write(content)
+            fp.write(content+'\n')
 
     def invoke_tokenize(self, *flags):
         output = StringIO()
@@ -3234,7 +3234,7 @@ def f():
 
     def test_without_flag(self):
         # test 'python -m tokenize source.py'
-        source = 'a = 1\n'
+        source = 'a = 1'
         expect = '''
             0,0-0,0: ENCODING 'utf-8'
             1,0-1,1: NAME 'a'
@@ -3247,7 +3247,7 @@ def test_without_flag(self):
 
     def test_exact_flag(self):
         # test 'python -m tokenize -e/--exact source.py'
-        source = 'a = 1\n'
+        source = 'a = 1'
         expect = '''
             0,0-0,0: ENCODING 'utf-8'
             1,0-1,1: NAME 'a'

From 14b021174d524ccda699ae438b1247da200ad0ff Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Sat, 15 Mar 2025 14:03:40 +0400
Subject: [PATCH 4/5] Remove \n from newline

---
 Lib/test/test_tokenize.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 245822e1e06b9c..788386eb2d4f20 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3198,7 +3198,7 @@ def normalize_spaces(text):
 
     def set_source(self, content):
         with open(self.filename, 'w') as fp:
-            fp.write(content+'\n')
+            fp.write(content)
 
     def invoke_tokenize(self, *flags):
         output = StringIO()
@@ -3240,7 +3240,7 @@ def test_without_flag(self):
             1,0-1,1: NAME 'a'
             1,2-1,3: OP '='
             1,4-1,5: NUMBER '1'
-            1,5-1,6: NEWLINE '\\n'
+            1,5-1,6: NEWLINE ''
             2,0-2,0: ENDMARKER ''
         '''
         self.check_output(source, expect)
@@ -3253,7 +3253,7 @@ def test_exact_flag(self):
             1,0-1,1: NAME 'a'
             1,2-1,3: EQUAL '='
             1,4-1,5: NUMBER '1'
-            1,5-1,6: NEWLINE '\\n'
+            1,5-1,6: NEWLINE ''
             2,0-2,0: ENDMARKER ''
         '''
         for flag in ['-e', '--exact']:

From aec9cf09e837b745bd4727f6f9c9b215662b84b3 Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Thu, 27 Mar 2025 18:34:01 +0400
Subject: [PATCH 5/5] Fix normalize function

---
 Lib/test/test_tokenize.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 788386eb2d4f20..df2617c680b5e5 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3192,9 +3192,7 @@ def text_normalize(string):
         This method is used by the other utility functions so that any
         string to write or to match against can be freely indented.
         """
-        def normalize_spaces(text):
-            return re.sub(r'\s+', ' ', text).strip()
-        return normalize_spaces(dedent(string)).strip()
+        return re.sub(r'\s+', ' ', string).strip()
 
     def set_source(self, content):
        with open(self.filename, 'w') as fp: