From 456ec98866b26920d978bfff86db86008fa0fd2b Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Sat, 15 Mar 2025 11:48:44 +0400
Subject: [PATCH 1/5] Add tests for `tokenize` command-line interface

---
 Lib/test/test_tokenize.py | 83 +++++++++++++++++++++++++++++++++++++++
 Lib/tokenize.py           |  6 +--
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 5fa4e0d922ed08..d09719eae40586 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,5 +1,8 @@
+import contextlib
+import itertools
 import os
 import re
+import tempfile
 import token
 import tokenize
 import unittest
@@ -3178,5 +3181,85 @@ def test_newline_at_the_end_of_buffer(self):
         run_test_script(file_name)
 
 
+class CommandLineTest(unittest.TestCase):
+    def setUp(self):
+        self.filename = tempfile.mktemp()
+        self.addCleanup(os_helper.unlink, self.filename)
+
+    @staticmethod
+    def text_normalize(string):
+        """Dedent *string* and strip it of its surrounding whitespace.
+
+        This method is used by the other utility functions so that any
+        string to write or to match against can be freely indented.
+        """
+        def normalize_spaces(text):
+            return re.sub(r'\s+', ' ', text).strip()
+        return normalize_spaces(dedent(string)).strip()
+
+    def set_source(self, content):
+        with open(self.filename, 'w') as fp:
+            fp.write(content)
+
+    def invoke_tokenize(self, *flags):
+        output = StringIO()
+        with contextlib.redirect_stdout(output):
+            tokenize._main(args=[*flags, self.filename])
+        return self.text_normalize(output.getvalue())
+
+    def check_output(self, source, expect, *flags):
+        with self.subTest(source=source, flags=flags):
+            self.set_source(source)
+            res = self.invoke_tokenize(*flags)
+            expect = self.text_normalize(expect)
+            self.assertListEqual(res.splitlines(), expect.splitlines())
+
+    def test_invocation(self):
+        # test various combinations of parameters
+        base_flags = ('-e', '--exact')
+
+        self.set_source('''
+            def f():
+                print(x)
+                return None
+            ''')
+
+        for flag in base_flags:
+            with self.subTest(args=flag):
+                _ = self.invoke_tokenize(flag)
+
+        with self.assertRaises(SystemExit):
+            # suppress argparse error message
+            with contextlib.redirect_stderr(StringIO()):
+                _ = self.invoke_tokenize('--unknown')
+
+    def test_without_flag(self):
+        # test 'python -m tokenize source.py'
+        source = 'a = 1\n'
+        expect = '''
+            0,0-0,0: ENCODING 'utf-8'
+            1,0-1,1: NAME 'a'
+            1,2-1,3: OP '='
+            1,4-1,5: NUMBER '1'
+            1,5-1,6: NEWLINE '\\n'
+            2,0-2,0: ENDMARKER ''
+        '''
+        self.check_output(source, expect)
+
+    def test_exact_flag(self):
+        # test 'python -m tokenize -e/--exact source.py'
+        source = 'a = 1\n'
+        expect = '''
+            0,0-0,0: ENCODING 'utf-8'
+            1,0-1,1: NAME 'a'
+            1,2-1,3: EQUAL '='
+            1,4-1,5: NUMBER '1'
+            1,5-1,6: NEWLINE '\\n'
+            2,0-2,0: ENDMARKER ''
+        '''
+        for flag in ['-e', '--exact']:
+            self.check_output(source, expect, flag)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9ce95a62d961ba..7afacff7381f1c 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -499,7 +499,7 @@ def generate_tokens(readline):
     """
     return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
 
-def main():
+def _main(args=None):
     import argparse
 
     # Helper error handling routines
@@ -524,7 +524,7 @@ def error(message, filename=None, location=None):
                         help='the file to tokenize; defaults to stdin')
     parser.add_argument('-e', '--exact', dest='exact', action='store_true',
                         help='display token names using the exact type')
-    args = parser.parse_args()
+    args = parser.parse_args(args)
 
     try:
         # Tokenize the input
@@ -589,4 +589,4 @@ def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False)
 
 
 if __name__ == "__main__":
-    main()
+    _main()

From 969a537e93b26464c910ba78ab8e7c2e7b95dcc4 Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Sat, 15 Mar 2025 13:36:54 +0400
Subject: [PATCH 2/5] remove unused import

---
 Lib/test/test_tokenize.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index d09719eae40586..5294976d28dd39 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,5 +1,4 @@
 import contextlib
-import itertools
 import os
 import re
 import tempfile

From 94ccd299b110acc6f2cd5db83e8a25123d0f6f05 Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Sat, 15 Mar 2025 13:43:35 +0400
Subject: [PATCH 3/5] Correct source and expect newline

---
 Lib/test/test_tokenize.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 5294976d28dd39..245822e1e06b9c 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3198,7 +3198,7 @@ def normalize_spaces(text):
 
     def set_source(self, content):
         with open(self.filename, 'w') as fp:
-            fp.write(content)
+            fp.write(content+'\n')
 
     def invoke_tokenize(self, *flags):
         output = StringIO()
@@ -3234,7 +3234,7 @@ def f():
 
     def test_without_flag(self):
         # test 'python -m tokenize source.py'
-        source = 'a = 1\n'
+        source = 'a = 1'
         expect = '''
             0,0-0,0: ENCODING 'utf-8'
             1,0-1,1: NAME 'a'
@@ -3247,7 +3247,7 @@ def test_without_flag(self):
 
     def test_exact_flag(self):
         # test 'python -m tokenize -e/--exact source.py'
-        source = 'a = 1\n'
+        source = 'a = 1'
         expect = '''
             0,0-0,0: ENCODING 'utf-8'
             1,0-1,1: NAME 'a'

From 14b021174d524ccda699ae438b1247da200ad0ff Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Sat, 15 Mar 2025 14:03:40 +0400
Subject: [PATCH 4/5] Remove \n from newline

---
 Lib/test/test_tokenize.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 245822e1e06b9c..788386eb2d4f20 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3198,7 +3198,7 @@ def normalize_spaces(text):
 
     def set_source(self, content):
         with open(self.filename, 'w') as fp:
-            fp.write(content+'\n')
+            fp.write(content)
 
     def invoke_tokenize(self, *flags):
         output = StringIO()
@@ -3240,7 +3240,7 @@ def test_without_flag(self):
             1,0-1,1: NAME 'a'
             1,2-1,3: OP '='
             1,4-1,5: NUMBER '1'
-            1,5-1,6: NEWLINE '\\n'
+            1,5-1,6: NEWLINE ''
             2,0-2,0: ENDMARKER ''
         '''
         self.check_output(source, expect)
@@ -3253,7 +3253,7 @@ def test_exact_flag(self):
             1,0-1,1: NAME 'a'
             1,2-1,3: EQUAL '='
             1,4-1,5: NUMBER '1'
-            1,5-1,6: NEWLINE '\\n'
+            1,5-1,6: NEWLINE ''
             2,0-2,0: ENDMARKER ''
         '''
         for flag in ['-e', '--exact']:

From aec9cf09e837b745bd4727f6f9c9b215662b84b3 Mon Sep 17 00:00:00 2001
From: donBarbos
Date: Thu, 27 Mar 2025 18:34:01 +0400
Subject: [PATCH 5/5] Fix normalize function

---
 Lib/test/test_tokenize.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 788386eb2d4f20..df2617c680b5e5 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3192,9 +3192,7 @@ def text_normalize(string):
         This method is used by the other utility functions so that any
         string to write or to match against can be freely indented.
         """
-        def normalize_spaces(text):
-            return re.sub(r'\s+', ' ', text).strip()
-        return normalize_spaces(dedent(string)).strip()
+        return re.sub(r'\s+', ' ', string).strip()
 
     def set_source(self, content):
        with open(self.filename, 'w') as fp: