From ed8066885fc2fe764a44865aa33de50660a8e640 Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 15:01:54 +0000 Subject: [PATCH 01/12] rm: ci --- .github/workflows/ci.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5147f457..8389b608 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,11 +39,6 @@ jobs: run: uv sync --locked --dev - name: Native Parser Tests run: uv run poe test - - name: Pure Parser Tests - env: - COVERAGE_FILE: .coverage.pure - LIBCST_PARSER_TYPE: pure - run: uv run poe test - name: Coverage run: | uv run coverage combine .coverage.pure From ece60e203b57b5bd8e7e06c87f9f5ba05a33aad1 Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 16:08:09 +0000 Subject: [PATCH 02/12] rm: entry point --- libcst/_parser/entrypoints.py | 70 +++++++---------------------------- 1 file changed, 13 insertions(+), 57 deletions(-) diff --git a/libcst/_parser/entrypoints.py b/libcst/_parser/entrypoints.py index d9cee5e9..74c8f9a0 100644 --- a/libcst/_parser/entrypoints.py +++ b/libcst/_parser/entrypoints.py @@ -9,7 +9,6 @@ information """ -import os from functools import partial from typing import Union @@ -17,18 +16,13 @@ from libcst._nodes.expression import BaseExpression from libcst._nodes.module import Module from libcst._nodes.statement import BaseCompoundStatement, SimpleStatementLine -from libcst._parser.detect_config import convert_to_utf8, detect_config -from libcst._parser.grammar import get_grammar, validate_grammar -from libcst._parser.python_parser import PythonCSTParser +from libcst._parser.detect_config import convert_to_utf8 from libcst._parser.types.config import PartialParserConfig _DEFAULT_PARTIAL_PARSER_CONFIG: PartialParserConfig = PartialParserConfig() -def is_native() -> bool: - typ = os.environ.get("LIBCST_PARSER_TYPE") - return typ != "pure" - +from libcst import native def _parse( entrypoint: str, @@ -38,57 +32,19 
@@ def _parse( detect_trailing_newline: bool, detect_default_newline: bool, ) -> CSTNode: - if is_native(): - from libcst.native import parse_expression, parse_module, parse_statement - - encoding, source_str = convert_to_utf8(source, partial=config) - - if entrypoint == "file_input": - parse = partial(parse_module, encoding=encoding) - elif entrypoint == "stmt_input": - parse = parse_statement - elif entrypoint == "expression_input": - parse = parse_expression - else: - raise ValueError(f"Unknown parser entry point: {entrypoint}") - - return parse(source_str) - return _pure_python_parse( - entrypoint, - source, - config, - detect_trailing_newline=detect_trailing_newline, - detect_default_newline=detect_default_newline, - ) + encoding, source_str = convert_to_utf8(source, partial=config) -def _pure_python_parse( - entrypoint: str, - source: Union[str, bytes], - config: PartialParserConfig, - *, - detect_trailing_newline: bool, - detect_default_newline: bool, -) -> CSTNode: - detection_result = detect_config( - source, - partial=config, - detect_trailing_newline=detect_trailing_newline, - detect_default_newline=detect_default_newline, - ) - validate_grammar() - grammar = get_grammar(config.parsed_python_version, config.future_imports) - - parser = PythonCSTParser( - tokens=detection_result.tokens, - config=detection_result.config, - pgen_grammar=grammar, - start_nonterminal=entrypoint, - ) - # The parser has an Any return type, we can at least refine it to CSTNode here. 
- result = parser.parse() - assert isinstance(result, CSTNode) - return result + if entrypoint == "file_input": + parse = partial(native.parse_module, encoding=encoding) + elif entrypoint == "stmt_input": + parse = native.parse_statement + elif entrypoint == "expression_input": + parse = native.parse_expression + else: + raise ValueError(f"Unknown parser entry point: {entrypoint}") + + return parse(source_str) def parse_module( From 4da1bef77ac3d3ed7a0ed543f8f145fe192bc3c3 Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 16:17:36 +0000 Subject: [PATCH 03/12] fix: tests --- libcst/_nodes/tests/test_atom.py | 3 +-- libcst/_nodes/tests/test_binary_op.py | 3 +-- libcst/_nodes/tests/test_classdef.py | 3 --- libcst/_nodes/tests/test_dict.py | 3 +-- libcst/_nodes/tests/test_funcdef.py | 11 +---------- libcst/_nodes/tests/test_list.py | 3 +-- libcst/_nodes/tests/test_match.py | 5 +---- libcst/_nodes/tests/test_matrix_multiply.py | 3 +-- libcst/_nodes/tests/test_module.py | 4 ++-- libcst/_nodes/tests/test_set.py | 3 +-- libcst/_nodes/tests/test_try.py | 6 ++---- libcst/_nodes/tests/test_tuple.py | 4 ++-- libcst/_nodes/tests/test_type_alias.py | 6 +----- libcst/_nodes/tests/test_with.py | 14 +++++++------- libcst/_nodes/tests/test_yield.py | 4 ++-- libcst/_parser/tests/test_parse_errors.py | 4 +--- libcst/codemod/tests/test_codemod_cli.py | 16 +++++----------- libcst/metadata/tests/test_scope_provider.py | 16 +--------------- libcst/tests/__main__.py | 5 +---- libcst/tests/test_roundtrip.py | 4 +--- 20 files changed, 33 insertions(+), 87 deletions(-) diff --git a/libcst/_nodes/tests/test_atom.py b/libcst/_nodes/tests/test_atom.py index 82f7ab99..a33732c2 100644 --- a/libcst/_nodes/tests/test_atom.py +++ b/libcst/_nodes/tests/test_atom.py @@ -9,7 +9,6 @@ import libcst as cst from libcst import parse_expression from libcst._nodes.tests.base import CSTNodeTest, parse_expression_as -from libcst._parser.entrypoints import is_native from libcst.metadata 
import CodeRange from libcst.testing.utils import data_provider @@ -1184,7 +1183,7 @@ def test_invalid(self, **kwargs: Any) -> None: ) ) def test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) diff --git a/libcst/_nodes/tests/test_binary_op.py b/libcst/_nodes/tests/test_binary_op.py index b86af9fe..fddc90e1 100644 --- a/libcst/_nodes/tests/test_binary_op.py +++ b/libcst/_nodes/tests/test_binary_op.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_expression from libcst._nodes.tests.base import CSTNodeTest -from libcst._parser.entrypoints import is_native from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -189,4 +188,4 @@ def test_invalid(self, **kwargs: Any) -> None: ) ) def test_parse_error(self, **kwargs: Any) -> None: - self.assert_parses(**kwargs, expect_success=not is_native()) + self.assert_parses(**kwargs, expect_success=False ) diff --git a/libcst/_nodes/tests/test_classdef.py b/libcst/_nodes/tests/test_classdef.py index cca36fbb..2e026a6c 100644 --- a/libcst/_nodes/tests/test_classdef.py +++ b/libcst/_nodes/tests/test_classdef.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_statement from libcst._nodes.tests.base import CSTNodeTest -from libcst._parser.entrypoints import is_native from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -210,8 +209,6 @@ def test_valid(self, **kwargs: Any) -> None: ) ) def test_valid_native(self, **kwargs: Any) -> None: - if not is_native(): - self.skipTest("Disabled for pure python parser") self.validate_node(**kwargs) @data_provider( diff --git a/libcst/_nodes/tests/test_dict.py b/libcst/_nodes/tests/test_dict.py index 1ee33332..47cb0663 100644 --- a/libcst/_nodes/tests/test_dict.py +++ b/libcst/_nodes/tests/test_dict.py @@ -8,7 +8,6 @@ 
import libcst as cst from libcst import parse_expression from libcst._nodes.tests.base import CSTNodeTest, parse_expression_as -from libcst._parser.entrypoints import is_native from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -188,6 +187,6 @@ def test_invalid(self, **kwargs: Any) -> None: ) ) def test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) diff --git a/libcst/_nodes/tests/test_funcdef.py b/libcst/_nodes/tests/test_funcdef.py index 65a0ff07..4ed7fcc3 100644 --- a/libcst/_nodes/tests/test_funcdef.py +++ b/libcst/_nodes/tests/test_funcdef.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_statement from libcst._nodes.tests.base import CSTNodeTest, DummyIndentedBlock, parse_statement_as -from libcst._parser.entrypoints import is_native from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -741,8 +740,6 @@ class FunctionDefCreationTest(CSTNodeTest): ) ) def test_valid(self, **kwargs: Any) -> None: - if not is_native() and kwargs.get("native_only", False): - self.skipTest("Disabled for native parser") if "native_only" in kwargs: kwargs.pop("native_only") self.validate_node(**kwargs) @@ -891,8 +888,6 @@ def test_valid(self, **kwargs: Any) -> None: ) ) def test_valid_native(self, **kwargs: Any) -> None: - if not is_native(): - self.skipTest("Disabled for pure python parser") self.validate_node(**kwargs) @data_provider( @@ -2223,8 +2218,6 @@ def test_valid(self, node: cst.CSTNode, code: str) -> None: ) ) def test_valid_38(self, node: cst.CSTNode, code: str, **kwargs: Any) -> None: - if not is_native() and kwargs.get("native_only", False): - self.skipTest("disabled for pure python parser") self.validate_node(node, code, _parse_statement_force_38) @data_provider( @@ -2252,7 +2245,7 @@ def 
test_valid_38(self, node: cst.CSTNode, code: str, **kwargs: Any) -> None: ) ) def test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) @@ -2271,6 +2264,4 @@ def test_versions(self, **kwargs: Any) -> None: ) ) def test_parse_error(self, **kwargs: Any) -> None: - if not is_native(): - self.skipTest("Skipped for non-native parser") self.assert_parses(**kwargs, expect_success=False, parser=parse_statement) diff --git a/libcst/_nodes/tests/test_list.py b/libcst/_nodes/tests/test_list.py index 43e22df7..2f96124c 100644 --- a/libcst/_nodes/tests/test_list.py +++ b/libcst/_nodes/tests/test_list.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_expression, parse_statement from libcst._nodes.tests.base import CSTNodeTest, parse_expression_as -from libcst._parser.entrypoints import is_native from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -126,6 +125,6 @@ def test_invalid( ) ) def test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) diff --git a/libcst/_nodes/tests/test_match.py b/libcst/_nodes/tests/test_match.py index 2f1e4193..a3940053 100644 --- a/libcst/_nodes/tests/test_match.py +++ b/libcst/_nodes/tests/test_match.py @@ -8,12 +8,9 @@ import libcst as cst from libcst import parse_statement from libcst._nodes.tests.base import CSTNodeTest -from libcst._parser.entrypoints import is_native from libcst.testing.utils import data_provider -parser: Optional[Callable[[str], cst.CSTNode]] = ( - parse_statement if is_native() else None -) +parser: Optional[Callable[[str], cst.CSTNode]] = parse_statement class MatchTest(CSTNodeTest): diff --git 
a/libcst/_nodes/tests/test_matrix_multiply.py b/libcst/_nodes/tests/test_matrix_multiply.py index 5b4b8668..500b7aab 100644 --- a/libcst/_nodes/tests/test_matrix_multiply.py +++ b/libcst/_nodes/tests/test_matrix_multiply.py @@ -11,7 +11,6 @@ parse_expression_as, parse_statement_as, ) -from libcst._parser.entrypoints import is_native from libcst.testing.utils import data_provider @@ -70,6 +69,6 @@ def test_valid(self, **kwargs: Any) -> None: ) ) def test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) diff --git a/libcst/_nodes/tests/test_module.py b/libcst/_nodes/tests/test_module.py index 5b33c6b7..40de8f8e 100644 --- a/libcst/_nodes/tests/test_module.py +++ b/libcst/_nodes/tests/test_module.py @@ -8,7 +8,7 @@ import libcst as cst from libcst import parse_module, parse_statement from libcst._nodes.tests.base import CSTNodeTest -from libcst._parser.entrypoints import is_native + from libcst.metadata import CodeRange, MetadataWrapper, PositionProvider from libcst.testing.utils import data_provider @@ -117,7 +117,7 @@ def test_code_for_node( def test_parser( self, *, code: str, expected: cst.Module, enabled_for_native: bool = True ) -> None: - if is_native() and not enabled_for_native: + if not enabled_for_native: self.skipTest("Disabled for native parser") self.assertEqual(parse_module(code), expected) diff --git a/libcst/_nodes/tests/test_set.py b/libcst/_nodes/tests/test_set.py index 335a4d3a..699b458a 100644 --- a/libcst/_nodes/tests/test_set.py +++ b/libcst/_nodes/tests/test_set.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_expression from libcst._nodes.tests.base import CSTNodeTest, parse_expression_as -from libcst._parser.entrypoints import is_native from libcst.testing.utils import data_provider @@ -133,6 +132,6 @@ def test_invalid( ) ) def 
test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) diff --git a/libcst/_nodes/tests/test_try.py b/libcst/_nodes/tests/test_try.py index a2e8a433..8aea3643 100644 --- a/libcst/_nodes/tests/test_try.py +++ b/libcst/_nodes/tests/test_try.py @@ -8,13 +8,11 @@ import libcst as cst from libcst import parse_statement from libcst._nodes.tests.base import CSTNodeTest, DummyIndentedBlock -from libcst._parser.entrypoints import is_native + from libcst.metadata import CodeRange from libcst.testing.utils import data_provider -native_parse_statement: Optional[Callable[[str], cst.CSTNode]] = ( - parse_statement if is_native() else None -) +native_parse_statement: Optional[Callable[[str], cst.CSTNode]] = parse_statement class TryTest(CSTNodeTest): diff --git a/libcst/_nodes/tests/test_tuple.py b/libcst/_nodes/tests/test_tuple.py index 0055055c..78d16b8c 100644 --- a/libcst/_nodes/tests/test_tuple.py +++ b/libcst/_nodes/tests/test_tuple.py @@ -8,7 +8,7 @@ import libcst as cst from libcst import parse_expression, parse_statement from libcst._nodes.tests.base import CSTNodeTest, parse_expression_as -from libcst._parser.entrypoints import is_native + from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -286,6 +286,6 @@ def test_invalid( ) ) def test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) diff --git a/libcst/_nodes/tests/test_type_alias.py b/libcst/_nodes/tests/test_type_alias.py index aa26103b..3dc0ca68 100644 --- a/libcst/_nodes/tests/test_type_alias.py +++ b/libcst/_nodes/tests/test_type_alias.py @@ -8,7 +8,7 @@ import libcst as cst from libcst import parse_statement 
from libcst._nodes.tests.base import CSTNodeTest -from libcst._parser.entrypoints import is_native + from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -132,8 +132,6 @@ class TypeAliasCreationTest(CSTNodeTest): ) ) def test_valid(self, **kwargs: Any) -> None: - if not is_native(): - self.skipTest("Disabled in the old parser") self.validate_node(**kwargs) @@ -252,6 +250,4 @@ class TypeAliasParserTest(CSTNodeTest): ) ) def test_valid(self, **kwargs: Any) -> None: - if not is_native(): - self.skipTest("Disabled in the old parser") self.validate_node(**kwargs) diff --git a/libcst/_nodes/tests/test_with.py b/libcst/_nodes/tests/test_with.py index 517ce357..e775ebad 100644 --- a/libcst/_nodes/tests/test_with.py +++ b/libcst/_nodes/tests/test_with.py @@ -9,7 +9,7 @@ from libcst import parse_statement, PartialParserConfig from libcst._maybe_sentinel import MaybeSentinel from libcst._nodes.tests.base import CSTNodeTest, DummyIndentedBlock, parse_statement_as -from libcst._parser.entrypoints import is_native + from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -187,14 +187,14 @@ class WithTest(CSTNodeTest): cst.WithItem( cst.Call( cst.Name("context_mgr"), - lpar=() if is_native() else (cst.LeftParen(),), - rpar=() if is_native() else (cst.RightParen(),), + lpar=(), + rpar=(), ) ), ), cst.SimpleStatementSuite((cst.Pass(),)), - lpar=(cst.LeftParen() if is_native() else MaybeSentinel.DEFAULT), - rpar=(cst.RightParen() if is_native() else MaybeSentinel.DEFAULT), + lpar=(cst.LeftParen()), + rpar=(cst.RightParen()), whitespace_after_with=cst.SimpleWhitespace(""), ), "code": "with(context_mgr()): pass\n", @@ -233,7 +233,7 @@ class WithTest(CSTNodeTest): rpar=cst.RightParen(whitespace_before=cst.SimpleWhitespace(" ")), ), "code": ("with ( foo(),\n" " bar(), ): pass\n"), # noqa - "parser": parse_statement if is_native() else None, + "parser": parse_statement, "expected_position": CodeRange((1, 0), (2, 21)), }, 
) @@ -310,7 +310,7 @@ def test_invalid(self, **kwargs: Any) -> None: ) ) def test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) diff --git a/libcst/_nodes/tests/test_yield.py b/libcst/_nodes/tests/test_yield.py index 22a18872..aacb2da0 100644 --- a/libcst/_nodes/tests/test_yield.py +++ b/libcst/_nodes/tests/test_yield.py @@ -8,7 +8,7 @@ import libcst as cst from libcst import parse_statement from libcst._nodes.tests.base import CSTNodeTest, parse_statement_as -from libcst._parser.entrypoints import is_native + from libcst.helpers import ensure_type from libcst.metadata import CodeRange from libcst.testing.utils import data_provider @@ -241,6 +241,6 @@ def test_valid( ) ) def test_versions(self, **kwargs: Any) -> None: - if is_native() and not kwargs.get("expect_success", True): + if not kwargs.get("expect_success", True): self.skipTest("parse errors are disabled for native parser") self.assert_parses(**kwargs) diff --git a/libcst/_parser/tests/test_parse_errors.py b/libcst/_parser/tests/test_parse_errors.py index 0a058898..2af51db1 100644 --- a/libcst/_parser/tests/test_parse_errors.py +++ b/libcst/_parser/tests/test_parse_errors.py @@ -10,7 +10,7 @@ import libcst as cst from libcst._nodes.base import CSTValidationError -from libcst._parser.entrypoints import is_native + from libcst.testing.utils import data_provider, UnitTest @@ -174,8 +174,6 @@ def test_parser_syntax_error_str( parse_fn() # make sure str() doesn't blow up self.assertIn("Syntax Error", str(cm.exception)) - if not is_native(): - self.assertEqual(str(cm.exception), expected) def test_native_fallible_into_py(self) -> None: with patch("libcst._nodes.expression.Name._validate") as await_validate: diff --git a/libcst/codemod/tests/test_codemod_cli.py b/libcst/codemod/tests/test_codemod_cli.py index 18dab870..90291527 
100644 --- a/libcst/codemod/tests/test_codemod_cli.py +++ b/libcst/codemod/tests/test_codemod_cli.py @@ -12,7 +12,7 @@ from pathlib import Path from unittest import skipIf -from libcst._parser.entrypoints import is_native + from libcst.codemod import CodemodTest from libcst.testing.utils import UnitTest @@ -37,16 +37,10 @@ def test_codemod_formatter_error_input(self) -> None: stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - if not is_native(): - self.assertIn( - "ParserSyntaxError: Syntax Error @ 14:11.", - rlt.stderr.decode("utf-8"), - ) - else: - self.assertIn( - "error: cannot format -: Cannot parse for target version Python 3.6: 13:10: async with AsyncExitStack() as stack:", - rlt.stderr.decode("utf-8"), - ) + self.assertIn( + "error: cannot format -: Cannot parse for target version Python 3.6: 13:10: async with AsyncExitStack() as stack:", + rlt.stderr.decode("utf-8"), + ) def test_codemod_external(self) -> None: # Test running the NOOP command as an "external command" diff --git a/libcst/metadata/tests/test_scope_provider.py b/libcst/metadata/tests/test_scope_provider.py index fd23e993..9afa6cba 100644 --- a/libcst/metadata/tests/test_scope_provider.py +++ b/libcst/metadata/tests/test_scope_provider.py @@ -11,7 +11,7 @@ import libcst as cst from libcst import ensure_type -from libcst._parser.entrypoints import is_native + from libcst.metadata import MetadataWrapper from libcst.metadata.scope_provider import ( _gen_dotted_names, @@ -2029,8 +2029,6 @@ def something(): ) def test_type_alias_scope(self) -> None: - if not is_native(): - self.skipTest("type aliases are only supported in the native parser") m, scopes = get_scope_metadata_provider( """ type A = C @@ -2052,8 +2050,6 @@ def test_type_alias_scope(self) -> None: self.assertIsInstance(scopes[alias.value], AnnotationScope) def test_type_alias_param(self) -> None: - if not is_native(): - self.skipTest("type parameters are only supported in the native parser") m, scopes = get_scope_metadata_provider( """ 
B = int @@ -2084,8 +2080,6 @@ def test_type_alias_param(self) -> None: ) def test_type_alias_tuple_and_paramspec(self) -> None: - if not is_native(): - self.skipTest("type parameters are only supported in the native parser") m, scopes = get_scope_metadata_provider( """ type A[*T] = T @@ -2113,8 +2107,6 @@ def test_type_alias_tuple_and_paramspec(self) -> None: self.assertEqual(t_refs[0].node, alias_paramspec.value) def test_class_type_params(self) -> None: - if not is_native(): - self.skipTest("type parameters are only supported in the native parser") m, scopes = get_scope_metadata_provider( """ class W[T]: @@ -2149,8 +2141,6 @@ def g[T]() -> T: pass self.assertEqual(t_refs_in_g[0].node, g.returns.annotation) def test_nested_class_type_params(self) -> None: - if not is_native(): - self.skipTest("type parameters are only supported in the native parser") m, scopes = get_scope_metadata_provider( """ class Outer: @@ -2168,8 +2158,6 @@ class Nested[T: Outer]: pass ) def test_annotation_refers_to_nested_class(self) -> None: - if not is_native(): - self.skipTest("type parameters are only supported in the native parser") m, scopes = get_scope_metadata_provider( """ class Outer: @@ -2229,8 +2217,6 @@ def meth2[T](self, arg: Nested): pass ) def test_body_isnt_subject_to_special_annotation_rule(self) -> None: - if not is_native(): - self.skipTest("type parameters are only supported in the native parser") m, scopes = get_scope_metadata_provider( """ class Outer: diff --git a/libcst/tests/__main__.py b/libcst/tests/__main__.py index 44e6bbe0..4f91e998 100644 --- a/libcst/tests/__main__.py +++ b/libcst/tests/__main__.py @@ -5,11 +5,8 @@ from unittest import main -from libcst._parser.entrypoints import is_native if __name__ == "__main__": - parser_type = "native" if is_native() else "pure" - print(f"running tests with {parser_type!r} parser") - + print(f"running tests with native parser") main(module=None, verbosity=2) diff --git a/libcst/tests/test_roundtrip.py 
b/libcst/tests/test_roundtrip.py index d5da81f2..96d1e507 100644 --- a/libcst/tests/test_roundtrip.py +++ b/libcst/tests/test_roundtrip.py @@ -8,7 +8,7 @@ from unittest import TestCase from libcst import CSTTransformer, parse_module -from libcst._parser.entrypoints import is_native + fixtures: Path = Path(__file__).parent.parent.parent / "native/libcst/tests/fixtures" @@ -19,8 +19,6 @@ class NOOPTransformer(CSTTransformer): class RoundTripTests(TestCase): def _get_fixtures(self) -> list[Path]: - if not is_native(): - self.skipTest("pure python parser doesn't work with this") self.assertTrue(fixtures.exists(), f"{fixtures} should exist") files = list(fixtures.iterdir()) self.assertGreater(len(files), 0) From 36c41bbfe1bc31ac8c8c9e96dfa956ec3285788d Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 16:24:15 +0000 Subject: [PATCH 04/12] lint: run format --- libcst/_nodes/tests/test_binary_op.py | 2 +- libcst/_parser/entrypoints.py | 1 + libcst/codemod/tests/test_codemod_cli.py | 1 - libcst/tests/__main__.py | 1 - 4 files changed, 2 insertions(+), 3 deletions(-) diff --git a/libcst/_nodes/tests/test_binary_op.py b/libcst/_nodes/tests/test_binary_op.py index fddc90e1..f6b40daf 100644 --- a/libcst/_nodes/tests/test_binary_op.py +++ b/libcst/_nodes/tests/test_binary_op.py @@ -188,4 +188,4 @@ def test_invalid(self, **kwargs: Any) -> None: ) ) def test_parse_error(self, **kwargs: Any) -> None: - self.assert_parses(**kwargs, expect_success=False ) + self.assert_parses(**kwargs, expect_success=False) diff --git a/libcst/_parser/entrypoints.py b/libcst/_parser/entrypoints.py index 74c8f9a0..965b8e22 100644 --- a/libcst/_parser/entrypoints.py +++ b/libcst/_parser/entrypoints.py @@ -24,6 +24,7 @@ from libcst import native + def _parse( entrypoint: str, source: Union[str, bytes], diff --git a/libcst/codemod/tests/test_codemod_cli.py b/libcst/codemod/tests/test_codemod_cli.py index 90291527..9798b071 100644 --- a/libcst/codemod/tests/test_codemod_cli.py +++ 
b/libcst/codemod/tests/test_codemod_cli.py @@ -12,7 +12,6 @@ from pathlib import Path from unittest import skipIf - from libcst.codemod import CodemodTest from libcst.testing.utils import UnitTest diff --git a/libcst/tests/__main__.py b/libcst/tests/__main__.py index 4f91e998..61403c27 100644 --- a/libcst/tests/__main__.py +++ b/libcst/tests/__main__.py @@ -6,7 +6,6 @@ from unittest import main - if __name__ == "__main__": print(f"running tests with native parser") main(module=None, verbosity=2) From d425206a0dfe290a1a053b284158627d061b7b32 Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 18:37:03 +0000 Subject: [PATCH 05/12] rm: trim detect_config --- libcst/_parser/detect_config.py | 171 +------------------------------- 1 file changed, 2 insertions(+), 169 deletions(-) diff --git a/libcst/_parser/detect_config.py b/libcst/_parser/detect_config.py index 375a4f07..c989bc73 100644 --- a/libcst/_parser/detect_config.py +++ b/libcst/_parser/detect_config.py @@ -4,36 +4,12 @@ 
-import itertools -import re -from dataclasses import dataclass from io import BytesIO from tokenize import detect_encoding as py_tokenize_detect_encoding -from typing import FrozenSet, Iterable, Iterator, Pattern, Set, Tuple, Union +from typing import Tuple, Union -from libcst._nodes.whitespace import NEWLINE_RE -from libcst._parser.parso.python.token import PythonTokenTypes, TokenType -from libcst._parser.parso.utils import split_lines -from libcst._parser.types.config import AutoConfig, ParserConfig, PartialParserConfig -from libcst._parser.types.token import Token -from libcst._parser.wrapped_tokenize import tokenize_lines +from libcst._parser.types.config import AutoConfig, PartialParserConfig -_INDENT: TokenType = PythonTokenTypes.INDENT -_NAME: TokenType = PythonTokenTypes.NAME -_NEWLINE: TokenType = PythonTokenTypes.NEWLINE -_STRING: TokenType = PythonTokenTypes.STRING - -_FALLBACK_DEFAULT_NEWLINE = "\n" -_FALLBACK_DEFAULT_INDENT = " " -_CONTINUATION_RE: Pattern[str] = re.compile(r"\\(\r\n?|\n)", re.UNICODE) - - -@dataclass(frozen=True) -class ConfigDetectionResult: - # The config is a set of constant values used by the parser. - config: ParserConfig - # The tokens iterator is mutated by the parser. - tokens: Iterator[Token] def _detect_encoding(source: Union[str, bytes]) -> str: @@ -49,71 +25,6 @@ def _detect_encoding(source: Union[str, bytes]) -> str: return py_tokenize_detect_encoding(BytesIO(source).readline)[0] -def _detect_default_newline(source_str: str) -> str: - """ - Finds the first newline, and uses that value as the default newline. - """ - # Don't use `NEWLINE_RE` for this, because it might match multiple newlines as a - # single newline. - match = NEWLINE_RE.search(source_str) - return match.group(0) if match is not None else _FALLBACK_DEFAULT_NEWLINE - - -def _detect_indent(tokens: Iterable[Token]) -> str: - """ - Finds the first INDENT token, and uses that as the value of the default indent. 
- """ - try: - first_indent = next(t for t in tokens if t.type is _INDENT) - except StopIteration: - return _FALLBACK_DEFAULT_INDENT - first_indent_str = first_indent.relative_indent - assert first_indent_str is not None, "INDENT tokens must contain a relative_indent" - return first_indent_str - - -def _detect_trailing_newline(source_str: str) -> bool: - if len(source_str) == 0 or not NEWLINE_RE.fullmatch(source_str[-1]): - return False - # Make sure that the last newline wasn't following a continuation - return not ( - _CONTINUATION_RE.fullmatch(source_str[-2:]) - or _CONTINUATION_RE.fullmatch(source_str[-3:]) - ) - - -def _detect_future_imports(tokens: Iterable[Token]) -> FrozenSet[str]: - """ - Finds __future__ imports in their proper locations. - - See `https://www.python.org/dev/peps/pep-0236/`_ - """ - future_imports: Set[str] = set() - state = 0 - for tok in tokens: - if state == 0 and tok.type in (_STRING, _NEWLINE): - continue - elif state == 0 and tok.string == "from": - state = 1 - elif state == 1 and tok.string == "__future__": - state = 2 - elif state == 2 and tok.string == "import": - state = 3 - elif state == 3 and tok.string == "as": - state = 4 - elif state == 3 and tok.type == _NAME: - future_imports.add(tok.string) - elif state == 4 and tok.type == _NAME: - state = 3 - elif state == 3 and tok.string in "(),": - continue - elif state == 3 and tok.type == _NEWLINE: - state = 0 - else: - break - return frozenset(future_imports) - - def convert_to_utf8( source: Union[str, bytes], *, partial: PartialParserConfig ) -> Tuple[str, str]: @@ -130,81 +41,3 @@ def convert_to_utf8( source_str = source if isinstance(source, str) else source.decode(encoding) return (encoding, source_str) - -def detect_config( - source: Union[str, bytes], - *, - partial: PartialParserConfig, - detect_trailing_newline: bool, - detect_default_newline: bool, -) -> ConfigDetectionResult: - """ - Computes a ParserConfig given the current source code to be parsed and a partial - 
config. - """ - - python_version = partial.parsed_python_version - - encoding, source_str = convert_to_utf8(source, partial=partial) - - partial_default_newline = partial.default_newline - default_newline = ( - ( - _detect_default_newline(source_str) - if detect_default_newline - else _FALLBACK_DEFAULT_NEWLINE - ) - if isinstance(partial_default_newline, AutoConfig) - else partial_default_newline - ) - - # HACK: The grammar requires a trailing newline, but python doesn't actually require - # a trailing newline. Add one onto the end to make the parser happy. We'll strip it - # out again during cst.Module's codegen. - # - # I think parso relies on error recovery support to handle this, which we don't - # have. lib2to3 doesn't handle this case at all AFAICT. - has_trailing_newline = detect_trailing_newline and _detect_trailing_newline( - source_str - ) - if detect_trailing_newline and not has_trailing_newline: - source_str += default_newline - - lines = split_lines(source_str, keepends=True) - - tokens = tokenize_lines(source_str, lines, python_version) - - partial_default_indent = partial.default_indent - if isinstance(partial_default_indent, AutoConfig): - # We need to clone `tokens` before passing it to `_detect_indent`, because - # `_detect_indent` consumes some tokens, mutating `tokens`. - # - # Implementation detail: CPython's `itertools.tee` uses weakrefs to reduce the - # size of its FIFO, so this doesn't retain items (leak memory) for `tokens_dup` - # once `token_dup` is freed at the end of this method (subject to - # GC/refcounting). - tokens, tokens_dup = itertools.tee(tokens) - default_indent = _detect_indent(tokens_dup) - else: - default_indent = partial_default_indent - - partial_future_imports = partial.future_imports - if isinstance(partial_future_imports, AutoConfig): - # Same note as above re itertools.tee, we will consume tokens. 
- tokens, tokens_dup = itertools.tee(tokens) - future_imports = _detect_future_imports(tokens_dup) - else: - future_imports = partial_future_imports - - return ConfigDetectionResult( - config=ParserConfig( - lines=lines, - encoding=encoding, - default_indent=default_indent, - default_newline=default_newline, - has_trailing_newline=has_trailing_newline, - version=python_version, - future_imports=future_imports, - ), - tokens=tokens, - ) From 52bdb87f142cf3285bcc24428b52c58e3f0c7903 Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 18:46:11 +0000 Subject: [PATCH 06/12] rm: nuke 1 --- libcst/_parser/__init__.py | 1 + libcst/_parser/_parsing_check.py | 53 - libcst/_parser/base_parser.py | 215 -- libcst/_parser/custom_itertools.py | 17 - libcst/_parser/grammar.py | 413 ---- libcst/_parser/production_decorator.py | 55 - libcst/_parser/py_whitespace_parser.py | 271 --- libcst/_parser/python_parser.py | 47 - libcst/_parser/tests/__init__.py | 4 - libcst/_parser/tests/test_config.py | 36 - libcst/_parser/tests/test_detect_config.py | 331 --- libcst/_parser/tests/test_footer_behavior.py | 232 -- libcst/_parser/tests/test_node_identity.py | 46 - libcst/_parser/tests/test_parse_errors.py | 182 -- libcst/_parser/tests/test_version_compare.py | 45 - .../_parser/tests/test_whitespace_parser.py | 228 -- libcst/_parser/tests/test_wrapped_tokenize.py | 1906 ----------------- libcst/_parser/whitespace_parser.py | 29 - libcst/_parser/wrapped_tokenize.py | 225 -- 19 files changed, 1 insertion(+), 4335 deletions(-) delete mode 100644 libcst/_parser/_parsing_check.py delete mode 100644 libcst/_parser/base_parser.py delete mode 100644 libcst/_parser/custom_itertools.py delete mode 100644 libcst/_parser/grammar.py delete mode 100644 libcst/_parser/production_decorator.py delete mode 100644 libcst/_parser/py_whitespace_parser.py delete mode 100644 libcst/_parser/python_parser.py delete mode 100644 libcst/_parser/tests/__init__.py delete mode 100644 
libcst/_parser/tests/test_config.py delete mode 100644 libcst/_parser/tests/test_detect_config.py delete mode 100644 libcst/_parser/tests/test_footer_behavior.py delete mode 100644 libcst/_parser/tests/test_node_identity.py delete mode 100644 libcst/_parser/tests/test_parse_errors.py delete mode 100644 libcst/_parser/tests/test_version_compare.py delete mode 100644 libcst/_parser/tests/test_whitespace_parser.py delete mode 100644 libcst/_parser/tests/test_wrapped_tokenize.py delete mode 100644 libcst/_parser/whitespace_parser.py delete mode 100644 libcst/_parser/wrapped_tokenize.py diff --git a/libcst/_parser/__init__.py b/libcst/_parser/__init__.py index 7bec24cb..ad574fef 100644 --- a/libcst/_parser/__init__.py +++ b/libcst/_parser/__init__.py @@ -2,3 +2,4 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +__all__= [] \ No newline at end of file diff --git a/libcst/_parser/_parsing_check.py b/libcst/_parser/_parsing_check.py deleted file mode 100644 index 03283c95..00000000 --- a/libcst/_parser/_parsing_check.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -from typing import Iterable, Union - -from libcst._exceptions import EOFSentinel -from libcst._parser.parso.pgen2.generator import ReservedString -from libcst._parser.parso.python.token import PythonTokenTypes, TokenType -from libcst._parser.types.token import Token - -_EOF_STR: str = "end of file (EOF)" -_INDENT_STR: str = "an indent" -_DEDENT_STR: str = "a dedent" - - -def get_expected_str( - encountered: Union[Token, EOFSentinel], - expected: Union[Iterable[Union[TokenType, ReservedString]], EOFSentinel], -) -> str: - if ( - isinstance(encountered, EOFSentinel) - or encountered.type is PythonTokenTypes.ENDMARKER - ): - encountered_str = _EOF_STR - elif encountered.type is PythonTokenTypes.INDENT: - encountered_str = _INDENT_STR - elif encountered.type is PythonTokenTypes.DEDENT: - encountered_str = _DEDENT_STR - else: - encountered_str = repr(encountered.string) - - if isinstance(expected, EOFSentinel): - expected_names = [_EOF_STR] - else: - expected_names = sorted( - [ - repr(el.name) if isinstance(el, TokenType) else repr(el.value) - for el in expected - ] - ) - - if len(expected_names) > 10: - # There's too many possibilities, so it's probably not useful to list them. - # Instead, let's just abbreviate the message. - return f"Unexpectedly encountered {encountered_str}." - else: - if len(expected_names) == 1: - expected_str = expected_names[0] - else: - expected_str = f"{', '.join(expected_names[:-1])}, or {expected_names[-1]}" - return f"Encountered {encountered_str}, but expected {expected_str}." diff --git a/libcst/_parser/base_parser.py b/libcst/_parser/base_parser.py deleted file mode 100644 index d349bb14..00000000 --- a/libcst/_parser/base_parser.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright David Halter and Contributors -# Modifications are dual-licensed: MIT and PSF. 
-# 99% of the code is different from pgen2, now. - -# A fork of `parso.parser`. -# https://github.com/davidhalter/parso/blob/v0.3.4/parso/parser.py -# -# The following changes were made: -# - Typing was added. -# - Error recovery is removed. -# - The Jedi-specific _allowed_transition_names_and_token_types API is removed. -# - Improved error messages by using our exceptions module. -# - node_map/leaf_map were removed in favor of just calling convert_*. -# - convert_node/convert_leaf were renamed to convert_nonterminal/convert_terminal -# - convert_nonterminal is called regardless of the number of children. Parso avoids -# calling it in some cases to avoid creating extra nodes. -# - The parser is constructed with the tokens to allow us to track a bit more state. As -# As a consequence parser may only be used once. -# - Supports our custom Token class, instead of `parso.python.tokenize.Token`. - - -from dataclasses import dataclass, field -from typing import Generic, Iterable, List, Sequence, TypeVar, Union - -from libcst._exceptions import EOFSentinel, ParserSyntaxError, PartialParserSyntaxError -from libcst._parser._parsing_check import get_expected_str -from libcst._parser.parso.pgen2.generator import DFAState, Grammar, ReservedString -from libcst._parser.parso.python.token import TokenType -from libcst._parser.types.token import Token - -_NodeT = TypeVar("_NodeT") -_TokenTypeT = TypeVar("_TokenTypeT", bound=TokenType) -_TokenT = TypeVar("_TokenT", bound=Token) - - -@dataclass(frozen=False) -class StackNode(Generic[_TokenTypeT, _NodeT]): - dfa: "DFAState[_TokenTypeT]" - nodes: List[_NodeT] = field(default_factory=list) - - @property - def nonterminal(self) -> str: - return self.dfa.from_rule - - -def _token_to_transition( - grammar: "Grammar[_TokenTypeT]", type_: _TokenTypeT, value: str -) -> Union[ReservedString, _TokenTypeT]: - # Map from token to label - if type_.contains_syntax: - # Check for reserved words (keywords) - try: - return 
grammar.reserved_syntax_strings[value] - except KeyError: - pass - - return type_ - - -# TODO: This should be an ABC, but there's a metaclass conflict between Generic and ABC -# that's fixed in Python 3.7. -class BaseParser(Generic[_TokenT, _TokenTypeT, _NodeT]): - """Parser engine. - - A Parser instance contains state pertaining to the current token - sequence, and should not be used concurrently by different threads - to parse separate token sequences. - - See python/tokenize.py for how to get input tokens by a string. - """ - - tokens: Iterable[_TokenT] - lines: Sequence[str] # used when generating parse errors - _pgen_grammar: "Grammar[_TokenTypeT]" - stack: List[StackNode[_TokenTypeT, _NodeT]] - # Keep track of if parse was called. Because a parser may keep global mutable state, - # each BaseParser instance should only be used once. - __was_parse_called: bool - - def __init__( - self, - *, - tokens: Iterable[_TokenT], - lines: Sequence[str], - pgen_grammar: "Grammar[_TokenTypeT]", - start_nonterminal: str, - ) -> None: - self.tokens = tokens - self.lines = lines - self._pgen_grammar = pgen_grammar - first_dfa = pgen_grammar.nonterminal_to_dfas[start_nonterminal][0] - self.stack = [StackNode(first_dfa)] - self.__was_parse_called = False - - def parse(self) -> _NodeT: - # Ensure that we don't re-use parsers. - if self.__was_parse_called: - raise ValueError("Each parser object may only be used to parse once.") - self.__was_parse_called = True - - for token in self.tokens: - self._add_token(token) - - while True: - tos = self.stack[-1] - if not tos.dfa.is_final: - expected_str = get_expected_str( - EOFSentinel.EOF, tos.dfa.transitions.keys() - ) - raise ParserSyntaxError( - f"Incomplete input. 
{expected_str}", - lines=self.lines, - raw_line=len(self.lines), - raw_column=len(self.lines[-1]), - ) - - if len(self.stack) > 1: - self._pop() - else: - return self.convert_nonterminal(tos.nonterminal, tos.nodes) - - def convert_nonterminal( - self, nonterminal: str, children: Sequence[_NodeT] - ) -> _NodeT: ... - - def convert_terminal(self, token: _TokenT) -> _NodeT: ... - - def _add_token(self, token: _TokenT) -> None: - """ - This is the only core function for parsing. Here happens basically - everything. Everything is well prepared by the parser generator and we - only apply the necessary steps here. - """ - grammar = self._pgen_grammar - stack = self.stack - # pyre-fixme[6]: Expected `_TokenTypeT` for 2nd param but got `TokenType`. - transition = _token_to_transition(grammar, token.type, token.string) - - while True: - try: - plan = stack[-1].dfa.transitions[transition] - break - except KeyError: - if stack[-1].dfa.is_final: - try: - self._pop() - except PartialParserSyntaxError as ex: - # Upconvert the PartialParserSyntaxError to a ParserSyntaxError - # by backfilling the line/column information. - raise ParserSyntaxError( - ex.message, - lines=self.lines, - raw_line=token.start_pos[0], - raw_column=token.start_pos[1], - ) - except Exception as ex: - # convert_nonterminal may fail due to a bug in our code. Try to - # recover enough to at least tell us where in the file it - # failed. - raise ParserSyntaxError( - f"Internal error: {ex}", - lines=self.lines, - raw_line=token.start_pos[0], - raw_column=token.start_pos[1], - ) - else: - # We never broke out -- EOF is too soon -- Unfinished statement. - # - # BUG: The `expected_str` may not be complete because we already - # popped the other possibilities off the stack at this point, but - # it still seems useful to list some of the possibilities that we - # could've expected. - expected_str = get_expected_str( - token, stack[-1].dfa.transitions.keys() - ) - raise ParserSyntaxError( - f"Incomplete input. 
{expected_str}", - lines=self.lines, - raw_line=token.start_pos[0], - raw_column=token.start_pos[1], - ) - except IndexError: - # I don't think this will ever happen with Python's grammar, because if - # there are any extra tokens at the end of the input, we'll instead - # complain that we expected ENDMARKER. - # - # However, let's leave it just in case. - expected_str = get_expected_str(token, EOFSentinel.EOF) - raise ParserSyntaxError( - f"Too much input. {expected_str}", - lines=self.lines, - raw_line=token.start_pos[0], - raw_column=token.start_pos[1], - ) - - # Logically, `plan` is always defined, but pyre can't reasonably determine that. - stack[-1].dfa = plan.next_dfa - - for push in plan.dfa_pushes: - stack.append(StackNode(push)) - - leaf = self.convert_terminal(token) - stack[-1].nodes.append(leaf) - - def _pop(self) -> None: - tos = self.stack.pop() - # Unlike parso and lib2to3, we call `convert_nonterminal` unconditionally - # instead of only when we have more than one child. This allows us to create a - # far more consistent and predictable tree. - new_node = self.convert_nonterminal(tos.dfa.from_rule, tos.nodes) - self.stack[-1].nodes.append(new_node) diff --git a/libcst/_parser/custom_itertools.py b/libcst/_parser/custom_itertools.py deleted file mode 100644 index 81cfdb4b..00000000 --- a/libcst/_parser/custom_itertools.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -from itertools import zip_longest -from typing import Iterable, Iterator, TypeVar - -_T = TypeVar("_T") - - -# https://docs.python.org/3/library/itertools.html#itertools-recipes -def grouper(iterable: Iterable[_T], n: int, fillvalue: _T = None) -> Iterator[_T]: - "Collect data into fixed-length chunks or blocks" - # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx" - args = [iter(iterable)] * n - return zip_longest(*args, fillvalue=fillvalue) diff --git a/libcst/_parser/grammar.py b/libcst/_parser/grammar.py deleted file mode 100644 index ee65ef72..00000000 --- a/libcst/_parser/grammar.py +++ /dev/null @@ -1,413 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -import re -from functools import lru_cache -from typing import FrozenSet, Iterator, Mapping, Optional, Tuple, Union - -from libcst._parser.conversions.expression import ( - convert_arg_assign_comp_for, - convert_arglist, - convert_argument, - convert_atom, - convert_atom_basic, - convert_atom_curlybraces, - convert_atom_ellipses, - convert_atom_expr, - convert_atom_expr_await, - convert_atom_expr_trailer, - convert_atom_parens, - convert_atom_squarebrackets, - convert_atom_string, - convert_binop, - convert_boolop, - convert_comp_for, - convert_comp_if, - convert_comp_op, - convert_comparison, - convert_dictorsetmaker, - convert_expression_input, - convert_factor, - convert_fstring, - convert_fstring_content, - convert_fstring_conversion, - convert_fstring_equality, - convert_fstring_expr, - convert_fstring_format_spec, - convert_lambda, - convert_namedexpr_test, - convert_not_test, - convert_power, - convert_sliceop, - convert_star_arg, - convert_star_expr, - convert_subscript, - convert_subscriptlist, - convert_sync_comp_for, - convert_test, - convert_test_nocond, - convert_test_or_expr_list, - convert_testlist_comp_list, - convert_testlist_comp_tuple, - 
convert_trailer, - convert_trailer_arglist, - convert_trailer_attribute, - convert_trailer_subscriptlist, - convert_yield_arg, - convert_yield_expr, -) -from libcst._parser.conversions.module import convert_file_input -from libcst._parser.conversions.params import ( - convert_argslist, - convert_fpdef, - convert_fpdef_assign, - convert_fpdef_slash, - convert_fpdef_star, - convert_fpdef_starstar, -) -from libcst._parser.conversions.statement import ( - convert_annassign, - convert_assert_stmt, - convert_assign, - convert_asyncable_funcdef, - convert_asyncable_stmt, - convert_augassign, - convert_break_stmt, - convert_classdef, - convert_compound_stmt, - convert_continue_stmt, - convert_decorated, - convert_decorator, - convert_decorators, - convert_del_stmt, - convert_dotted_as_name, - convert_dotted_as_names, - convert_dotted_name, - convert_except_clause, - convert_expr_stmt, - convert_for_stmt, - convert_funcdef, - convert_funcdef_annotation, - convert_global_stmt, - convert_if_stmt, - convert_if_stmt_elif, - convert_if_stmt_else, - convert_import_as_name, - convert_import_as_names, - convert_import_from, - convert_import_name, - convert_import_relative, - convert_import_stmt, - convert_indented_suite, - convert_nonlocal_stmt, - convert_parameters, - convert_pass_stmt, - convert_raise_stmt, - convert_return_stmt, - convert_simple_stmt_line, - convert_simple_stmt_partial, - convert_simple_stmt_suite, - convert_small_stmt, - convert_stmt, - convert_stmt_input, - convert_suite, - convert_try_stmt, - convert_while_stmt, - convert_with_item, - convert_with_stmt, -) -from libcst._parser.conversions.terminals import ( - convert_ASYNC, - convert_AWAIT, - convert_DEDENT, - convert_ENDMARKER, - convert_FSTRING_END, - convert_FSTRING_START, - convert_FSTRING_STRING, - convert_INDENT, - convert_NAME, - convert_NEWLINE, - convert_NUMBER, - convert_OP, - convert_STRING, -) -from libcst._parser.parso.pgen2.generator import generate_grammar, Grammar -from 
libcst._parser.parso.python.token import PythonTokenTypes, TokenType -from libcst._parser.parso.utils import parse_version_string, PythonVersionInfo -from libcst._parser.production_decorator import get_productions -from libcst._parser.types.config import AutoConfig -from libcst._parser.types.conversions import NonterminalConversion, TerminalConversion -from libcst._parser.types.production import Production - -# Keep this sorted alphabetically -_TERMINAL_CONVERSIONS_SEQUENCE: Tuple[TerminalConversion, ...] = ( - convert_DEDENT, - convert_ENDMARKER, - convert_INDENT, - convert_NAME, - convert_NEWLINE, - convert_NUMBER, - convert_OP, - convert_STRING, - convert_FSTRING_START, - convert_FSTRING_END, - convert_FSTRING_STRING, - convert_ASYNC, - convert_AWAIT, -) - -# Try to match the order of https://docs.python.org/3/reference/grammar.html -_NONTERMINAL_CONVERSIONS_SEQUENCE: Tuple[NonterminalConversion, ...] = ( - convert_file_input, - convert_stmt_input, # roughly equivalent to single_input - convert_expression_input, # roughly equivalent to eval_input - convert_stmt, - convert_simple_stmt_partial, - convert_simple_stmt_line, - convert_simple_stmt_suite, - convert_small_stmt, - convert_expr_stmt, - convert_annassign, - convert_augassign, - convert_assign, - convert_pass_stmt, - convert_continue_stmt, - convert_break_stmt, - convert_del_stmt, - convert_import_stmt, - convert_import_name, - convert_import_relative, - convert_import_from, - convert_import_as_name, - convert_dotted_as_name, - convert_import_as_names, - convert_dotted_as_names, - convert_dotted_name, - convert_return_stmt, - convert_raise_stmt, - convert_global_stmt, - convert_nonlocal_stmt, - convert_assert_stmt, - convert_compound_stmt, - convert_if_stmt, - convert_if_stmt_elif, - convert_if_stmt_else, - convert_while_stmt, - convert_for_stmt, - convert_try_stmt, - convert_except_clause, - convert_with_stmt, - convert_with_item, - convert_asyncable_funcdef, - convert_funcdef, - convert_classdef, - 
convert_decorator, - convert_decorators, - convert_decorated, - convert_asyncable_stmt, - convert_parameters, - convert_argslist, - convert_fpdef_slash, - convert_fpdef_star, - convert_fpdef_starstar, - convert_fpdef_assign, - convert_fpdef, - convert_funcdef_annotation, - convert_suite, - convert_indented_suite, - convert_namedexpr_test, - convert_test, - convert_test_nocond, - convert_lambda, - convert_boolop, - convert_not_test, - convert_comparison, - convert_comp_op, - convert_star_expr, - convert_binop, - convert_factor, - convert_power, - convert_atom_expr, - convert_atom_expr_await, - convert_atom_expr_trailer, - convert_trailer, - convert_trailer_attribute, - convert_trailer_subscriptlist, - convert_subscriptlist, - convert_subscript, - convert_sliceop, - convert_trailer_arglist, - convert_atom, - convert_atom_basic, - convert_atom_parens, - convert_atom_squarebrackets, - convert_atom_curlybraces, - convert_atom_string, - convert_fstring, - convert_fstring_content, - convert_fstring_conversion, - convert_fstring_equality, - convert_fstring_expr, - convert_fstring_format_spec, - convert_atom_ellipses, - convert_testlist_comp_tuple, - convert_testlist_comp_list, - convert_test_or_expr_list, - convert_dictorsetmaker, - convert_arglist, - convert_argument, - convert_arg_assign_comp_for, - convert_star_arg, - convert_sync_comp_for, - convert_comp_for, - convert_comp_if, - convert_yield_expr, - convert_yield_arg, -) - - -def get_grammar_str(version: PythonVersionInfo, future_imports: FrozenSet[str]) -> str: - """ - Returns an BNF-like grammar text that `parso.pgen2.generator.generate_grammar` can - handle. - - While you should generally use `get_grammar` instead, this can be useful for - debugging the grammar. - """ - lines = [] - for p in get_nonterminal_productions(version, future_imports): - lines.append(str(p)) - return "\n".join(lines) + "\n" - - -# TODO: We should probably provide an on-disk cache like parso and lib2to3 do. 
Because -# of how we're defining our grammar, efficient cache invalidation is harder, though not -# impossible. -@lru_cache() -def get_grammar( - version: PythonVersionInfo, - future_imports: Union[FrozenSet[str], AutoConfig], -) -> "Grammar[TokenType]": - if isinstance(future_imports, AutoConfig): - # For easier testing, if not provided assume no __future__ imports - future_imports = frozenset(()) - return generate_grammar(get_grammar_str(version, future_imports), PythonTokenTypes) - - -@lru_cache() -def get_terminal_conversions() -> Mapping[str, TerminalConversion]: - """ - Returns a mapping from terminal type name to the conversion function that should be - called by the parser. - """ - return { - # pyre-fixme[16]: Optional type has no attribute `group`. - re.match("convert_(.*)", fn.__name__).group(1): fn - for fn in _TERMINAL_CONVERSIONS_SEQUENCE - } - - -@lru_cache() -def validate_grammar() -> None: - for fn in _NONTERMINAL_CONVERSIONS_SEQUENCE: - fn_productions = get_productions(fn) - if all(p.name == fn_productions[0].name for p in fn_productions): - # all the production names are the same, ensure that the `convert_` function - # is named correctly - production_name = fn_productions[0].name - expected_name = f"convert_{production_name}" - if fn.__name__ != expected_name: - raise ValueError( - f"The conversion function for '{production_name}' " - + f"must be called '{expected_name}', not '{fn.__name__}'." 
- ) - - -def _get_version_comparison(version: str) -> Tuple[str, PythonVersionInfo]: - if version[:2] in (">=", "<=", "==", "!="): - return (version[:2], parse_version_string(version[2:].strip())) - if version[:1] in (">", "<"): - return (version[:1], parse_version_string(version[1:].strip())) - raise ValueError(f"Invalid version comparison specifier '{version}'") - - -def _compare_versions( - requested_version: PythonVersionInfo, - actual_version: PythonVersionInfo, - comparison: str, -) -> bool: - if comparison == ">=": - return actual_version >= requested_version - if comparison == "<=": - return actual_version <= requested_version - if comparison == "==": - return actual_version == requested_version - if comparison == "!=": - return actual_version != requested_version - if comparison == ">": - return actual_version > requested_version - if comparison == "<": - return actual_version < requested_version - raise ValueError(f"Invalid version comparison specifier '{comparison}'") - - -def _should_include( - requested_version: Optional[str], actual_version: PythonVersionInfo -) -> bool: - if requested_version is None: - return True - for version in requested_version.split(","): - comparison, parsed_version = _get_version_comparison(version.strip()) - if not _compare_versions(parsed_version, actual_version, comparison): - return False - return True - - -def _should_include_future( - future: Optional[str], - future_imports: FrozenSet[str], -) -> bool: - if future is None: - return True - if future[:1] == "!": - return future[1:] not in future_imports - return future in future_imports - - -def get_nonterminal_productions( - version: PythonVersionInfo, future_imports: FrozenSet[str] -) -> Iterator[Production]: - for conversion in _NONTERMINAL_CONVERSIONS_SEQUENCE: - for production in get_productions(conversion): - if not _should_include(production.version, version): - continue - if not _should_include_future(production.future, future_imports): - continue - yield 
production - - -@lru_cache() -def get_nonterminal_conversions( - version: PythonVersionInfo, - future_imports: FrozenSet[str], -) -> Mapping[str, NonterminalConversion]: - """ - Returns a mapping from nonterminal production name to the conversion function that - should be called by the parser. - """ - conversions = {} - for fn in _NONTERMINAL_CONVERSIONS_SEQUENCE: - for fn_production in get_productions(fn): - if not _should_include(fn_production.version, version): - continue - if not _should_include_future(fn_production.future, future_imports): - continue - if fn_production.name in conversions: - raise ValueError( - f"Found duplicate '{fn_production.name}' production in grammar" - ) - conversions[fn_production.name] = fn - - return conversions diff --git a/libcst/_parser/production_decorator.py b/libcst/_parser/production_decorator.py deleted file mode 100644 index d5ba52de..00000000 --- a/libcst/_parser/production_decorator.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Callable, Optional, Sequence, TypeVar - -from libcst._parser.types.conversions import NonterminalConversion -from libcst._parser.types.production import Production - -_NonterminalConversionT = TypeVar( - "_NonterminalConversionT", bound=NonterminalConversion -) - - -# We could version our grammar at a later point by adding a version metadata kwarg to -# this decorator. -def with_production( - production_name: str, - children: str, - *, - version: Optional[str] = None, - future: Optional[str] = None, - # pyre-fixme[34]: `Variable[_NonterminalConversionT (bound to - # typing.Callable[[libcst_native.parser_config.ParserConfig, - # typing.Sequence[typing.Any]], typing.Any])]` isn't present in the function's - # parameters. 
-) -> Callable[[_NonterminalConversionT], _NonterminalConversionT]: - """ - Attaches a bit of grammar to a conversion function. The parser extracts all of these - production strings, and uses it to form the language's full grammar. - - If you need to attach multiple productions to the same conversion function - """ - - def inner(fn: _NonterminalConversionT) -> _NonterminalConversionT: - if not hasattr(fn, "productions"): - fn.productions = [] - # pyre-ignore: Pyre doesn't think that fn has a __name__ attribute - fn_name = fn.__name__ - if not fn_name.startswith("convert_"): - raise ValueError( - "A function with a production must be named 'convert_X', not " - + f"'{fn_name}'." - ) - # pyre-ignore: Pyre doesn't know about this magic field we added - fn.productions.append(Production(production_name, children, version, future)) - return fn - - return inner - - -def get_productions(fn: NonterminalConversion) -> Sequence[Production]: - # pyre-ignore Pyre doesn't know about this magic field we added - return fn.productions diff --git a/libcst/_parser/py_whitespace_parser.py b/libcst/_parser/py_whitespace_parser.py deleted file mode 100644 index 6b6573a6..00000000 --- a/libcst/_parser/py_whitespace_parser.py +++ /dev/null @@ -1,271 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -from typing import List, Optional, Sequence, Tuple, Union - -from libcst import CSTLogicError, ParserSyntaxError -from libcst._nodes.whitespace import ( - Comment, - COMMENT_RE, - EmptyLine, - Newline, - NEWLINE_RE, - ParenthesizedWhitespace, - SIMPLE_WHITESPACE_RE, - SimpleWhitespace, - TrailingWhitespace, -) -from libcst._parser.types.config import BaseWhitespaceParserConfig -from libcst._parser.types.whitespace_state import WhitespaceState as State - -# BEGIN PARSER ENTRYPOINTS - - -def parse_simple_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> SimpleWhitespace: - # The match never fails because the pattern can match an empty string - lines = config.lines - # pyre-fixme[16]: Optional type has no attribute `group`. - ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group(0) - ws_line_list = [ws_line] - while "\\" in ws_line: - # continuation character - state.line += 1 - state.column = 0 - ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1], state.column).group( - 0 - ) - ws_line_list.append(ws_line) - - # TODO: we could special-case the common case where there's no continuation - # character to avoid list construction and joining. - - # once we've finished collecting continuation characters - state.column += len(ws_line) - return SimpleWhitespace("".join(ws_line_list)) - - -def parse_empty_lines( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> Sequence[EmptyLine]: - # If override_absolute_indent is true, then we need to parse all lines up - # to and including the last line that is indented at our level. These all - # belong to the footer and not to the next line's leading_lines. All lines - # that have indent=False and come after the last line where indent=True - # do not belong to this node. 
- state_for_line = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - lines: List[Tuple[State, EmptyLine]] = [] - while True: - el = _parse_empty_line( - config, state_for_line, override_absolute_indent=override_absolute_indent - ) - if el is None: - break - - # Store the updated state with the element we parsed. Then make a new state - # clone for the next element. - lines.append((state_for_line, el)) - state_for_line = State( - state_for_line.line, - state_for_line.column, - state.absolute_indent, - state.is_parenthesized, - ) - - if override_absolute_indent is not None: - # We need to find the last element that is indented, and then split the list - # at that point. - for i in range(len(lines) - 1, -1, -1): - if lines[i][1].indent: - lines = lines[: (i + 1)] - break - else: - # We didn't find any lines, throw them all away - lines = [] - - if lines: - # Update the state line and column to match the last line actually parsed. - final_state: State = lines[-1][0] - state.line = final_state.line - state.column = final_state.column - return [r[1] for r in lines] - - -def parse_trailing_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> TrailingWhitespace: - trailing_whitespace = _parse_trailing_whitespace(config, state) - if trailing_whitespace is None: - raise ParserSyntaxError( - "Internal Error: Failed to parse TrailingWhitespace. This should never " - + "happen because a TrailingWhitespace is never optional in the grammar, " - + "so this error should've been caught by parso first.", - lines=config.lines, - raw_line=state.line, - raw_column=state.column, - ) - return trailing_whitespace - - -def parse_parenthesizable_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Union[SimpleWhitespace, ParenthesizedWhitespace]: - if state.is_parenthesized: - # First, try parenthesized (don't need speculation because it either - # parses or doesn't modify state). 
- parenthesized_whitespace = _parse_parenthesized_whitespace(config, state) - if parenthesized_whitespace is not None: - return parenthesized_whitespace - # Now, just parse and return a simple whitespace - return parse_simple_whitespace(config, state) - - -# END PARSER ENTRYPOINTS -# BEGIN PARSER INTERNAL PRODUCTIONS - - -def _parse_empty_line( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> Optional[EmptyLine]: - # begin speculative parsing - speculative_state = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - try: - indent = _parse_indent( - config, speculative_state, override_absolute_indent=override_absolute_indent - ) - except Exception: - # We aren't on a new line, speculative parsing failed - return None - whitespace = parse_simple_whitespace(config, speculative_state) - comment = _parse_comment(config, speculative_state) - newline = _parse_newline(config, speculative_state) - if newline is None: - # speculative parsing failed - return None - # speculative parsing succeeded - state.line = speculative_state.line - state.column = speculative_state.column - # don't need to copy absolute_indent/is_parenthesized because they don't change. - return EmptyLine(indent, whitespace, comment, newline) - - -def _parse_indent( - config: BaseWhitespaceParserConfig, - state: State, - *, - override_absolute_indent: Optional[str] = None, -) -> bool: - """ - Returns True if indentation was found, otherwise False. - """ - absolute_indent = ( - override_absolute_indent - if override_absolute_indent is not None - else state.absolute_indent - ) - line_str = config.lines[state.line - 1] - if state.column != 0: - if state.column == len(line_str) and state.line == len(config.lines): - # We're at EOF, treat this as a failed speculative parse - return False - raise CSTLogicError( - "Internal Error: Column should be 0 when parsing an indent." 
- ) - if line_str.startswith(absolute_indent, state.column): - state.column += len(absolute_indent) - return True - return False - - -def _parse_comment( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[Comment]: - comment_match = COMMENT_RE.match(config.lines[state.line - 1], state.column) - if comment_match is None: - return None - comment = comment_match.group(0) - state.column += len(comment) - return Comment(comment) - - -def _parse_newline( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[Newline]: - # begin speculative parsing - line_str = config.lines[state.line - 1] - newline_match = NEWLINE_RE.match(line_str, state.column) - if newline_match is not None: - # speculative parsing succeeded - newline_str = newline_match.group(0) - state.column += len(newline_str) - if state.column != len(line_str): - raise ParserSyntaxError( - "Internal Error: Found a newline, but it wasn't the EOL.", - lines=config.lines, - raw_line=state.line, - raw_column=state.column, - ) - if state.line < len(config.lines): - # this newline was the end of a line, and there's another line, - # therefore we should move to the next line - state.line += 1 - state.column = 0 - if newline_str == config.default_newline: - # Just inherit it from the Module instead of explicitly setting it. 
- return Newline() - else: - return Newline(newline_str) - else: # no newline was found, speculative parsing failed - return None - - -def _parse_trailing_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[TrailingWhitespace]: - # Begin speculative parsing - speculative_state = State( - state.line, state.column, state.absolute_indent, state.is_parenthesized - ) - whitespace = parse_simple_whitespace(config, speculative_state) - comment = _parse_comment(config, speculative_state) - newline = _parse_newline(config, speculative_state) - if newline is None: - # Speculative parsing failed - return None - # Speculative parsing succeeded - state.line = speculative_state.line - state.column = speculative_state.column - # don't need to copy absolute_indent/is_parenthesized because they don't change. - return TrailingWhitespace(whitespace, comment, newline) - - -def _parse_parenthesized_whitespace( - config: BaseWhitespaceParserConfig, state: State -) -> Optional[ParenthesizedWhitespace]: - first_line = _parse_trailing_whitespace(config, state) - if first_line is None: - # Speculative parsing failed - return None - empty_lines = () - while True: - empty_line = _parse_empty_line(config, state) - if empty_line is None: - # This isn't an empty line, so parse it below - break - empty_lines = empty_lines + (empty_line,) - indent = _parse_indent(config, state) - last_line = parse_simple_whitespace(config, state) - return ParenthesizedWhitespace(first_line, empty_lines, indent, last_line) diff --git a/libcst/_parser/python_parser.py b/libcst/_parser/python_parser.py deleted file mode 100644 index 7f3d53db..00000000 --- a/libcst/_parser/python_parser.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-# pyre-unsafe - -from typing import Any, Iterable, Mapping, Sequence - -from libcst._parser.base_parser import BaseParser -from libcst._parser.grammar import get_nonterminal_conversions, get_terminal_conversions -from libcst._parser.parso.pgen2.generator import Grammar -from libcst._parser.parso.python.token import TokenType -from libcst._parser.types.config import ParserConfig -from libcst._parser.types.conversions import NonterminalConversion, TerminalConversion -from libcst._parser.types.token import Token - - -class PythonCSTParser(BaseParser[Token, TokenType, Any]): - config: ParserConfig - terminal_conversions: Mapping[str, TerminalConversion] - nonterminal_conversions: Mapping[str, NonterminalConversion] - - def __init__( - self, - *, - tokens: Iterable[Token], - config: ParserConfig, - pgen_grammar: "Grammar[TokenType]", - start_nonterminal: str = "file_input", - ) -> None: - super().__init__( - tokens=tokens, - lines=config.lines, - pgen_grammar=pgen_grammar, - start_nonterminal=start_nonterminal, - ) - self.config = config - self.terminal_conversions = get_terminal_conversions() - self.nonterminal_conversions = get_nonterminal_conversions( - config.version, config.future_imports - ) - - def convert_nonterminal(self, nonterminal: str, children: Sequence[Any]) -> Any: - return self.nonterminal_conversions[nonterminal](self.config, children) - - def convert_terminal(self, token: Token) -> Any: - return self.terminal_conversions[token.type.name](self.config, token) diff --git a/libcst/_parser/tests/__init__.py b/libcst/_parser/tests/__init__.py deleted file mode 100644 index 7bec24cb..00000000 --- a/libcst/_parser/tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
diff --git a/libcst/_parser/tests/test_config.py b/libcst/_parser/tests/test_config.py deleted file mode 100644 index d9c31db5..00000000 --- a/libcst/_parser/tests/test_config.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict -from libcst._parser.parso.utils import PythonVersionInfo -from libcst._parser.types.config import _pick_compatible_python_version -from libcst.testing.utils import UnitTest - - -class ConfigTest(UnitTest): - def test_pick_compatible(self) -> None: - self.assertEqual( - PythonVersionInfo(3, 1), _pick_compatible_python_version("3.2") - ) - self.assertEqual( - PythonVersionInfo(3, 1), _pick_compatible_python_version("3.1") - ) - self.assertEqual( - PythonVersionInfo(3, 8), _pick_compatible_python_version("3.9") - ) - self.assertEqual( - PythonVersionInfo(3, 8), _pick_compatible_python_version("3.10") - ) - self.assertEqual( - PythonVersionInfo(3, 8), _pick_compatible_python_version("4.0") - ) - with self.assertRaisesRegex( - ValueError, - ( - r"No version found older than 1\.0 \(PythonVersionInfo\(" - + r"major=1, minor=0\)\) while running on" - ), - ): - _pick_compatible_python_version("1.0") diff --git a/libcst/_parser/tests/test_detect_config.py b/libcst/_parser/tests/test_detect_config.py deleted file mode 100644 index 6d9eaa6c..00000000 --- a/libcst/_parser/tests/test_detect_config.py +++ /dev/null @@ -1,331 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -from typing import Union - -from libcst._parser.detect_config import detect_config -from libcst._parser.parso.utils import PythonVersionInfo -from libcst._parser.types.config import ( - parser_config_asdict, - ParserConfig, - PartialParserConfig, -) -from libcst.testing.utils import data_provider, UnitTest - - -class TestDetectConfig(UnitTest): - @data_provider( - { - "empty_input": { - "source": b"", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["\n", ""], - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=False, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "detect_trailing_newline_disabled": { - "source": b"", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": False, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=[""], # the trailing newline isn't inserted - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=False, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "detect_default_newline_disabled": { - "source": b"pass\r", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": False, - "detect_default_newline": False, - "expected_config": ParserConfig( - lines=["pass\r", ""], # the trailing newline isn't inserted - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=False, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "newline_inferred": { - "source": b"first_line\r\n\nsomething\n", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["first_line\r\n", "\n", "something\n", ""], - encoding="utf-8", - default_indent=" ", - 
default_newline="\r\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "newline_partial_given": { - "source": b"first_line\r\nsecond_line\r\n", - "partial": PartialParserConfig( - default_newline="\n", python_version="3.7" - ), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["first_line\r\n", "second_line\r\n", ""], - encoding="utf-8", - default_indent=" ", - default_newline="\n", # The given partial disables inference - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "indent_inferred": { - "source": b"if test:\n\t something\n", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["if test:\n", "\t something\n", ""], - encoding="utf-8", - default_indent="\t ", - default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "indent_partial_given": { - "source": b"if test:\n\t something\n", - "partial": PartialParserConfig( - default_indent=" ", python_version="3.7" - ), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["if test:\n", "\t something\n", ""], - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "encoding_inferred": { - "source": b"#!/usr/bin/python3\n# -*- coding: latin-1 -*-\npass\n", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=[ - "#!/usr/bin/python3\n", - "# -*- coding: latin-1 -*-\n", - "pass\n", - "", - ], - encoding="iso-8859-1", # this is an alias for latin-1 - default_indent=" ", - 
default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "encoding_partial_given": { - "source": b"#!/usr/bin/python3\n# -*- coding: latin-1 -*-\npass\n", - "partial": PartialParserConfig( - encoding="us-ascii", python_version="3.7" - ), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=[ - "#!/usr/bin/python3\n", - "# -*- coding: latin-1 -*-\n", - "pass\n", - "", - ], - encoding="us-ascii", - default_indent=" ", - default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "encoding_str_not_bytes_disables_inference": { - "source": "#!/usr/bin/python3\n# -*- coding: latin-1 -*-\npass\n", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=[ - "#!/usr/bin/python3\n", - "# -*- coding: latin-1 -*-\n", - "pass\n", - "", - ], - encoding="utf-8", # because source is a str, don't infer latin-1 - default_indent=" ", - default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "encoding_non_ascii_compatible_utf_16_with_bom": { - "source": b"\xff\xfet\x00e\x00s\x00t\x00", - "partial": PartialParserConfig(encoding="utf-16", python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["test\n", ""], - encoding="utf-16", - default_indent=" ", - default_newline="\n", - has_trailing_newline=False, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "detect_trailing_newline_missing_newline": { - "source": b"test", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["test\n", ""], - 
encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=False, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "detect_trailing_newline_has_newline": { - "source": b"test\n", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["test\n", ""], - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "detect_trailing_newline_missing_newline_after_line_continuation": { - "source": b"test\\\n", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["test\\\n", "\n", ""], - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=False, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "detect_trailing_newline_has_newline_after_line_continuation": { - "source": b"test\\\n\n", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=["test\\\n", "\n", ""], - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset(), - ), - }, - "future_imports_in_correct_position": { - "source": b"# C\n''' D '''\nfrom __future__ import a as b\n", - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=[ - "# C\n", - "''' D '''\n", - "from __future__ import a as b\n", - "", - ], - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - 
future_imports=frozenset({"a"}), - ), - }, - "future_imports_in_mixed_position": { - "source": ( - b"from __future__ import a, b\nimport os\n" - + b"from __future__ import c\n" - ), - "partial": PartialParserConfig(python_version="3.7"), - "detect_trailing_newline": True, - "detect_default_newline": True, - "expected_config": ParserConfig( - lines=[ - "from __future__ import a, b\n", - "import os\n", - "from __future__ import c\n", - "", - ], - encoding="utf-8", - default_indent=" ", - default_newline="\n", - has_trailing_newline=True, - version=PythonVersionInfo(3, 7), - future_imports=frozenset({"a", "b"}), - ), - }, - } - ) - def test_detect_module_config( - self, - *, - source: Union[str, bytes], - partial: PartialParserConfig, - detect_trailing_newline: bool, - detect_default_newline: bool, - expected_config: ParserConfig, - ) -> None: - self.assertEqual( - parser_config_asdict( - detect_config( - source, - partial=partial, - detect_trailing_newline=detect_trailing_newline, - detect_default_newline=detect_default_newline, - ).config - ), - parser_config_asdict(expected_config), - ) diff --git a/libcst/_parser/tests/test_footer_behavior.py b/libcst/_parser/tests/test_footer_behavior.py deleted file mode 100644 index ccac8254..00000000 --- a/libcst/_parser/tests/test_footer_behavior.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -from textwrap import dedent - -import libcst as cst -from libcst import parse_module -from libcst._nodes.deep_equals import deep_equals -from libcst.testing.utils import data_provider, UnitTest - - -class FooterBehaviorTest(UnitTest): - @data_provider( - { - # Literally the most basic example - "simple_module": { - "code": "", - "expected_module": cst.Module(body=(), has_trailing_newline=False), - }, - # A module with a header comment - "header_only_module": { - "code": "# This is a header comment\n", - "expected_module": cst.Module( - header=[ - cst.EmptyLine( - comment=cst.Comment(value="# This is a header comment") - ) - ], - body=[], - ), - }, - # A module with a header and footer - "simple_header_footer_module": { - "code": "# This is a header comment\npass\n# This is a footer comment\n", - "expected_module": cst.Module( - header=[ - cst.EmptyLine( - comment=cst.Comment(value="# This is a header comment") - ) - ], - body=[cst.SimpleStatementLine([cst.Pass()])], - footer=[ - cst.EmptyLine( - comment=cst.Comment(value="# This is a footer comment") - ) - ], - ), - }, - # A module which should have a footer comment taken from the - # if statement's indented block. - "simple_reparented_footer_module": { - "code": "# This is a header comment\nif True:\n pass\n# This is a footer comment\n", - "expected_module": cst.Module( - header=[ - cst.EmptyLine( - comment=cst.Comment(value="# This is a header comment") - ) - ], - body=[ - cst.If( - test=cst.Name(value="True"), - body=cst.IndentedBlock( - header=cst.TrailingWhitespace(), - body=[ - cst.SimpleStatementLine( - body=[cst.Pass()], - trailing_whitespace=cst.TrailingWhitespace(), - ) - ], - ), - ) - ], - footer=[ - cst.EmptyLine( - comment=cst.Comment(value="# This is a footer comment") - ) - ], - ), - }, - # Verifying that we properly parse and spread out footer comments to the - # relative indents they go with. 
- "complex_reparented_footer_module": { - "code": ( - "# This is a header comment\nif True:\n if True:\n pass" - + "\n # This is an inner indented block comment\n # This " - + "is an outer indented block comment\n# This is a footer comment\n" - ), - "expected_module": cst.Module( - body=[ - cst.If( - test=cst.Name(value="True"), - body=cst.IndentedBlock( - body=[ - cst.If( - test=cst.Name(value="True"), - body=cst.IndentedBlock( - body=[ - cst.SimpleStatementLine( - body=[cst.Pass()] - ) - ], - footer=[ - cst.EmptyLine( - comment=cst.Comment( - value="# This is an inner indented block comment" - ) - ) - ], - ), - ) - ], - footer=[ - cst.EmptyLine( - comment=cst.Comment( - value="# This is an outer indented block comment" - ) - ) - ], - ), - ) - ], - header=[ - cst.EmptyLine( - comment=cst.Comment(value="# This is a header comment") - ) - ], - footer=[ - cst.EmptyLine( - comment=cst.Comment(value="# This is a footer comment") - ) - ], - ), - }, - # Verify that comments belonging to statements are still owned even - # after an indented block. - "statement_comment_reparent": { - "code": "if foo:\n return\n# comment\nx = 7\n", - "expected_module": cst.Module( - body=[ - cst.If( - test=cst.Name(value="foo"), - body=cst.IndentedBlock( - body=[ - cst.SimpleStatementLine( - body=[ - cst.Return( - whitespace_after_return=cst.SimpleWhitespace( - value="" - ) - ) - ] - ) - ] - ), - ), - cst.SimpleStatementLine( - body=[ - cst.Assign( - targets=[ - cst.AssignTarget(target=cst.Name(value="x")) - ], - value=cst.Integer(value="7"), - ) - ], - leading_lines=[ - cst.EmptyLine(comment=cst.Comment(value="# comment")) - ], - ), - ] - ), - }, - # Verify that even if there are completely empty lines, we give all lines - # up to and including the last line that's indented correctly. That way - # comments that line up with indented block's indentation level aren't - # parented to the next line just because there's a blank line or two - # between them. 
- "statement_comment_with_empty_lines": { - "code": ( - "def foo():\n if True:\n pass\n\n # Empty " - + "line before me\n\n else:\n pass\n" - ), - "expected_module": cst.Module( - body=[ - cst.FunctionDef( - name=cst.Name(value="foo"), - params=cst.Parameters(), - body=cst.IndentedBlock( - body=[ - cst.If( - test=cst.Name(value="True"), - body=cst.IndentedBlock( - body=[ - cst.SimpleStatementLine( - body=[cst.Pass()] - ) - ], - footer=[ - cst.EmptyLine(indent=False), - cst.EmptyLine( - comment=cst.Comment( - value="# Empty line before me" - ) - ), - ], - ), - orelse=cst.Else( - body=cst.IndentedBlock( - body=[ - cst.SimpleStatementLine( - body=[cst.Pass()] - ) - ] - ), - leading_lines=[cst.EmptyLine(indent=False)], - ), - ) - ] - ), - ) - ] - ), - }, - } - ) - def test_parsers(self, code: str, expected_module: cst.CSTNode) -> None: - parsed_module = parse_module(dedent(code)) - self.assertTrue( - deep_equals(parsed_module, expected_module), - msg=f"\n{parsed_module!r}\nis not deeply equal to \n{expected_module!r}", - ) diff --git a/libcst/_parser/tests/test_node_identity.py b/libcst/_parser/tests/test_node_identity.py deleted file mode 100644 index 91171915..00000000 --- a/libcst/_parser/tests/test_node_identity.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-from collections import Counter -from textwrap import dedent - -import libcst as cst -from libcst.testing.utils import data_provider, UnitTest - - -class DuplicateLeafNodeTest(UnitTest): - @data_provider( - ( - # Simple program - ( - """ - foo = 'toplevel' - fn1(foo) - fn2(foo) - def fn_def(): - foo = 'shadow' - fn3(foo) - """, - ), - ) - ) - def test_tokenize(self, code: str) -> None: - test_case = self - - class CountVisitor(cst.CSTVisitor): - def __init__(self) -> None: - self.count = Counter() - self.nodes = {} - - def on_visit(self, node: cst.CSTNode) -> bool: - self.count[id(node)] += 1 - test_case.assertTrue( - self.count[id(node)] == 1, - f"Node duplication detected between {node} and {self.nodes.get(id(node))}", - ) - self.nodes[id(node)] = node - return True - - module = cst.parse_module(dedent(code)) - module.visit(CountVisitor()) diff --git a/libcst/_parser/tests/test_parse_errors.py b/libcst/_parser/tests/test_parse_errors.py deleted file mode 100644 index 2af51db1..00000000 --- a/libcst/_parser/tests/test_parse_errors.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - - -from textwrap import dedent -from typing import Callable -from unittest.mock import patch - -import libcst as cst -from libcst._nodes.base import CSTValidationError - -from libcst.testing.utils import data_provider, UnitTest - - -class ParseErrorsTest(UnitTest): - @data_provider( - { - # _wrapped_tokenize raises these exceptions - "wrapped_tokenize__invalid_token": ( - lambda: cst.parse_module("'"), - dedent( - """ - Syntax Error @ 1:1. - "'" is not a valid token. - - ' - ^ - """ - ).strip(), - ), - "wrapped_tokenize__expected_dedent": ( - lambda: cst.parse_module("if False:\n pass\n pass"), - dedent( - """ - Syntax Error @ 3:1. - Inconsistent indentation. Expected a dedent. 
- - pass - ^ - """ - ).strip(), - ), - "wrapped_tokenize__mismatched_braces": ( - lambda: cst.parse_module("abcd)"), - dedent( - """ - Syntax Error @ 1:5. - Encountered a closing brace without a matching opening brace. - - abcd) - ^ - """ - ).strip(), - ), - # _base_parser raises these exceptions - "base_parser__unexpected_indent": ( - lambda: cst.parse_module(" abcd"), - dedent( - """ - Syntax Error @ 1:5. - Incomplete input. Unexpectedly encountered an indent. - - abcd - ^ - """ - ).strip(), - ), - "base_parser__unexpected_dedent": ( - lambda: cst.parse_module("if False:\n (el for el\n"), - dedent( - """ - Syntax Error @ 3:1. - Incomplete input. Encountered a dedent, but expected 'in'. - - (el for el - ^ - """ - ).strip(), - ), - "base_parser__multiple_possibilities": ( - lambda: cst.parse_module("try: pass"), - dedent( - """ - Syntax Error @ 2:1. - Incomplete input. Encountered end of file (EOF), but expected 'except', or 'finally'. - - try: pass - ^ - """ - ).strip(), - ), - # conversion functions raise these exceptions. - # `_base_parser` is responsible for attaching location information. - "convert_nonterminal__dict_unpacking": ( - lambda: cst.parse_expression("{**el for el in []}"), - dedent( - """ - Syntax Error @ 1:19. - dict unpacking cannot be used in dict comprehension - - {**el for el in []} - ^ - """ - ).strip(), - ), - "convert_nonterminal__arglist_non_default_after_default": ( - lambda: cst.parse_statement("def fn(first=None, second): ..."), - dedent( - """ - Syntax Error @ 1:26. - Cannot have a non-default argument following a default argument. - - def fn(first=None, second): ... - ^ - """ - ).strip(), - ), - "convert_nonterminal__arglist_trailing_param_star_without_comma": ( - lambda: cst.parse_statement("def fn(abc, *): ..."), - dedent( - """ - Syntax Error @ 1:14. - Named (keyword) arguments must follow a bare *. - - def fn(abc, *): ... 
- ^ - """ - ).strip(), - ), - "convert_nonterminal__arglist_trailing_param_star_with_comma": ( - lambda: cst.parse_statement("def fn(abc, *,): ..."), - dedent( - """ - Syntax Error @ 1:15. - Named (keyword) arguments must follow a bare *. - - def fn(abc, *,): ... - ^ - """ - ).strip(), - ), - "convert_nonterminal__class_arg_positional_after_keyword": ( - lambda: cst.parse_statement("class Cls(first=None, second): ..."), - dedent( - """ - Syntax Error @ 2:1. - Positional argument follows keyword argument. - - class Cls(first=None, second): ... - ^ - """ - ).strip(), - ), - "convert_nonterminal__class_arg_positional_expansion_after_keyword": ( - lambda: cst.parse_statement("class Cls(first=None, *second): ..."), - dedent( - """ - Syntax Error @ 2:1. - Positional argument follows keyword argument. - - class Cls(first=None, *second): ... - ^ - """ - ).strip(), - ), - } - ) - def test_parser_syntax_error_str( - self, parse_fn: Callable[[], object], expected: str - ) -> None: - with self.assertRaises(cst.ParserSyntaxError) as cm: - parse_fn() - # make sure str() doesn't blow up - self.assertIn("Syntax Error", str(cm.exception)) - - def test_native_fallible_into_py(self) -> None: - with patch("libcst._nodes.expression.Name._validate") as await_validate: - await_validate.side_effect = CSTValidationError("validate is broken") - with self.assertRaises((SyntaxError, cst.ParserSyntaxError)): - cst.parse_module("foo") diff --git a/libcst/_parser/tests/test_version_compare.py b/libcst/_parser/tests/test_version_compare.py deleted file mode 100644 index 102657d6..00000000 --- a/libcst/_parser/tests/test_version_compare.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -from libcst._parser.grammar import _should_include -from libcst._parser.parso.utils import PythonVersionInfo -from libcst.testing.utils import data_provider, UnitTest - - -class VersionCompareTest(UnitTest): - @data_provider( - ( - # Simple equality - ("==3.6", PythonVersionInfo(3, 6), True), - ("!=3.6", PythonVersionInfo(3, 6), False), - # Equal or GT/LT - (">=3.6", PythonVersionInfo(3, 5), False), - (">=3.6", PythonVersionInfo(3, 6), True), - (">=3.6", PythonVersionInfo(3, 7), True), - ("<=3.6", PythonVersionInfo(3, 5), True), - ("<=3.6", PythonVersionInfo(3, 6), True), - ("<=3.6", PythonVersionInfo(3, 7), False), - # GT/LT - (">3.6", PythonVersionInfo(3, 5), False), - (">3.6", PythonVersionInfo(3, 6), False), - (">3.6", PythonVersionInfo(3, 7), True), - ("<3.6", PythonVersionInfo(3, 5), True), - ("<3.6", PythonVersionInfo(3, 6), False), - ("<3.6", PythonVersionInfo(3, 7), False), - # Multiple checks - (">3.6,<3.8", PythonVersionInfo(3, 6), False), - (">3.6,<3.8", PythonVersionInfo(3, 7), True), - (">3.6,<3.8", PythonVersionInfo(3, 8), False), - ) - ) - def test_tokenize( - self, - requested_version: str, - actual_version: PythonVersionInfo, - expected_result: bool, - ) -> None: - self.assertEqual( - _should_include(requested_version, actual_version), expected_result - ) diff --git a/libcst/_parser/tests/test_whitespace_parser.py b/libcst/_parser/tests/test_whitespace_parser.py deleted file mode 100644 index bbe8886a..00000000 --- a/libcst/_parser/tests/test_whitespace_parser.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -from typing import Callable, TypeVar - -import libcst as cst -from libcst._nodes.deep_equals import deep_equals -from libcst._parser.types.config import MockWhitespaceParserConfig as Config -from libcst._parser.types.whitespace_state import WhitespaceState as State -from libcst._parser.whitespace_parser import ( - parse_empty_lines, - parse_simple_whitespace, - parse_trailing_whitespace, -) -from libcst.testing.utils import data_provider, UnitTest - -_T = TypeVar("_T") - - -class WhitespaceParserTest(UnitTest): - @data_provider( - { - "simple_whitespace_empty": { - "parser": parse_simple_whitespace, - "config": Config( - lines=["not whitespace\n", " another line\n"], default_newline="\n" - ), - "start_state": State( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - "end_state": State( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - "expected_node": cst.SimpleWhitespace(""), - }, - "simple_whitespace_start_of_line": { - "parser": parse_simple_whitespace, - "config": Config( - lines=["\t <-- There's some whitespace there\n"], - default_newline="\n", - ), - "start_state": State( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - "end_state": State( - line=1, column=3, absolute_indent="", is_parenthesized=False - ), - "expected_node": cst.SimpleWhitespace("\t "), - }, - "simple_whitespace_end_of_line": { - "parser": parse_simple_whitespace, - "config": Config(lines=["prefix "], default_newline="\n"), - "start_state": State( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - "end_state": State( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - "expected_node": cst.SimpleWhitespace(" "), - }, - "simple_whitespace_line_continuation": { - "parser": parse_simple_whitespace, - "config": Config( - lines=["prefix \\\n", " \\\n", " # suffix\n"], - default_newline="\n", - ), - "start_state": State( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - 
"end_state": State( - line=3, column=4, absolute_indent="", is_parenthesized=False - ), - "expected_node": cst.SimpleWhitespace(" \\\n \\\n "), - }, - "empty_lines_empty_list": { - "parser": parse_empty_lines, - "config": Config( - lines=["this is not an empty line"], default_newline="\n" - ), - "start_state": State( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - "end_state": State( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - "expected_node": [], - }, - "empty_lines_single_line": { - "parser": parse_empty_lines, - "config": Config( - lines=[" # comment\n", "this is not an empty line\n"], - default_newline="\n", - ), - "start_state": State( - line=1, column=0, absolute_indent=" ", is_parenthesized=False - ), - "end_state": State( - line=2, column=0, absolute_indent=" ", is_parenthesized=False - ), - "expected_node": [ - cst.EmptyLine( - indent=True, - whitespace=cst.SimpleWhitespace(""), - comment=cst.Comment("# comment"), - newline=cst.Newline(), - ) - ], - }, - "empty_lines_multiple": { - "parser": parse_empty_lines, - "config": Config( - lines=[ - "\n", - " \n", - " # comment with indent and whitespace\n", - "# comment without indent\n", - " # comment with no indent but some whitespace\n", - ], - default_newline="\n", - ), - "start_state": State( - line=1, column=0, absolute_indent=" ", is_parenthesized=False - ), - "end_state": State( - line=5, column=47, absolute_indent=" ", is_parenthesized=False - ), - "expected_node": [ - cst.EmptyLine( - indent=False, - whitespace=cst.SimpleWhitespace(""), - comment=None, - newline=cst.Newline(), - ), - cst.EmptyLine( - indent=True, - whitespace=cst.SimpleWhitespace(""), - comment=None, - newline=cst.Newline(), - ), - cst.EmptyLine( - indent=True, - whitespace=cst.SimpleWhitespace(" "), - comment=cst.Comment("# comment with indent and whitespace"), - newline=cst.Newline(), - ), - cst.EmptyLine( - indent=False, - whitespace=cst.SimpleWhitespace(""), - comment=cst.Comment("# 
comment without indent"), - newline=cst.Newline(), - ), - cst.EmptyLine( - indent=False, - whitespace=cst.SimpleWhitespace(" "), - comment=cst.Comment( - "# comment with no indent but some whitespace" - ), - newline=cst.Newline(), - ), - ], - }, - "empty_lines_non_default_newline": { - "parser": parse_empty_lines, - "config": Config(lines=["\n", "\r\n", "\r"], default_newline="\n"), - "start_state": State( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - "end_state": State( - line=3, column=1, absolute_indent="", is_parenthesized=False - ), - "expected_node": [ - cst.EmptyLine( - indent=True, - whitespace=cst.SimpleWhitespace(""), - comment=None, - newline=cst.Newline(None), # default newline - ), - cst.EmptyLine( - indent=True, - whitespace=cst.SimpleWhitespace(""), - comment=None, - newline=cst.Newline("\r\n"), # non-default - ), - cst.EmptyLine( - indent=True, - whitespace=cst.SimpleWhitespace(""), - comment=None, - newline=cst.Newline("\r"), # non-default - ), - ], - }, - "trailing_whitespace": { - "parser": parse_trailing_whitespace, - "config": Config( - lines=["some code # comment\n"], default_newline="\n" - ), - "start_state": State( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - "end_state": State( - line=1, column=21, absolute_indent="", is_parenthesized=False - ), - "expected_node": cst.TrailingWhitespace( - whitespace=cst.SimpleWhitespace(" "), - comment=cst.Comment("# comment"), - newline=cst.Newline(), - ), - }, - } - ) - def test_parsers( - self, - parser: Callable[[Config, State], _T], - config: Config, - start_state: State, - end_state: State, - expected_node: _T, - ) -> None: - # Uses internal `deep_equals` function instead of `CSTNode.deep_equals`, because - # we need to compare sequences of nodes, and this is the easiest way. 
:/ - parsed_node = parser(config, start_state) - self.assertTrue( - deep_equals(parsed_node, expected_node), - msg=f"\n{parsed_node!r}\nis not deeply equal to \n{expected_node!r}", - ) - self.assertEqual(start_state, end_state) diff --git a/libcst/_parser/tests/test_wrapped_tokenize.py b/libcst/_parser/tests/test_wrapped_tokenize.py deleted file mode 100644 index dbaf3700..00000000 --- a/libcst/_parser/tests/test_wrapped_tokenize.py +++ /dev/null @@ -1,1906 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# pyre-unsafe - -from typing import Sequence - -from libcst._exceptions import ParserSyntaxError -from libcst._parser.parso.python.token import PythonTokenTypes -from libcst._parser.parso.utils import parse_version_string, PythonVersionInfo -from libcst._parser.types.whitespace_state import WhitespaceState -from libcst._parser.wrapped_tokenize import Token, tokenize -from libcst.testing.utils import data_provider, UnitTest - -_PY38 = parse_version_string("3.8.0") -_PY37 = parse_version_string("3.7.0") -_PY36 = parse_version_string("3.6.0") -_PY35 = parse_version_string("3.5.0") - - -class WrappedTokenizeTest(UnitTest): - maxDiff = 10000 - - @data_provider( - { - "simple_py35": ( - "pass;\n", - _PY35, - ( - Token( - type=PythonTokenTypes.NAME, - string="pass", - start_pos=(1, 0), - end_pos=(1, 4), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=4, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=";", - start_pos=(1, 4), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=4, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - 
relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 5), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(2, 0), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "with_indent_py35": ( - "if foo:\n bar\n", - _PY35, - ( - Token( - type=PythonTokenTypes.NAME, - string="if", - start_pos=(1, 0), - end_pos=(1, 2), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=2, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="foo", - start_pos=(1, 3), - end_pos=(1, 6), - whitespace_before=WhitespaceState( - line=1, column=2, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=":", - start_pos=(1, 6), - end_pos=(1, 7), - whitespace_before=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=7, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 7), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=7, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, - 
column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.INDENT, - string="", - start_pos=(2, 4), - end_pos=(2, 4), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=" ", - ), - Token( - type=PythonTokenTypes.NAME, - string="bar", - start_pos=(2, 4), - end_pos=(2, 7), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=7, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(2, 7), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=2, - column=7, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.DEDENT, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "async_py35": ( - "async def foo():\n return await bar\n", - _PY35, - ( - Token( - type=PythonTokenTypes.ASYNC, - string="async", - start_pos=(1, 0), - end_pos=(1, 5), - 
whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="def", - start_pos=(1, 6), - end_pos=(1, 9), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="foo", - start_pos=(1, 10), - end_pos=(1, 13), - whitespace_before=WhitespaceState( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, - column=13, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string="(", - start_pos=(1, 13), - end_pos=(1, 14), - whitespace_before=WhitespaceState( - line=1, - column=13, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=1, column=14, absolute_indent="", is_parenthesized=True - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=")", - start_pos=(1, 14), - end_pos=(1, 15), - whitespace_before=WhitespaceState( - line=1, column=14, absolute_indent="", is_parenthesized=True - ), - whitespace_after=WhitespaceState( - line=1, - column=15, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=":", - start_pos=(1, 15), - end_pos=(1, 16), - whitespace_before=WhitespaceState( - line=1, - column=15, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=1, - column=16, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 
16), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, - column=16, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.INDENT, - string="", - start_pos=(2, 4), - end_pos=(2, 4), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=" ", - ), - Token( - type=PythonTokenTypes.NAME, - string="return", - start_pos=(2, 4), - end_pos=(2, 10), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=10, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.AWAIT, - string="await", - start_pos=(2, 11), - end_pos=(2, 16), - whitespace_before=WhitespaceState( - line=2, - column=10, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=16, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="bar", - start_pos=(2, 17), - end_pos=(2, 20), - whitespace_before=WhitespaceState( - line=2, - column=16, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=20, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(2, 20), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=2, - column=20, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - 
relative_indent=None, - ), - Token( - type=PythonTokenTypes.DEDENT, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "async_no_token_35": ( - "async;\n", - _PY35, - ( - Token( - type=PythonTokenTypes.NAME, - string="async", - start_pos=(1, 0), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=";", - start_pos=(1, 5), - end_pos=(1, 6), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 6), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(2, 0), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, 
column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "simple_py36": ( - "pass;\n", - _PY36, - ( - Token( - type=PythonTokenTypes.NAME, - string="pass", - start_pos=(1, 0), - end_pos=(1, 4), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=4, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=";", - start_pos=(1, 4), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=4, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 5), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(2, 0), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "with_indent_py36": ( - "if foo:\n bar\n", - _PY36, - ( - Token( - type=PythonTokenTypes.NAME, - string="if", - start_pos=(1, 0), - end_pos=(1, 2), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=2, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="foo", - start_pos=(1, 3), - end_pos=(1, 6), - whitespace_before=WhitespaceState( - 
line=1, column=2, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=":", - start_pos=(1, 6), - end_pos=(1, 7), - whitespace_before=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=7, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 7), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=7, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.INDENT, - string="", - start_pos=(2, 4), - end_pos=(2, 4), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=" ", - ), - Token( - type=PythonTokenTypes.NAME, - string="bar", - start_pos=(2, 4), - end_pos=(2, 7), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=7, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(2, 7), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=2, - column=7, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.DEDENT, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - 
whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "async_py36": ( - "async def foo():\n return await bar\n", - _PY36, - ( - Token( - type=PythonTokenTypes.ASYNC, - string="async", - start_pos=(1, 0), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="def", - start_pos=(1, 6), - end_pos=(1, 9), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="foo", - start_pos=(1, 10), - end_pos=(1, 13), - whitespace_before=WhitespaceState( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, - column=13, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string="(", - start_pos=(1, 13), - end_pos=(1, 14), - whitespace_before=WhitespaceState( - line=1, - column=13, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=1, column=14, absolute_indent="", is_parenthesized=True - ), - relative_indent=None, - ), - 
Token( - type=PythonTokenTypes.OP, - string=")", - start_pos=(1, 14), - end_pos=(1, 15), - whitespace_before=WhitespaceState( - line=1, column=14, absolute_indent="", is_parenthesized=True - ), - whitespace_after=WhitespaceState( - line=1, - column=15, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=":", - start_pos=(1, 15), - end_pos=(1, 16), - whitespace_before=WhitespaceState( - line=1, - column=15, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=1, - column=16, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 16), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, - column=16, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.INDENT, - string="", - start_pos=(2, 4), - end_pos=(2, 4), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=" ", - ), - Token( - type=PythonTokenTypes.NAME, - string="return", - start_pos=(2, 4), - end_pos=(2, 10), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=10, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.AWAIT, - string="await", - start_pos=(2, 11), - end_pos=(2, 16), - whitespace_before=WhitespaceState( - line=2, - column=10, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=16, - 
absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="bar", - start_pos=(2, 17), - end_pos=(2, 20), - whitespace_before=WhitespaceState( - line=2, - column=16, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=20, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(2, 20), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=2, - column=20, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.DEDENT, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "async_no_token_36": ( - "async;\n", - _PY36, - ( - Token( - type=PythonTokenTypes.NAME, - string="async", - start_pos=(1, 0), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=";", - start_pos=(1, 5), - end_pos=(1, 6), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", 
is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 6), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(2, 0), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "simple_py37": ( - "pass;\n", - _PY37, - ( - Token( - type=PythonTokenTypes.NAME, - string="pass", - start_pos=(1, 0), - end_pos=(1, 4), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=4, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=";", - start_pos=(1, 4), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=4, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 5), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(2, 0), - end_pos=(2, 0), - 
whitespace_before=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "with_indent_py37": ( - "if foo:\n bar\n", - _PY37, - ( - Token( - type=PythonTokenTypes.NAME, - string="if", - start_pos=(1, 0), - end_pos=(1, 2), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=2, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="foo", - start_pos=(1, 3), - end_pos=(1, 6), - whitespace_before=WhitespaceState( - line=1, column=2, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=":", - start_pos=(1, 6), - end_pos=(1, 7), - whitespace_before=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=7, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 7), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=7, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.INDENT, - string="", - start_pos=(2, 4), - end_pos=(2, 4), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=" ", - ), - Token( - 
type=PythonTokenTypes.NAME, - string="bar", - start_pos=(2, 4), - end_pos=(2, 7), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=7, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(2, 7), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=2, - column=7, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.DEDENT, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "async_py37": ( - "async def foo():\n return await bar\n", - _PY37, - ( - Token( - type=PythonTokenTypes.ASYNC, - string="async", - start_pos=(1, 0), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="def", - start_pos=(1, 6), - end_pos=(1, 9), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - 
line=1, column=9, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="foo", - start_pos=(1, 10), - end_pos=(1, 13), - whitespace_before=WhitespaceState( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, - column=13, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string="(", - start_pos=(1, 13), - end_pos=(1, 14), - whitespace_before=WhitespaceState( - line=1, - column=13, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=1, column=14, absolute_indent="", is_parenthesized=True - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=")", - start_pos=(1, 14), - end_pos=(1, 15), - whitespace_before=WhitespaceState( - line=1, column=14, absolute_indent="", is_parenthesized=True - ), - whitespace_after=WhitespaceState( - line=1, - column=15, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=":", - start_pos=(1, 15), - end_pos=(1, 16), - whitespace_before=WhitespaceState( - line=1, - column=15, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=1, - column=16, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 16), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, - column=16, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.INDENT, - string="", - start_pos=(2, 4), - end_pos=(2, 4), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - 
is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=" ", - ), - Token( - type=PythonTokenTypes.NAME, - string="return", - start_pos=(2, 4), - end_pos=(2, 10), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=10, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.AWAIT, - string="await", - start_pos=(2, 11), - end_pos=(2, 16), - whitespace_before=WhitespaceState( - line=2, - column=10, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=16, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="bar", - start_pos=(2, 17), - end_pos=(2, 20), - whitespace_before=WhitespaceState( - line=2, - column=16, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=20, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(2, 20), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=2, - column=20, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.DEDENT, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - 
whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "simple_py38": ( - "pass;\n", - _PY38, - ( - Token( - type=PythonTokenTypes.NAME, - string="pass", - start_pos=(1, 0), - end_pos=(1, 4), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=4, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=";", - start_pos=(1, 4), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=4, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 5), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(2, 0), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "with_indent_py38": ( - "if foo:\n bar\n", - _PY38, - ( - Token( - type=PythonTokenTypes.NAME, - string="if", - start_pos=(1, 0), - end_pos=(1, 2), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=2, absolute_indent="", is_parenthesized=False - ), - 
relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="foo", - start_pos=(1, 3), - end_pos=(1, 6), - whitespace_before=WhitespaceState( - line=1, column=2, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=":", - start_pos=(1, 6), - end_pos=(1, 7), - whitespace_before=WhitespaceState( - line=1, column=6, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=7, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 7), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, column=7, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.INDENT, - string="", - start_pos=(2, 4), - end_pos=(2, 4), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=" ", - ), - Token( - type=PythonTokenTypes.NAME, - string="bar", - start_pos=(2, 4), - end_pos=(2, 7), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=7, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(2, 7), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=2, - column=7, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=3, column=0, 
absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.DEDENT, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - "async_py38": ( - "async def foo():\n return await bar\n", - _PY38, - ( - Token( - type=PythonTokenTypes.ASYNC, - string="async", - start_pos=(1, 0), - end_pos=(1, 5), - whitespace_before=WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="def", - start_pos=(1, 6), - end_pos=(1, 9), - whitespace_before=WhitespaceState( - line=1, column=5, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="foo", - start_pos=(1, 10), - end_pos=(1, 13), - whitespace_before=WhitespaceState( - line=1, column=9, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=1, - column=13, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string="(", - start_pos=(1, 13), - end_pos=(1, 14), - whitespace_before=WhitespaceState( - line=1, - column=13, - 
absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=1, column=14, absolute_indent="", is_parenthesized=True - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=")", - start_pos=(1, 14), - end_pos=(1, 15), - whitespace_before=WhitespaceState( - line=1, column=14, absolute_indent="", is_parenthesized=True - ), - whitespace_after=WhitespaceState( - line=1, - column=15, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.OP, - string=":", - start_pos=(1, 15), - end_pos=(1, 16), - whitespace_before=WhitespaceState( - line=1, - column=15, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=1, - column=16, - absolute_indent="", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(1, 16), - end_pos=(2, 0), - whitespace_before=WhitespaceState( - line=1, - column=16, - absolute_indent="", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.INDENT, - string="", - start_pos=(2, 4), - end_pos=(2, 4), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=" ", - ), - Token( - type=PythonTokenTypes.NAME, - string="return", - start_pos=(2, 4), - end_pos=(2, 10), - whitespace_before=WhitespaceState( - line=2, - column=0, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=10, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.AWAIT, - string="await", - start_pos=(2, 11), - 
end_pos=(2, 16), - whitespace_before=WhitespaceState( - line=2, - column=10, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=16, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NAME, - string="bar", - start_pos=(2, 17), - end_pos=(2, 20), - whitespace_before=WhitespaceState( - line=2, - column=16, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=2, - column=20, - absolute_indent=" ", - is_parenthesized=False, - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.NEWLINE, - string="\n", - start_pos=(2, 20), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=2, - column=20, - absolute_indent=" ", - is_parenthesized=False, - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.DEDENT, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - Token( - type=PythonTokenTypes.ENDMARKER, - string="", - start_pos=(3, 0), - end_pos=(3, 0), - whitespace_before=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - whitespace_after=WhitespaceState( - line=3, column=0, absolute_indent="", is_parenthesized=False - ), - relative_indent=None, - ), - ), - ), - } - ) - def test_tokenize( - self, code: str, ver: PythonVersionInfo, expected: Sequence[Token] - ) -> None: - tokens = tuple(tokenize(code, ver)) - self.assertSequenceEqual(tokens, expected) - for a, b in zip(tokens, tokens[1:]): - # These must be the same object, so if whitespace gets consumed (mutated) at - # the end of token a, it shows up at the 
beginning of token b. - self.assertIs(a.whitespace_after, b.whitespace_before) - - def test_errortoken(self) -> None: - for version in [_PY36, _PY37, _PY38]: - with self.assertRaisesRegex(ParserSyntaxError, "not a valid token"): - # use tuple() to read everything - # The copyright symbol isn't a valid token - tuple(tokenize("\u00a9", version)) - - def test_error_dedent(self) -> None: - for version in [_PY36, _PY37, _PY38]: - with self.assertRaisesRegex(ParserSyntaxError, "Inconsistent indentation"): - # create some inconsistent indents to generate an ERROR_DEDENT token - tuple(tokenize(" a\n b", version)) diff --git a/libcst/_parser/whitespace_parser.py b/libcst/_parser/whitespace_parser.py deleted file mode 100644 index 1c29efc5..00000000 --- a/libcst/_parser/whitespace_parser.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -""" -Parso doesn't attempt to parse (or even emit tokens for) whitespace or comments that -aren't syntatically important. Instead, we're just given the whitespace as a "prefix" of -the token. - -However, in our CST, whitespace is gathered into far more detailed objects than a simple -str. - -Fortunately this isn't hard for us to parse ourselves, so we just use our own -hand-rolled recursive descent parser. 
-""" - -try: - # It'd be better to do `from libcst_native.whitespace_parser import *`, but we're - # blocked on https://github.com/PyO3/pyo3/issues/759 - # (which ultimately seems to be a limitation of how importlib works) - from libcst_native import whitespace_parser as mod -except ImportError: - from libcst._parser import py_whitespace_parser as mod - -parse_simple_whitespace = mod.parse_simple_whitespace -parse_empty_lines = mod.parse_empty_lines -parse_trailing_whitespace = mod.parse_trailing_whitespace -parse_parenthesizable_whitespace = mod.parse_parenthesizable_whitespace diff --git a/libcst/_parser/wrapped_tokenize.py b/libcst/_parser/wrapped_tokenize.py deleted file mode 100644 index 8d601052..00000000 --- a/libcst/_parser/wrapped_tokenize.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - - -""" -Parso's tokenize doesn't give us tokens in the format that we'd ideally like, so this -performs a small number of transformations to the token stream: - -- `end_pos` is precomputed as a property, instead of lazily as a method, for more - efficient access. -- `whitespace_before` and `whitespace_after` have been added. These include the correct - indentation information. -- `prefix` is removed, since we don't use it anywhere. -- `ERRORTOKEN` and `ERROR_DEDENT` have been removed, because we don't intend to support - error recovery. If we encounter token errors, we'll raise a ParserSyntaxError instead. - -If performance becomes a concern, we can rewrite this later as a fork of the original -tokenize module, instead of as a wrapper. 
-""" - -from dataclasses import dataclass, field -from enum import Enum -from typing import Generator, Iterator, List, Optional, Sequence - -from libcst._add_slots import add_slots -from libcst._exceptions import ParserSyntaxError -from libcst._parser.parso.python.token import PythonTokenTypes, TokenType -from libcst._parser.parso.python.tokenize import ( - Token as OrigToken, - tokenize_lines as orig_tokenize_lines, -) -from libcst._parser.parso.utils import PythonVersionInfo, split_lines -from libcst._parser.types.token import Token -from libcst._parser.types.whitespace_state import WhitespaceState - -_ERRORTOKEN: TokenType = PythonTokenTypes.ERRORTOKEN -_ERROR_DEDENT: TokenType = PythonTokenTypes.ERROR_DEDENT - -_INDENT: TokenType = PythonTokenTypes.INDENT -_DEDENT: TokenType = PythonTokenTypes.DEDENT -_ENDMARKER: TokenType = PythonTokenTypes.ENDMARKER - -_FSTRING_START: TokenType = PythonTokenTypes.FSTRING_START -_FSTRING_END: TokenType = PythonTokenTypes.FSTRING_END - -_OP: TokenType = PythonTokenTypes.OP - - -class _ParenthesisOrFStringStackEntry(Enum): - PARENTHESIS = 0 - FSTRING = 0 - - -_PARENTHESIS_STACK_ENTRY: _ParenthesisOrFStringStackEntry = ( - _ParenthesisOrFStringStackEntry.PARENTHESIS -) -_FSTRING_STACK_ENTRY: _ParenthesisOrFStringStackEntry = ( - _ParenthesisOrFStringStackEntry.FSTRING -) - - -@add_slots -@dataclass(frozen=False) -class _TokenizeState: - lines: Sequence[str] - previous_whitespace_state: WhitespaceState = field( - default_factory=lambda: WhitespaceState( - line=1, column=0, absolute_indent="", is_parenthesized=False - ) - ) - indents: List[str] = field(default_factory=lambda: [""]) - parenthesis_or_fstring_stack: List[_ParenthesisOrFStringStackEntry] = field( - default_factory=list - ) - - -def tokenize(code: str, version_info: PythonVersionInfo) -> Iterator[Token]: - try: - from libcst_native import tokenize as native_tokenize - - return native_tokenize.tokenize(code) - except ImportError: - lines = split_lines(code, 
keepends=True) - return tokenize_lines(code, lines, version_info) - - -def tokenize_lines( - code: str, lines: Sequence[str], version_info: PythonVersionInfo -) -> Iterator[Token]: - try: - from libcst_native import tokenize as native_tokenize - - # TODO: pass through version_info - return native_tokenize.tokenize(code) - except ImportError: - return tokenize_lines_py(code, lines, version_info) - - -def tokenize_lines_py( - code: str, lines: Sequence[str], version_info: PythonVersionInfo -) -> Generator[Token, None, None]: - state = _TokenizeState(lines) - orig_tokens_iter = iter(orig_tokenize_lines(lines, version_info)) - - # Iterate over the tokens and pass them to _convert_token, providing a one-token - # lookahead, to enable proper indent handling. - try: - curr_token = next(orig_tokens_iter) - except StopIteration: - pass # empty file - else: - for next_token in orig_tokens_iter: - yield _convert_token(state, curr_token, next_token) - curr_token = next_token - yield _convert_token(state, curr_token, None) - - -def _convert_token( # noqa: C901: too complex - state: _TokenizeState, curr_token: OrigToken, next_token: Optional[OrigToken] -) -> Token: - ct_type = curr_token.type - ct_string = curr_token.string - ct_start_pos = curr_token.start_pos - if ct_type is _ERRORTOKEN: - raise ParserSyntaxError( - f"{ct_string!r} is not a valid token.", - lines=state.lines, - raw_line=ct_start_pos[0], - raw_column=ct_start_pos[1], - ) - if ct_type is _ERROR_DEDENT: - raise ParserSyntaxError( - "Inconsistent indentation. 
Expected a dedent.", - lines=state.lines, - raw_line=ct_start_pos[0], - raw_column=ct_start_pos[1], - ) - - # Compute relative indent changes for indent/dedent nodes - relative_indent: Optional[str] = None - if ct_type is _INDENT: - old_indent = "" if len(state.indents) < 2 else state.indents[-2] - new_indent = state.indents[-1] - relative_indent = new_indent[len(old_indent) :] - - if next_token is not None: - nt_type = next_token.type - if nt_type is _INDENT: - nt_line, nt_column = next_token.start_pos - state.indents.append(state.lines[nt_line - 1][:nt_column]) - elif nt_type is _DEDENT: - state.indents.pop() - - whitespace_before = state.previous_whitespace_state - - if ct_type is _INDENT or ct_type is _DEDENT or ct_type is _ENDMARKER: - # Don't update whitespace state for these dummy tokens. This makes it possible - # to partially parse whitespace for IndentedBlock footers, and then parse the - # rest of the whitespace in the following statement's leading_lines. - # Unfortunately, that means that the indentation is either wrong for the footer - # comments, or for the next line. We've chosen to allow it to be wrong for the - # IndentedBlock footer and manually override the state when parsing whitespace - # in that particular node. - whitespace_after = whitespace_before - ct_end_pos = ct_start_pos - else: - # Not a dummy token, so update the whitespace state. - - # Compute our own end_pos, since parso's end_pos is wrong for triple-strings. - lines = split_lines(ct_string) - if len(lines) > 1: - ct_end_pos = ct_start_pos[0] + len(lines) - 1, len(lines[-1]) - else: - ct_end_pos = (ct_start_pos[0], ct_start_pos[1] + len(ct_string)) - - # Figure out what mode the whitespace parser should use. If we're inside - # parentheses, certain whitespace (e.g. newlines) are allowed where they would - # otherwise not be. f-strings override and disable this behavior, however. 
- # - # Parso's tokenizer tracks this internally, but doesn't expose it, so we have to - # duplicate that logic here. - - pof_stack = state.parenthesis_or_fstring_stack - try: - if ct_type is _FSTRING_START: - pof_stack.append(_FSTRING_STACK_ENTRY) - elif ct_type is _FSTRING_END: - pof_stack.pop() - elif ct_type is _OP: - if ct_string in "([{": - pof_stack.append(_PARENTHESIS_STACK_ENTRY) - elif ct_string in ")]}": - pof_stack.pop() - except IndexError: - # pof_stack may be empty by the time we need to read from it due to - # mismatched braces. - raise ParserSyntaxError( - "Encountered a closing brace without a matching opening brace.", - lines=state.lines, - raw_line=ct_start_pos[0], - raw_column=ct_start_pos[1], - ) - is_parenthesized = ( - len(pof_stack) > 0 and pof_stack[-1] == _PARENTHESIS_STACK_ENTRY - ) - - whitespace_after = WhitespaceState( - ct_end_pos[0], ct_end_pos[1], state.indents[-1], is_parenthesized - ) - - # Hold onto whitespace_after, so we can use it as whitespace_before in the next - # node. - state.previous_whitespace_state = whitespace_after - - return Token( - ct_type, - ct_string, - ct_start_pos, - ct_end_pos, - whitespace_before, - whitespace_after, - relative_indent, - ) From efac9f9df464bf40f43b74791596c512194d5e23 Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 20:02:11 +0000 Subject: [PATCH 07/12] lint: formater --- libcst/_parser/__init__.py | 1 - libcst/_parser/detect_config.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/libcst/_parser/__init__.py b/libcst/_parser/__init__.py index ad574fef..7bec24cb 100644 --- a/libcst/_parser/__init__.py +++ b/libcst/_parser/__init__.py @@ -2,4 +2,3 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-__all__= [] \ No newline at end of file diff --git a/libcst/_parser/detect_config.py b/libcst/_parser/detect_config.py index c989bc73..0c2a82aa 100644 --- a/libcst/_parser/detect_config.py +++ b/libcst/_parser/detect_config.py @@ -11,7 +11,6 @@ from libcst._parser.types.config import AutoConfig, PartialParserConfig - def _detect_encoding(source: Union[str, bytes]) -> str: """ Detects the encoding from the presence of a UTF-8 BOM or an encoding cookie as @@ -40,4 +39,3 @@ def convert_to_utf8( source_str = source if isinstance(source, str) else source.decode(encoding) return (encoding, source_str) - From 67a1d827cec5403b5952e837e1c9c46faf164774 Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 16:09:47 -0400 Subject: [PATCH 08/12] fix: format --- libcst/_nodes/tests/test_module.py | 1 - libcst/_nodes/tests/test_try.py | 1 - libcst/_nodes/tests/test_tuple.py | 1 - libcst/_nodes/tests/test_type_alias.py | 1 - libcst/_nodes/tests/test_with.py | 1 - libcst/_nodes/tests/test_yield.py | 1 - 6 files changed, 6 deletions(-) diff --git a/libcst/_nodes/tests/test_module.py b/libcst/_nodes/tests/test_module.py index 40de8f8e..e4d40471 100644 --- a/libcst/_nodes/tests/test_module.py +++ b/libcst/_nodes/tests/test_module.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_module, parse_statement from libcst._nodes.tests.base import CSTNodeTest - from libcst.metadata import CodeRange, MetadataWrapper, PositionProvider from libcst.testing.utils import data_provider diff --git a/libcst/_nodes/tests/test_try.py b/libcst/_nodes/tests/test_try.py index 8aea3643..1b3f8558 100644 --- a/libcst/_nodes/tests/test_try.py +++ b/libcst/_nodes/tests/test_try.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_statement from libcst._nodes.tests.base import CSTNodeTest, DummyIndentedBlock - from libcst.metadata import CodeRange from libcst.testing.utils import data_provider diff --git a/libcst/_nodes/tests/test_tuple.py 
b/libcst/_nodes/tests/test_tuple.py index 78d16b8c..aa3d68bb 100644 --- a/libcst/_nodes/tests/test_tuple.py +++ b/libcst/_nodes/tests/test_tuple.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_expression, parse_statement from libcst._nodes.tests.base import CSTNodeTest, parse_expression_as - from libcst.metadata import CodeRange from libcst.testing.utils import data_provider diff --git a/libcst/_nodes/tests/test_type_alias.py b/libcst/_nodes/tests/test_type_alias.py index 3dc0ca68..865135c1 100644 --- a/libcst/_nodes/tests/test_type_alias.py +++ b/libcst/_nodes/tests/test_type_alias.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_statement from libcst._nodes.tests.base import CSTNodeTest - from libcst.metadata import CodeRange from libcst.testing.utils import data_provider diff --git a/libcst/_nodes/tests/test_with.py b/libcst/_nodes/tests/test_with.py index e775ebad..dfb0a272 100644 --- a/libcst/_nodes/tests/test_with.py +++ b/libcst/_nodes/tests/test_with.py @@ -9,7 +9,6 @@ from libcst import parse_statement, PartialParserConfig from libcst._maybe_sentinel import MaybeSentinel from libcst._nodes.tests.base import CSTNodeTest, DummyIndentedBlock, parse_statement_as - from libcst.metadata import CodeRange from libcst.testing.utils import data_provider diff --git a/libcst/_nodes/tests/test_yield.py b/libcst/_nodes/tests/test_yield.py index aacb2da0..e5085b4d 100644 --- a/libcst/_nodes/tests/test_yield.py +++ b/libcst/_nodes/tests/test_yield.py @@ -8,7 +8,6 @@ import libcst as cst from libcst import parse_statement from libcst._nodes.tests.base import CSTNodeTest, parse_statement_as - from libcst.helpers import ensure_type from libcst.metadata import CodeRange from libcst.testing.utils import data_provider From e75208b72bad50aaca0aed1b250f49e07dad2af5 Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 20:28:11 +0000 Subject: [PATCH 09/12] rm: nuke 2 --- libcst/_nodes/tests/test_with.py | 1 - 
libcst/_parser/conversions/README.md | 209 --- libcst/_parser/conversions/__init__.py | 4 - libcst/_parser/conversions/expression.py | 1630 ------------------- libcst/_parser/conversions/module.py | 46 - libcst/_parser/conversions/params.py | 346 ---- libcst/_parser/conversions/statement.py | 1381 ---------------- libcst/_parser/conversions/terminals.py | 83 - libcst/_parser/entrypoints.py | 4 +- libcst/_parser/types/__init__.py | 4 - libcst/_parser/types/conversions.py | 14 - libcst/_parser/types/partials.py | 160 -- libcst/_parser/types/production.py | 19 - libcst/_parser/types/py_token.py | 27 - libcst/_parser/types/py_whitespace_state.py | 36 - libcst/_parser/types/token.py | 12 - libcst/_parser/types/whitespace_state.py | 15 - libcst/tests/__main__.py | 2 +- 18 files changed, 2 insertions(+), 3991 deletions(-) delete mode 100644 libcst/_parser/conversions/README.md delete mode 100644 libcst/_parser/conversions/__init__.py delete mode 100644 libcst/_parser/conversions/expression.py delete mode 100644 libcst/_parser/conversions/module.py delete mode 100644 libcst/_parser/conversions/params.py delete mode 100644 libcst/_parser/conversions/statement.py delete mode 100644 libcst/_parser/conversions/terminals.py delete mode 100644 libcst/_parser/types/__init__.py delete mode 100644 libcst/_parser/types/conversions.py delete mode 100644 libcst/_parser/types/partials.py delete mode 100644 libcst/_parser/types/production.py delete mode 100644 libcst/_parser/types/py_token.py delete mode 100644 libcst/_parser/types/py_whitespace_state.py delete mode 100644 libcst/_parser/types/token.py delete mode 100644 libcst/_parser/types/whitespace_state.py diff --git a/libcst/_nodes/tests/test_with.py b/libcst/_nodes/tests/test_with.py index dfb0a272..0b396619 100644 --- a/libcst/_nodes/tests/test_with.py +++ b/libcst/_nodes/tests/test_with.py @@ -7,7 +7,6 @@ import libcst as cst from libcst import parse_statement, PartialParserConfig -from libcst._maybe_sentinel import 
MaybeSentinel from libcst._nodes.tests.base import CSTNodeTest, DummyIndentedBlock, parse_statement_as from libcst.metadata import CodeRange from libcst.testing.utils import data_provider diff --git a/libcst/_parser/conversions/README.md b/libcst/_parser/conversions/README.md deleted file mode 100644 index 798e3d18..00000000 --- a/libcst/_parser/conversions/README.md +++ /dev/null @@ -1,209 +0,0 @@ -# Parser Conversions Developer Guide - -Parser conversions take grammar productions and convert them to CST nodes, or to some -"partial" value that will later be converted to a CST node. - -The grammar production that parser conversions are associated with is co-located -alongside the conversion function using our `@with_production` decorator. This is -similar to the API that [rply](https://github.com/alex/rply/) uses. - -Grammar productions are collected when the parser is first called, and converted into a -state machine by Parso's pgen2 fork. - -Unlike rply's API, productions are not automatically gathered, because that would be -dependent on implicit import-time side-effects. Instead all conversion functions must be -listed in `_grammar.py`. - -# What's a production? - -A production is a line in our BNF-like grammar definition. A production has a name (the -first argument of `@with_production`), and a sequence of children (the second argument -of `@with_production`). - -Python's full grammar is here: https://docs.python.org/3/reference/grammar.html - -We use Parso's fork of pgen2, and therefore support the same BNF-like syntax that -Python's documentation uses. - -# Why is everything `Any`-typed? Isn't that bad? - -Yes, `Any` types indicate a gap in static type coverage. Unfortunately, this isn't -easily solved. - -The value of `children` given to a conversion function is dependent on textual grammar -representation and pgen2's implementation, which the type system is unaware of. 
Unless -we extend the type system to support pgen2 (unlikely) or add a layer of -machine-generated code (possible, but we're not there), there's no way for the type -system to validate any annotations on `children`. - -We could add annotations to `children`, but they're usually complicated types (so they -wouldn't be very human-readable), and they wouldn't actually provide any type safety -because the type checker doesn't know about them. - -Similarly, we could annotate return type annotations, but that's just duplicating the -type we're already expressing in our return statement (so it doesn't improve readability -much), and it's not providing any static type safety. - -We do perform runtime type checks inside tests, and we hope that this test coverage will -help compensate for the lack of static type safety. - -# Where's the whitespace? - -The most important differentiation between an Abstract Syntax Tree and a Concrete Syntax -Tree (for our purposes) is that the CST contains enough information to exactly reproduce -the original program. This means that we must somehow capture and store whitespace. - -The grammar does not contain whitespace information, and there are no explicit tokens -for whitespace. If the grammar did contain whitespace information, the grammar likely -wouldn't be LL(1), and while we could use another context free grammar parsing -algorithm, it would add complexity and likely wouldn't be as efficient. - -Instead, we have a hand-written re-entrant recursive-descent parser for whitespace. It's -the responsibility of conversion functions to call into this parser given whitespace -states before and after a token. - -# Token and WhitespaceState Data Structures - -A token is defined as: - -``` -class Token: - type: TokenType - string: str - # The start of where `string` is in the source, not including leading whitespace. - start_pos: Tuple[int, int] - # The end of where `string` is in the source, not including trailing whitespace. 
- end_pos: Tuple[int, int] - whitespace_before: WhitespaceState - whitespace_after: WhitespaceState -``` - -Or, in the order that these pieces appear lexically in a parsed program: - -``` -+-------------------+--------+-------------------+ -| whitespace_before | string | whitespace_after | -| (WhitespaceState) | (str) | (WhitespaceState) | -+-------------------+--------+-------------------+ -``` - -Tokens are immutable, but only shallowly, because their whitespace fields are mutable -WhitespaceState objects. - -WhitespaceStates are opaque objects that the whitespace parser consumes and mutates. -WhitespaceState nodes are shared across multiple tokens, so `whitespace_after` is the -same object as `whitespace_before` in the next token. - -# Parser Execution Order - -The parser generator we use (`pgen2`) is bottom-up, meaning that children productions -are called before their parents. In contrast, our hand written whitespace parser is -top-down. - -Inside each production, child conversion functions are called from left to right. - -As an example, assume we're given the following simple grammar and program: - -``` -add_expr: NUMBER ['+' add_expr] -``` - -``` -1 + 2 + 3 -``` - -which forms the parse tree: - -``` - [H] add_expr - / | \ -[A] 1 [B] '+' [G] add_expr - / | \ - [C] 2 [D] '+' [F] add_expr - | - [E] 3 -``` - -The conversion functions would be called in the labeled alphabetical order, with `A` -converted first, and `H` converted last. - -# Who owns whitespace? - -There's a lot of holes between you and a correct whitespace representation, but these -can be divided into a few categories of potential mistakes: - -## Forgetting to Parse Whitespace - -Fortunately, the inverse (parsing the same whitespace twice) should not be possible, -because whitespace is "consumed" by the whitespace parser. - -This kind of mistake is easily caught with tests. - -## Assigning Whitespace to the Wrong Owner - -This is probably the easiest mistake to make. 
The general convention is that the -top-most possible node owns whitespace, but in a bottom-up parser like ours, the -children are parsed before their parents. - -In contrast, the best owner for whitespace in our tree when there's multiple possible -owners is usually the top-most node. - -As an example, assume we have the following grammar and program: - -``` -simple_stmt: (pass_stmt ';')* NEWLINE -``` - -``` -pass; # comment -``` - -Since both `cst.Semicolon` and `cst.SimpleStatement` can both store some whitespace -after themselves, there's some ambiguity about who should own the space character before -the comment. However, since `cst.SimpleStatement` is the parent, the convention is that -it should own it. - -Unfortunately, since nodes are processed bottom-to-top and left-to-right, the semicolon -under `simple_stmt` will get processed before `simple_stmt` is. This means that in a -naive implementation, the semicolon's conversion function would have a chance to consume -the whitespace before `simple_stmt` can. - -To solve this problem, you must "fix" the whitespace in the parent node's conversion -function or grammar. This can be done in a number of ways. In order of preference: - -1. Split the child's grammar production into two separate productions, one that consumes - it's leading or trailing whitespace, and one that doesn't. Depending on the parent, - use the appropriate version of the child. -2. Construct a "partial" node in the child that doesn't consume the whitespace, and then - consume the correct whitespace in the parent. Be careful about what whitespace a - node's siblings consume. -3. "Steal" the whitespace from the child by replacing the child with a new version that - doesn't have the whitespace. - -This mistake is probably hard to catch with tests, because the CST will still reprint -correctly, but it creates ergonomic issues for tools consuming the CST. 
- -## Consuming Whitespace in the Wrong Order - -This mistake is probably is the hardest to make by accident, but it could still happen, -and may be hard to catch with tests. - -Given the following piece of code: - -``` -pass # trailing -# empty line -pass -``` - -The first statement should own `# trailing` (parsed using `parse_trailing_whitespace`). -The second statement then should `# empty line` (parsed using `parse_empty_lines`). - -However, it's possible that if you somehow called `parse_empty_lines` on the second -statement before calling `parse_trailing_whitespace` on the first statement, -`parse_empty_lines` could accidentally end up consuming the `# trailing` comment, -because `parse_trailing_whitespace` hasn't yet consumed it. - -However, this circumstance is unlikely, because you'd explicitly have to handle the -children out-of-order, and we have assertions inside the whitespace parser to prevent -some possible mistakes, like the one described above. diff --git a/libcst/_parser/conversions/__init__.py b/libcst/_parser/conversions/__init__.py deleted file mode 100644 index 7bec24cb..00000000 --- a/libcst/_parser/conversions/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. diff --git a/libcst/_parser/conversions/expression.py b/libcst/_parser/conversions/expression.py deleted file mode 100644 index 79d7ad78..00000000 --- a/libcst/_parser/conversions/expression.py +++ /dev/null @@ -1,1630 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-# pyre-unsafe - -import re -import typing -from tokenize import ( - Floatnumber as FLOATNUMBER_RE, - Imagnumber as IMAGNUMBER_RE, - Intnumber as INTNUMBER_RE, -) - -from libcst import CSTLogicError -from libcst._exceptions import ParserSyntaxError, PartialParserSyntaxError -from libcst._maybe_sentinel import MaybeSentinel -from libcst._nodes.expression import ( - Arg, - Asynchronous, - Attribute, - Await, - BinaryOperation, - BooleanOperation, - Call, - Comparison, - ComparisonTarget, - CompFor, - CompIf, - ConcatenatedString, - Dict, - DictComp, - DictElement, - Element, - Ellipsis, - Float, - FormattedString, - FormattedStringExpression, - FormattedStringText, - From, - GeneratorExp, - IfExp, - Imaginary, - Index, - Integer, - Lambda, - LeftCurlyBrace, - LeftParen, - LeftSquareBracket, - List, - ListComp, - Name, - NamedExpr, - Param, - Parameters, - RightCurlyBrace, - RightParen, - RightSquareBracket, - Set, - SetComp, - Slice, - StarredDictElement, - StarredElement, - Subscript, - SubscriptElement, - Tuple, - UnaryOperation, - Yield, -) -from libcst._nodes.op import ( - Add, - And, - AssignEqual, - BaseBinaryOp, - BaseBooleanOp, - BaseCompOp, - BitAnd, - BitInvert, - BitOr, - BitXor, - Colon, - Comma, - Divide, - Dot, - Equal, - FloorDivide, - GreaterThan, - GreaterThanEqual, - In, - Is, - IsNot, - LeftShift, - LessThan, - LessThanEqual, - MatrixMultiply, - Minus, - Modulo, - Multiply, - Not, - NotEqual, - NotIn, - Or, - Plus, - Power, - RightShift, - Subtract, -) -from libcst._nodes.whitespace import SimpleWhitespace -from libcst._parser.custom_itertools import grouper -from libcst._parser.production_decorator import with_production -from libcst._parser.types.config import ParserConfig -from libcst._parser.types.partials import ( - ArglistPartial, - AttributePartial, - CallPartial, - FormattedStringConversionPartial, - FormattedStringFormatSpecPartial, - SlicePartial, - SubscriptPartial, - WithLeadingWhitespace, -) -from libcst._parser.types.token import 
Token -from libcst._parser.whitespace_parser import parse_parenthesizable_whitespace - -BINOP_TOKEN_LUT: typing.Dict[str, typing.Type[BaseBinaryOp]] = { - "*": Multiply, - "@": MatrixMultiply, - "/": Divide, - "%": Modulo, - "//": FloorDivide, - "+": Add, - "-": Subtract, - "<<": LeftShift, - ">>": RightShift, - "&": BitAnd, - "^": BitXor, - "|": BitOr, -} - - -BOOLOP_TOKEN_LUT: typing.Dict[str, typing.Type[BaseBooleanOp]] = {"and": And, "or": Or} - - -COMPOP_TOKEN_LUT: typing.Dict[str, typing.Type[BaseCompOp]] = { - "<": LessThan, - ">": GreaterThan, - "==": Equal, - "<=": LessThanEqual, - ">=": GreaterThanEqual, - "in": In, - "is": Is, -} - - -# N.B. This uses a `testlist | star_expr`, not a `testlist_star_expr` because -# `testlist_star_expr` may not always be representable by a non-partial node, since it's -# only used as part of `expr_stmt`. -@with_production("expression_input", "(testlist | star_expr) ENDMARKER") -def convert_expression_input( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (child, endmarker) = children - # HACK: UGLY! REMOVE THIS SOON! - # Unwrap WithLeadingWhitespace if it exists. It shouldn't exist by this point, but - # testlist isn't fully implemented, and we currently leak these partial objects. 
- if isinstance(child, WithLeadingWhitespace): - child = child.value - return child - - -@with_production("namedexpr_test", "test [':=' test]", version=">=3.8") -def convert_namedexpr_test( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - test, *assignment = children - if len(assignment) == 0: - return test - - # Convert all of the operations that have no precedence in a loop - (walrus, value) = assignment - return WithLeadingWhitespace( - NamedExpr( - target=test.value, - whitespace_before_walrus=parse_parenthesizable_whitespace( - config, walrus.whitespace_before - ), - whitespace_after_walrus=parse_parenthesizable_whitespace( - config, walrus.whitespace_after - ), - value=value.value, - ), - test.whitespace_before, - ) - - -@with_production("test", "or_test ['if' or_test 'else' test] | lambdef") -def convert_test( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - (child,) = children - return child - else: - (body, if_token, test, else_token, orelse) = children - return WithLeadingWhitespace( - IfExp( - body=body.value, - test=test.value, - orelse=orelse.value, - whitespace_before_if=parse_parenthesizable_whitespace( - config, if_token.whitespace_before - ), - whitespace_after_if=parse_parenthesizable_whitespace( - config, if_token.whitespace_after - ), - whitespace_before_else=parse_parenthesizable_whitespace( - config, else_token.whitespace_before - ), - whitespace_after_else=parse_parenthesizable_whitespace( - config, else_token.whitespace_after - ), - ), - body.whitespace_before, - ) - - -@with_production("test_nocond", "or_test | lambdef_nocond") -def convert_test_nocond( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (child,) = children - return child - - -@with_production("lambdef", "'lambda' [varargslist] ':' test") -@with_production("lambdef_nocond", "'lambda' [varargslist] ':' test_nocond") -def convert_lambda( - config: 
ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - lambdatoken, *params, colontoken, test = children - - # Grab the whitespace around the colon. If there are no params, then - # the colon owns the whitespace before and after it. If there are - # any params, then the last param owns the whitespace before the colon. - # We handle the parameter movement below. - colon = Colon( - whitespace_before=parse_parenthesizable_whitespace( - config, colontoken.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, colontoken.whitespace_after - ), - ) - - # Unpack optional parameters - if len(params) == 0: - parameters = Parameters() - whitespace_after_lambda = MaybeSentinel.DEFAULT - else: - (parameters,) = params - whitespace_after_lambda = parse_parenthesizable_whitespace( - config, lambdatoken.whitespace_after - ) - - # Handle pre-colon whitespace - if parameters.star_kwarg is not None: - if parameters.star_kwarg.comma == MaybeSentinel.DEFAULT: - parameters = parameters.with_changes( - star_kwarg=parameters.star_kwarg.with_changes( - whitespace_after_param=colon.whitespace_before - ) - ) - elif parameters.kwonly_params: - if parameters.kwonly_params[-1].comma == MaybeSentinel.DEFAULT: - parameters = parameters.with_changes( - kwonly_params=( - *parameters.kwonly_params[:-1], - parameters.kwonly_params[-1].with_changes( - whitespace_after_param=colon.whitespace_before - ), - ) - ) - elif isinstance(parameters.star_arg, Param): - if parameters.star_arg.comma == MaybeSentinel.DEFAULT: - parameters = parameters.with_changes( - star_arg=parameters.star_arg.with_changes( - whitespace_after_param=colon.whitespace_before - ) - ) - elif parameters.params: - if parameters.params[-1].comma == MaybeSentinel.DEFAULT: - parameters = parameters.with_changes( - params=( - *parameters.params[:-1], - parameters.params[-1].with_changes( - whitespace_after_param=colon.whitespace_before - ), - ) - ) - - # Colon doesn't own its own 
pre-whitespace now. - colon = colon.with_changes(whitespace_before=SimpleWhitespace("")) - - # Return a lambda - return WithLeadingWhitespace( - Lambda( - whitespace_after_lambda=whitespace_after_lambda, - params=parameters, - body=test.value, - colon=colon, - ), - lambdatoken.whitespace_before, - ) - - -@with_production("or_test", "and_test ('or' and_test)*") -@with_production("and_test", "not_test ('and' not_test)*") -def convert_boolop( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - leftexpr, *rightexprs = children - if len(rightexprs) == 0: - return leftexpr - - whitespace_before = leftexpr.whitespace_before - leftexpr = leftexpr.value - - # Convert all of the operations that have no precedence in a loop - for op, rightexpr in grouper(rightexprs, 2): - if op.string not in BOOLOP_TOKEN_LUT: - raise ParserSyntaxError( - f"Unexpected token '{op.string}'!", - lines=config.lines, - raw_line=0, - raw_column=0, - ) - leftexpr = BooleanOperation( - left=leftexpr, - # pyre-ignore Pyre thinks that the type of the LUT is CSTNode. 
- operator=BOOLOP_TOKEN_LUT[op.string]( - whitespace_before=parse_parenthesizable_whitespace( - config, op.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, op.whitespace_after - ), - ), - right=rightexpr.value, - ) - return WithLeadingWhitespace(leftexpr, whitespace_before) - - -@with_production("not_test", "'not' not_test | comparison") -def convert_not_test( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - (child,) = children - return child - else: - nottoken, nottest = children - return WithLeadingWhitespace( - UnaryOperation( - operator=Not( - whitespace_after=parse_parenthesizable_whitespace( - config, nottoken.whitespace_after - ) - ), - expression=nottest.value, - ), - nottoken.whitespace_before, - ) - - -@with_production("comparison", "expr (comp_op expr)*") -def convert_comparison( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - (child,) = children - return child - - lhs, *rest = children - - comparisons: typing.List[ComparisonTarget] = [] - for operator, comparator in grouper(rest, 2): - comparisons.append( - ComparisonTarget(operator=operator, comparator=comparator.value) - ) - - return WithLeadingWhitespace( - Comparison(left=lhs.value, comparisons=tuple(comparisons)), - lhs.whitespace_before, - ) - - -@with_production( - "comp_op", "('<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not')" -) -def convert_comp_op( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - (op,) = children - if op.string in COMPOP_TOKEN_LUT: - # A regular comparison containing one token - # pyre-ignore Pyre thinks that the type of the LUT is CSTNode. 
- return COMPOP_TOKEN_LUT[op.string]( - whitespace_before=parse_parenthesizable_whitespace( - config, op.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, op.whitespace_after - ), - ) - elif op.string in ["!=", "<>"]: - # Not equal, which can take two forms in some cases - return NotEqual( - whitespace_before=parse_parenthesizable_whitespace( - config, op.whitespace_before - ), - value=op.string, - whitespace_after=parse_parenthesizable_whitespace( - config, op.whitespace_after - ), - ) - else: - # this should be unreachable - raise ParserSyntaxError( - f"Unexpected token '{op.string}'!", - lines=config.lines, - raw_line=0, - raw_column=0, - ) - else: - # A two-token comparison - leftcomp, rightcomp = children - - if leftcomp.string == "not" and rightcomp.string == "in": - return NotIn( - whitespace_before=parse_parenthesizable_whitespace( - config, leftcomp.whitespace_before - ), - whitespace_between=parse_parenthesizable_whitespace( - config, leftcomp.whitespace_after - ), - whitespace_after=parse_parenthesizable_whitespace( - config, rightcomp.whitespace_after - ), - ) - elif leftcomp.string == "is" and rightcomp.string == "not": - return IsNot( - whitespace_before=parse_parenthesizable_whitespace( - config, leftcomp.whitespace_before - ), - whitespace_between=parse_parenthesizable_whitespace( - config, leftcomp.whitespace_after - ), - whitespace_after=parse_parenthesizable_whitespace( - config, rightcomp.whitespace_after - ), - ) - else: - # this should be unreachable - raise ParserSyntaxError( - f"Unexpected token '{leftcomp.string} {rightcomp.string}'!", - lines=config.lines, - raw_line=0, - raw_column=0, - ) - - -@with_production("star_expr", "'*' expr") -def convert_star_expr( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - star, expr = children - return WithLeadingWhitespace( - StarredElement( - expr.value, - whitespace_before_value=parse_parenthesizable_whitespace( - config, 
expr.whitespace_before - ), - # atom is responsible for parenthesis and trailing_whitespace if they exist - # testlist_comp, exprlist, dictorsetmaker, etc are responsible for the comma - # if it exists. - ), - whitespace_before=star.whitespace_before, - ) - - -@with_production("expr", "xor_expr ('|' xor_expr)*") -@with_production("xor_expr", "and_expr ('^' and_expr)*") -@with_production("and_expr", "shift_expr ('&' shift_expr)*") -@with_production("shift_expr", "arith_expr (('<<'|'>>') arith_expr)*") -@with_production("arith_expr", "term (('+'|'-') term)*") -@with_production("term", "factor (('*'|'@'|'/'|'%'|'//') factor)*", version=">=3.5") -@with_production("term", "factor (('*'|'/'|'%'|'//') factor)*", version="<3.5") -def convert_binop( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - leftexpr, *rightexprs = children - if len(rightexprs) == 0: - return leftexpr - - whitespace_before = leftexpr.whitespace_before - leftexpr = leftexpr.value - - # Convert all of the operations that have no precedence in a loop - for op, rightexpr in grouper(rightexprs, 2): - if op.string not in BINOP_TOKEN_LUT: - raise ParserSyntaxError( - f"Unexpected token '{op.string}'!", - lines=config.lines, - raw_line=0, - raw_column=0, - ) - leftexpr = BinaryOperation( - left=leftexpr, - # pyre-ignore Pyre thinks that the type of the LUT is CSTNode. 
- operator=BINOP_TOKEN_LUT[op.string]( - whitespace_before=parse_parenthesizable_whitespace( - config, op.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, op.whitespace_after - ), - ), - right=rightexpr.value, - ) - return WithLeadingWhitespace(leftexpr, whitespace_before) - - -@with_production("factor", "('+'|'-'|'~') factor | power") -def convert_factor( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - (child,) = children - return child - - op, factor = children - - # First, tokenize the unary operator - if op.string == "+": - opnode = Plus( - whitespace_after=parse_parenthesizable_whitespace( - config, op.whitespace_after - ) - ) - elif op.string == "-": - opnode = Minus( - whitespace_after=parse_parenthesizable_whitespace( - config, op.whitespace_after - ) - ) - elif op.string == "~": - opnode = BitInvert( - whitespace_after=parse_parenthesizable_whitespace( - config, op.whitespace_after - ) - ) - else: - raise ParserSyntaxError( - f"Unexpected token '{op.string}'!", - lines=config.lines, - raw_line=0, - raw_column=0, - ) - - return WithLeadingWhitespace( - UnaryOperation(operator=opnode, expression=factor.value), op.whitespace_before - ) - - -@with_production("power", "atom_expr ['**' factor]") -def convert_power( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - (child,) = children - return child - - left, power, right = children - return WithLeadingWhitespace( - BinaryOperation( - left=left.value, - operator=Power( - whitespace_before=parse_parenthesizable_whitespace( - config, power.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, power.whitespace_after - ), - ), - right=right.value, - ), - left.whitespace_before, - ) - - -@with_production("atom_expr", "atom_expr_await | atom_expr_trailer") -def convert_atom_expr( - config: ParserConfig, children: 
typing.Sequence[typing.Any] -) -> typing.Any: - (child,) = children - return child - - -@with_production("atom_expr_await", "AWAIT atom_expr_trailer") -def convert_atom_expr_await( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - keyword, expr = children - return WithLeadingWhitespace( - Await( - whitespace_after_await=parse_parenthesizable_whitespace( - config, keyword.whitespace_after - ), - expression=expr.value, - ), - keyword.whitespace_before, - ) - - -@with_production("atom_expr_trailer", "atom trailer*") -def convert_atom_expr_trailer( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - atom, *trailers = children - whitespace_before = atom.whitespace_before - atom = atom.value - - # Need to walk through all trailers from left to right and construct - # a series of nodes based on each partial type. We can't do this with - # left recursion due to limits in the parser. - for trailer in trailers: - if isinstance(trailer, SubscriptPartial): - atom = Subscript( - value=atom, - whitespace_after_value=parse_parenthesizable_whitespace( - config, trailer.whitespace_before - ), - lbracket=trailer.lbracket, - # pyre-fixme[6]: Expected `Sequence[SubscriptElement]` for 4th param - # but got `Union[typing.Sequence[SubscriptElement], Index, Slice]`. - slice=trailer.slice, - rbracket=trailer.rbracket, - ) - elif isinstance(trailer, AttributePartial): - atom = Attribute(value=atom, dot=trailer.dot, attr=trailer.attr) - elif isinstance(trailer, CallPartial): - # If the trailing argument doesn't have a comma, then it owns the - # trailing whitespace before the rpar. Otherwise, the comma owns - # it. 
- if ( - len(trailer.args) > 0 - and trailer.args[-1].comma == MaybeSentinel.DEFAULT - ): - args = ( - *trailer.args[:-1], - trailer.args[-1].with_changes( - whitespace_after_arg=trailer.rpar.whitespace_before - ), - ) - else: - args = trailer.args - atom = Call( - func=atom, - whitespace_after_func=parse_parenthesizable_whitespace( - config, trailer.lpar.whitespace_before - ), - whitespace_before_args=trailer.lpar.value.whitespace_after, - # pyre-fixme[6]: Expected `Sequence[Arg]` for 4th param but got - # `Tuple[object, ...]`. - args=tuple(args), - ) - else: - # This is an invalid trailer, so lets give up - raise CSTLogicError() - return WithLeadingWhitespace(atom, whitespace_before) - - -@with_production( - "trailer", "trailer_arglist | trailer_subscriptlist | trailer_attribute" -) -def convert_trailer( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (child,) = children - return child - - -@with_production("trailer_arglist", "'(' [arglist] ')'") -def convert_trailer_arglist( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - lpar, *arglist, rpar = children - return CallPartial( - lpar=WithLeadingWhitespace( - LeftParen( - whitespace_after=parse_parenthesizable_whitespace( - config, lpar.whitespace_after - ) - ), - lpar.whitespace_before, - ), - args=() if not arglist else arglist[0].args, - rpar=RightParen( - whitespace_before=parse_parenthesizable_whitespace( - config, rpar.whitespace_before - ) - ), - ) - - -@with_production("trailer_subscriptlist", "'[' subscriptlist ']'") -def convert_trailer_subscriptlist( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (lbracket, subscriptlist, rbracket) = children - return SubscriptPartial( - lbracket=LeftSquareBracket( - whitespace_after=parse_parenthesizable_whitespace( - config, lbracket.whitespace_after - ) - ), - slice=subscriptlist.value, - rbracket=RightSquareBracket( - 
whitespace_before=parse_parenthesizable_whitespace( - config, rbracket.whitespace_before - ) - ), - whitespace_before=lbracket.whitespace_before, - ) - - -@with_production("subscriptlist", "subscript (',' subscript)* [',']") -def convert_subscriptlist( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - # This is a list of SubscriptElement, so construct as such by grouping every - # subscript with an optional comma and adding to a list. - elements = [] - for slice, comma in grouper(children, 2): - if comma is None: - elements.append(SubscriptElement(slice=slice.value)) - else: - elements.append( - SubscriptElement( - slice=slice.value, - comma=Comma( - whitespace_before=parse_parenthesizable_whitespace( - config, comma.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, comma.whitespace_after - ), - ), - ) - ) - return WithLeadingWhitespace(elements, children[0].whitespace_before) - - -@with_production("subscript", "test | [test] ':' [test] [sliceop]") -def convert_subscript( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1 and not isinstance(children[0], Token): - # This is just an index node - (test,) = children - return WithLeadingWhitespace(Index(test.value), test.whitespace_before) - - if isinstance(children[-1], SlicePartial): - # We got a partial slice as the final param. Extract the final - # bits of the full subscript. - *others, sliceop = children - whitespace_before = others[0].whitespace_before - second_colon = sliceop.second_colon - step = sliceop.step - else: - # We can just parse this below, without taking extras from the - # partial child. - others = children - whitespace_before = others[0].whitespace_before - second_colon = MaybeSentinel.DEFAULT - step = None - - # We need to create a partial slice to pass up. So, align so we have - # a list that's always [Optional[Test], Colon, Optional[Test]]. 
- if isinstance(others[0], Token): - # First token is a colon, so insert an empty test on the LHS. We - # know the RHS is a test since it's not a sliceop. - slicechildren = [None, *others] - else: - # First token is non-colon, so its a test. - slicechildren = [*others] - - if len(slicechildren) < 3: - # Now, we have to fill in the RHS. We know its two long - # at this point if its not already 3. - slicechildren = [*slicechildren, None] - - lower, first_colon, upper = slicechildren - return WithLeadingWhitespace( - Slice( - lower=lower.value if lower is not None else None, - first_colon=Colon( - whitespace_before=parse_parenthesizable_whitespace( - config, - first_colon.whitespace_before, - ), - whitespace_after=parse_parenthesizable_whitespace( - config, - first_colon.whitespace_after, - ), - ), - upper=upper.value if upper is not None else None, - second_colon=second_colon, - step=step, - ), - whitespace_before=whitespace_before, - ) - - -@with_production("sliceop", "':' [test]") -def convert_sliceop( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 2: - colon, test = children - step = test.value - else: - (colon,) = children - step = None - return SlicePartial( - second_colon=Colon( - whitespace_before=parse_parenthesizable_whitespace( - config, colon.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, colon.whitespace_after - ), - ), - step=step, - ) - - -@with_production("trailer_attribute", "'.' 
NAME") -def convert_trailer_attribute( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - dot, name = children - return AttributePartial( - dot=Dot( - whitespace_before=parse_parenthesizable_whitespace( - config, dot.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, dot.whitespace_after - ), - ), - attr=Name(name.string), - ) - - -@with_production( - "atom", - "atom_parens | atom_squarebrackets | atom_curlybraces | atom_string | atom_basic | atom_ellipses", -) -def convert_atom( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (child,) = children - return child - - -@with_production("atom_basic", "NAME | NUMBER | 'None' | 'True' | 'False'") -def convert_atom_basic( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (child,) = children - if child.type.name == "NAME": - # This also handles 'None', 'True', and 'False' directly, but we - # keep it in the grammar to be more correct. - return WithLeadingWhitespace(Name(child.string), child.whitespace_before) - elif child.type.name == "NUMBER": - # We must determine what type of number it is since we split node - # types up this way. 
- if re.fullmatch(INTNUMBER_RE, child.string): - return WithLeadingWhitespace(Integer(child.string), child.whitespace_before) - elif re.fullmatch(FLOATNUMBER_RE, child.string): - return WithLeadingWhitespace(Float(child.string), child.whitespace_before) - elif re.fullmatch(IMAGNUMBER_RE, child.string): - return WithLeadingWhitespace( - Imaginary(child.string), child.whitespace_before - ) - else: - raise ParserSyntaxError( - f"Unparseable number {child.string}", - lines=config.lines, - raw_line=0, - raw_column=0, - ) - else: - raise ParserSyntaxError( - f"Logic error, unexpected token {child.type.name}", - lines=config.lines, - raw_line=0, - raw_column=0, - ) - - -@with_production("atom_squarebrackets", "'[' [testlist_comp_list] ']'") -def convert_atom_squarebrackets( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - lbracket_tok, *body, rbracket_tok = children - lbracket = LeftSquareBracket( - whitespace_after=parse_parenthesizable_whitespace( - config, lbracket_tok.whitespace_after - ) - ) - - rbracket = RightSquareBracket( - whitespace_before=parse_parenthesizable_whitespace( - config, rbracket_tok.whitespace_before - ) - ) - - if len(body) == 0: - list_node = List((), lbracket=lbracket, rbracket=rbracket) - else: # len(body) == 1 - # body[0] is a List or ListComp - list_node = body[0].value.with_changes(lbracket=lbracket, rbracket=rbracket) - - return WithLeadingWhitespace(list_node, lbracket_tok.whitespace_before) - - -@with_production("atom_curlybraces", "'{' [dictorsetmaker] '}'") -def convert_atom_curlybraces( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - lbrace_tok, *body, rbrace_tok = children - lbrace = LeftCurlyBrace( - whitespace_after=parse_parenthesizable_whitespace( - config, lbrace_tok.whitespace_after - ) - ) - - rbrace = RightCurlyBrace( - whitespace_before=parse_parenthesizable_whitespace( - config, rbrace_tok.whitespace_before - ) - ) - - if len(body) == 0: - dict_or_set_node = 
Dict((), lbrace=lbrace, rbrace=rbrace) - else: # len(body) == 1 - dict_or_set_node = body[0].value.with_changes(lbrace=lbrace, rbrace=rbrace) - - return WithLeadingWhitespace(dict_or_set_node, lbrace_tok.whitespace_before) - - -@with_production("atom_parens", "'(' [yield_expr|testlist_comp_tuple] ')'") -def convert_atom_parens( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - lpar_tok, *atoms, rpar_tok = children - - lpar = LeftParen( - whitespace_after=parse_parenthesizable_whitespace( - config, lpar_tok.whitespace_after - ) - ) - - rpar = RightParen( - whitespace_before=parse_parenthesizable_whitespace( - config, rpar_tok.whitespace_before - ) - ) - - if len(atoms) == 1: - # inner_atom is a _BaseParenthesizedNode - inner_atom = atoms[0].value - return WithLeadingWhitespace( - inner_atom.with_changes( - # pyre-fixme[60]: Expected to unpack an iterable, but got `unknown`. - lpar=(lpar, *inner_atom.lpar), - # pyre-fixme[60]: Expected to unpack an iterable, but got `unknown`. 
- rpar=(*inner_atom.rpar, rpar), - ), - lpar_tok.whitespace_before, - ) - else: - return WithLeadingWhitespace( - Tuple((), lpar=(lpar,), rpar=(rpar,)), lpar_tok.whitespace_before - ) - - -@with_production("atom_ellipses", "'...'") -def convert_atom_ellipses( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (token,) = children - return WithLeadingWhitespace(Ellipsis(), token.whitespace_before) - - -@with_production("atom_string", "(STRING | fstring) [atom_string]") -def convert_atom_string( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - return children[0] - else: - left, right = children - return WithLeadingWhitespace( - ConcatenatedString( - left=left.value, - whitespace_between=parse_parenthesizable_whitespace( - config, right.whitespace_before - ), - right=right.value, - ), - left.whitespace_before, - ) - - -@with_production("fstring", "FSTRING_START fstring_content* FSTRING_END") -def convert_fstring( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - start, *content, end = children - return WithLeadingWhitespace( - FormattedString(start=start.string, parts=tuple(content), end=end.string), - start.whitespace_before, - ) - - -@with_production("fstring_content", "FSTRING_STRING | fstring_expr") -def convert_fstring_content( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (child,) = children - if isinstance(child, Token): - # Construct and return a raw string portion. - return FormattedStringText(child.string) - else: - # Pass the expression up one production. - return child - - -@with_production("fstring_conversion", "'!' NAME") -def convert_fstring_conversion( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - exclaim, name = children - # There cannot be a space between the two tokens, so no need to preserve this. 
- return FormattedStringConversionPartial(name.string, exclaim.whitespace_before) - - -@with_production("fstring_equality", "'='", version=">=3.8") -def convert_fstring_equality( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (equal,) = children - return AssignEqual( - whitespace_before=parse_parenthesizable_whitespace( - config, equal.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, equal.whitespace_after - ), - ) - - -@with_production( - "fstring_expr", - "'{' (testlist_comp_tuple | yield_expr) [ fstring_equality ] [ fstring_conversion ] [ fstring_format_spec ] '}'", - version=">=3.8", -) -@with_production( - "fstring_expr", - "'{' (testlist_comp_tuple | yield_expr) [ fstring_conversion ] [ fstring_format_spec ] '}'", - version="<3.8", -) -def convert_fstring_expr( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - openbrkt, testlist, *conversions, closebrkt = children - - # Extract any optional equality (self-debugging expressions) - if len(conversions) > 0 and isinstance(conversions[0], AssignEqual): - equal = conversions[0] - conversions = conversions[1:] - else: - equal = None - - # Extract any optional conversion - if len(conversions) > 0 and isinstance( - conversions[0], FormattedStringConversionPartial - ): - conversion = conversions[0].value - conversions = conversions[1:] - else: - conversion = None - - # Extract any optional format spec - if len(conversions) > 0: - format_spec = conversions[0].values - else: - format_spec = None - - # Fix up any spacing issue we find due to the fact that the equal can - # have whitespace and is also at the end of the expression. 
- if equal is not None: - whitespace_after_expression = SimpleWhitespace("") - else: - whitespace_after_expression = parse_parenthesizable_whitespace( - config, children[2].whitespace_before - ) - - return FormattedStringExpression( - whitespace_before_expression=parse_parenthesizable_whitespace( - config, testlist.whitespace_before - ), - expression=testlist.value, - equal=equal, - whitespace_after_expression=whitespace_after_expression, - conversion=conversion, - format_spec=format_spec, - ) - - -@with_production("fstring_format_spec", "':' fstring_content*") -def convert_fstring_format_spec( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - colon, *content = children - return FormattedStringFormatSpecPartial(tuple(content), colon.whitespace_before) - - -@with_production( - "testlist_comp_tuple", - "(namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )", - version=">=3.8", -) -@with_production( - "testlist_comp_tuple", - "(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )", - version=">=3.5,<3.8", -) -@with_production( - "testlist_comp_tuple", - "(test) ( comp_for | (',' (test))* [','] )", - version="<3.5", -) -def convert_testlist_comp_tuple( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - return _convert_testlist_comp( - config, - children, - single_child_is_sequence=False, - sequence_type=Tuple, - comprehension_type=GeneratorExp, - ) - - -@with_production( - "testlist_comp_list", - "(namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )", - version=">=3.8", -) -@with_production( - "testlist_comp_list", - "(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )", - version=">=3.5,<3.8", -) -@with_production( - "testlist_comp_list", - "(test) ( comp_for | (',' (test))* [','] )", - version="<3.5", -) -def convert_testlist_comp_list( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - return 
_convert_testlist_comp( - config, - children, - single_child_is_sequence=True, - sequence_type=List, - comprehension_type=ListComp, - ) - - -def _convert_testlist_comp( - config: ParserConfig, - children: typing.Sequence[typing.Any], - single_child_is_sequence: bool, - sequence_type: typing.Union[ - typing.Type[Tuple], typing.Type[List], typing.Type[Set] - ], - comprehension_type: typing.Union[ - typing.Type[GeneratorExp], typing.Type[ListComp], typing.Type[SetComp] - ], -) -> typing.Any: - # This is either a single-element list, or the second token is a comma, so we're not - # in a generator. - if len(children) == 1 or isinstance(children[1], Token): - return _convert_sequencelike( - config, children, single_child_is_sequence, sequence_type - ) - else: - # N.B. The parent node (e.g. atom) is responsible for computing and attaching - # whitespace information on any parenthesis, square brackets, or curly braces - elt, for_in = children - return WithLeadingWhitespace( - comprehension_type(elt=elt.value, for_in=for_in, lpar=(), rpar=()), - elt.whitespace_before, - ) - - -@with_production("testlist_star_expr", "(test|star_expr) (',' (test|star_expr))* [',']") -@with_production("testlist", "test (',' test)* [',']") -@with_production("exprlist", "(expr|star_expr) (',' (expr|star_expr))* [',']") -def convert_test_or_expr_list( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - # Used by expression statements and assignments. Neither of these cases want to - # treat a single child as a sequence. - return _convert_sequencelike( - config, children, single_child_is_sequence=False, sequence_type=Tuple - ) - - -def _convert_sequencelike( - config: ParserConfig, - children: typing.Sequence[typing.Any], - single_child_is_sequence: bool, - sequence_type: typing.Union[ - typing.Type[Tuple], typing.Type[List], typing.Type[Set] - ], -) -> typing.Any: - if not single_child_is_sequence and len(children) == 1: - return children[0] - # N.B. 
The parent node (e.g. atom) is responsible for computing and attaching - # whitespace information on any parenthesis, square brackets, or curly braces - elements = [] - for wrapped_expr_or_starred_element, comma_token in grouper(children, 2): - expr_or_starred_element = wrapped_expr_or_starred_element.value - if comma_token is None: - comma = MaybeSentinel.DEFAULT - else: - comma = Comma( - whitespace_before=parse_parenthesizable_whitespace( - config, comma_token.whitespace_before - ), - # Only compute whitespace_after if we're not a trailing comma. - # If we're a trailing comma, that whitespace should be consumed by the - # TrailingWhitespace, parenthesis, etc. - whitespace_after=( - parse_parenthesizable_whitespace( - config, comma_token.whitespace_after - ) - if comma_token is not children[-1] - else SimpleWhitespace("") - ), - ) - - if isinstance(expr_or_starred_element, StarredElement): - starred_element = expr_or_starred_element - elements.append(starred_element.with_changes(comma=comma)) - else: - expr = expr_or_starred_element - elements.append(Element(value=expr, comma=comma)) - - # lpar/rpar are the responsibility of our parent - return WithLeadingWhitespace( - sequence_type(elements, lpar=(), rpar=()), - children[0].whitespace_before, - ) - - -@with_production( - "dictorsetmaker", - ( - "( ((test ':' test | '**' expr)" - + " (comp_for | (',' (test ':' test | '**' expr))* [','])) |" - + "((test | star_expr) " - + " (comp_for | (',' (test | star_expr))* [','])) )" - ), - version=">=3.5", -) -@with_production( - "dictorsetmaker", - ( - "( ((test ':' test)" - + " (comp_for | (',' (test ':' test))* [','])) |" - + "((test) " - + " (comp_for | (',' (test))* [','])) )" - ), - version="<3.5", -) -def convert_dictorsetmaker( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - # We'll always have at least one child. `atom_curlybraces` handles empty - # dicts. 
- if len(children) > 1 and ( - (isinstance(children[1], Token) and children[1].string == ":") - or (isinstance(children[0], Token) and children[0].string == "**") - ): - return _convert_dict(config, children) - else: - return _convert_set(config, children) - - -def _convert_dict_element( - config: ParserConfig, - children_iter: typing.Iterator[typing.Any], - last_child: typing.Any, -) -> typing.Union[DictElement, StarredDictElement]: - first = next(children_iter) - if isinstance(first, Token) and first.string == "**": - expr = next(children_iter) - element = StarredDictElement( - expr.value, - whitespace_before_value=parse_parenthesizable_whitespace( - config, expr.whitespace_before - ), - ) - else: - key = first - colon_tok = next(children_iter) - value = next(children_iter) - element = DictElement( - key.value, - value.value, - whitespace_before_colon=parse_parenthesizable_whitespace( - config, colon_tok.whitespace_before - ), - whitespace_after_colon=parse_parenthesizable_whitespace( - config, colon_tok.whitespace_after - ), - ) - # Handle the trailing comma (if there is one) - try: - comma_token = next(children_iter) - element = element.with_changes( - comma=Comma( - whitespace_before=parse_parenthesizable_whitespace( - config, comma_token.whitespace_before - ), - # Only compute whitespace_after if we're not a trailing comma. - # If we're a trailing comma, that whitespace should be consumed by the - # RightBracket. 
- whitespace_after=( - parse_parenthesizable_whitespace( - config, comma_token.whitespace_after - ) - if comma_token is not last_child - else SimpleWhitespace("") - ), - ) - ) - except StopIteration: - pass - return element - - -def _convert_dict( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - is_first_starred = isinstance(children[0], Token) and children[0].string == "**" - if is_first_starred: - possible_comp_for = None if len(children) < 3 else children[2] - else: - possible_comp_for = None if len(children) < 4 else children[3] - if isinstance(possible_comp_for, CompFor): - if is_first_starred: - raise PartialParserSyntaxError( - "dict unpacking cannot be used in dict comprehension" - ) - return _convert_dict_comp(config, children) - - children_iter = iter(children) - last_child = children[-1] - elements = [] - while True: - try: - elements.append(_convert_dict_element(config, children_iter, last_child)) - except StopIteration: - break - # lbrace, rbrace, lpar, and rpar will be attached as-needed by the atom grammar - return WithLeadingWhitespace(Dict(tuple(elements)), children[0].whitespace_before) - - -def _convert_dict_comp(config, children: typing.Sequence[typing.Any]) -> typing.Any: - key, colon_token, value, comp_for = children - return WithLeadingWhitespace( - DictComp( - key.value, - value.value, - comp_for, - # lbrace, rbrace, lpar, and rpar will be attached as-needed by the atom grammar - whitespace_before_colon=parse_parenthesizable_whitespace( - config, colon_token.whitespace_before - ), - whitespace_after_colon=parse_parenthesizable_whitespace( - config, colon_token.whitespace_after - ), - ), - key.whitespace_before, - ) - - -def _convert_set( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - return _convert_testlist_comp( - config, - children, - single_child_is_sequence=True, - sequence_type=Set, - comprehension_type=SetComp, - ) - - -@with_production("arglist", "argument (',' 
argument)* [',']") -def convert_arglist( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - args = [] - for argument, comma in grouper(children, 2): - if comma is None: - args.append(argument) - else: - args.append( - argument.with_changes( - comma=Comma( - whitespace_before=parse_parenthesizable_whitespace( - config, comma.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, comma.whitespace_after - ), - ) - ) - ) - return ArglistPartial(args) - - -@with_production("argument", "arg_assign_comp_for | star_arg") -def convert_argument( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - (child,) = children - return child - - -@with_production( - "arg_assign_comp_for", "test [comp_for] | test '=' test", version="<=3.7" -) -@with_production( - "arg_assign_comp_for", - "test [comp_for] | test ':=' test | test '=' test", - version=">=3.8", -) -def convert_arg_assign_comp_for( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - # Simple test - (child,) = children - return Arg(value=child.value) - elif len(children) == 2: - elt, for_in = children - return Arg(value=GeneratorExp(elt.value, for_in, lpar=(), rpar=())) - else: - lhs, equal, rhs = children - # "key := value" assignment; positional - if equal.string == ":=": - val = convert_namedexpr_test(config, children) - if not isinstance(val, WithLeadingWhitespace): - raise TypeError( - f"convert_namedexpr_test returned {val!r}, not WithLeadingWhitespace" - ) - return Arg(value=val.value) - # "key = value" assignment; keyword argument - return Arg( - keyword=lhs.value, - equal=AssignEqual( - whitespace_before=parse_parenthesizable_whitespace( - config, equal.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, equal.whitespace_after - ), - ), - value=rhs.value, - ) - - -@with_production("star_arg", "'**' test | '*' test") -def convert_star_arg( 
- config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - star, test = children - return Arg( - star=star.string, - whitespace_after_star=parse_parenthesizable_whitespace( - config, star.whitespace_after - ), - value=test.value, - ) - - -@with_production("sync_comp_for", "'for' exprlist 'in' or_test comp_if* [comp_for]") -def convert_sync_comp_for( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - # unpack - for_tok, target, in_tok, iter, *trailing = children - if len(trailing) and isinstance(trailing[-1], CompFor): - *ifs, inner_for_in = trailing - else: - ifs, inner_for_in = trailing, None - - return CompFor( - target=target.value, - iter=iter.value, - ifs=ifs, - inner_for_in=inner_for_in, - whitespace_before=parse_parenthesizable_whitespace( - config, for_tok.whitespace_before - ), - whitespace_after_for=parse_parenthesizable_whitespace( - config, for_tok.whitespace_after - ), - whitespace_before_in=parse_parenthesizable_whitespace( - config, in_tok.whitespace_before - ), - whitespace_after_in=parse_parenthesizable_whitespace( - config, in_tok.whitespace_after - ), - ) - - -@with_production("comp_for", "[ASYNC] sync_comp_for", version=">=3.6") -@with_production("comp_for", "sync_comp_for", version="<=3.5") -def convert_comp_for( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - (sync_comp_for,) = children - return sync_comp_for - else: - (async_tok, sync_comp_for) = children - return sync_comp_for.with_changes( - # asynchronous steals the `CompFor`'s `whitespace_before`. - asynchronous=Asynchronous(whitespace_after=sync_comp_for.whitespace_before), - # But, in exchange, `CompFor` gets to keep `async_tok`'s leading - # whitespace, because that's now the beginning of the `CompFor`. 
- whitespace_before=parse_parenthesizable_whitespace( - config, async_tok.whitespace_before - ), - ) - - -@with_production("comp_if", "'if' test_nocond") -def convert_comp_if( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if_tok, test = children - return CompIf( - test.value, - whitespace_before=parse_parenthesizable_whitespace( - config, if_tok.whitespace_before - ), - whitespace_before_test=parse_parenthesizable_whitespace( - config, test.whitespace_before - ), - ) - - -@with_production("yield_expr", "'yield' [yield_arg]") -def convert_yield_expr( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - # Yielding implicit none - (yield_token,) = children - yield_node = Yield(value=None) - else: - # Yielding explicit value - (yield_token, yield_arg) = children - yield_node = Yield( - value=yield_arg.value, - whitespace_after_yield=parse_parenthesizable_whitespace( - config, yield_arg.whitespace_before - ), - ) - - return WithLeadingWhitespace(yield_node, yield_token.whitespace_before) - - -@with_production("yield_arg", "testlist", version="<3.3") -@with_production("yield_arg", "'from' test | testlist", version=">=3.3,<3.8") -@with_production("yield_arg", "'from' test | testlist_star_expr", version=">=3.8") -def convert_yield_arg( - config: ParserConfig, children: typing.Sequence[typing.Any] -) -> typing.Any: - if len(children) == 1: - # Just a regular testlist, pass it up - (child,) = children - return child - else: - # Its a yield from - (from_token, test) = children - - return WithLeadingWhitespace( - From( - item=test.value, - whitespace_after_from=parse_parenthesizable_whitespace( - config, test.whitespace_before - ), - ), - from_token.whitespace_before, - ) diff --git a/libcst/_parser/conversions/module.py b/libcst/_parser/conversions/module.py deleted file mode 100644 index b40641d0..00000000 --- a/libcst/_parser/conversions/module.py +++ /dev/null @@ -1,46 +0,0 @@ -# 
Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# pyre-unsafe - -from typing import Any, Sequence - -from libcst._nodes.module import Module -from libcst._nodes.whitespace import NEWLINE_RE -from libcst._parser.production_decorator import with_production -from libcst._parser.types.config import ParserConfig - - -@with_production("file_input", "(NEWLINE | stmt)* ENDMARKER") -def convert_file_input(config: ParserConfig, children: Sequence[Any]) -> Any: - *body, footer = children - if len(body) == 0: - # If there's no body, the header and footer are ambiguous. The header is more - # important, and should own the EmptyLine nodes instead of the footer. - header = footer - footer = () - if ( - len(config.lines) == 2 - and NEWLINE_RE.fullmatch(config.lines[0]) - and config.lines[1] == "" - ): - # This is an empty file (not even a comment), so special-case this to an - # empty list instead of a single dummy EmptyLine (which is what we'd - # normally parse). - header = () - else: - # Steal the leading lines from the first statement, and move them into the - # header. - first_stmt = body[0] - header = first_stmt.leading_lines - body[0] = first_stmt.with_changes(leading_lines=()) - return Module( - header=header, - body=body, - footer=footer, - encoding=config.encoding, - default_indent=config.default_indent, - default_newline=config.default_newline, - has_trailing_newline=config.has_trailing_newline, - ) diff --git a/libcst/_parser/conversions/params.py b/libcst/_parser/conversions/params.py deleted file mode 100644 index 5b29f95d..00000000 --- a/libcst/_parser/conversions/params.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-# pyre-unsafe - -from typing import Any, List, Optional, Sequence, Union - -from libcst import CSTLogicError -from libcst._exceptions import PartialParserSyntaxError -from libcst._maybe_sentinel import MaybeSentinel -from libcst._nodes.expression import ( - Annotation, - Name, - Param, - Parameters, - ParamSlash, - ParamStar, -) -from libcst._nodes.op import AssignEqual, Comma -from libcst._parser.custom_itertools import grouper -from libcst._parser.production_decorator import with_production -from libcst._parser.types.config import ParserConfig -from libcst._parser.types.partials import ParamStarPartial -from libcst._parser.whitespace_parser import parse_parenthesizable_whitespace - - -@with_production( # noqa: C901: too complex - "typedargslist", - """( - (tfpdef_assign (',' tfpdef_assign)* ',' tfpdef_posind [',' [ tfpdef_assign ( - ',' tfpdef_assign)* [',' [ - tfpdef_star (',' tfpdef_assign)* [',' [tfpdef_starstar [',']]] - | tfpdef_starstar [',']]] - | tfpdef_star (',' tfpdef_assign)* [',' [tfpdef_starstar [',']]] - | tfpdef_starstar [',']]] ) - | (tfpdef_assign (',' tfpdef_assign)* [',' [ - tfpdef_star (',' tfpdef_assign)* [',' [tfpdef_starstar [',']]] - | tfpdef_starstar [',']]] - | tfpdef_star (',' tfpdef_assign)* [',' [tfpdef_starstar [',']]] - | tfpdef_starstar [',']) - )""", - version=">=3.8", -) -@with_production( # noqa: C901: too complex - "typedargslist", - ( - "(tfpdef_assign (',' tfpdef_assign)* " - + "[',' [tfpdef_star (',' tfpdef_assign)* [',' [tfpdef_starstar [',']]] | tfpdef_starstar [',']]]" - + "| tfpdef_star (',' tfpdef_assign)* [',' [tfpdef_starstar [',']]] | tfpdef_starstar [','])" - ), - version=">=3.6,<=3.7", -) -@with_production( # noqa: C901: too complex - "typedargslist", - ( - "(tfpdef_assign (',' tfpdef_assign)* " - + "[',' [tfpdef_star (',' tfpdef_assign)* [',' tfpdef_starstar] | tfpdef_starstar]]" - + "| tfpdef_star (',' tfpdef_assign)* [',' tfpdef_starstar] | tfpdef_starstar)" - ), - version="<=3.5", -) -@with_production( - 
"varargslist", - """vfpdef_assign (',' vfpdef_assign)* ',' vfpdef_posind [',' [ (vfpdef_assign (',' vfpdef_assign)* [',' [ - vfpdef_star (',' vfpdef_assign)* [',' [vfpdef_starstar [',']]] - | vfpdef_starstar [',']]] - | vfpdef_star (',' vfpdef_assign)* [',' [vfpdef_starstar [',']]] - | vfpdef_starstar [',']) ]] | (vfpdef_assign (',' vfpdef_assign)* [',' [ - vfpdef_star (',' vfpdef_assign)* [',' [vfpdef_starstar [',']]] - | vfpdef_starstar [',']]] - | vfpdef_star (',' vfpdef_assign)* [',' [vfpdef_starstar [',']]] - | vfpdef_starstar [','] - )""", - version=">=3.8", -) -@with_production( - "varargslist", - ( - "(vfpdef_assign (',' vfpdef_assign)* " - + "[',' [vfpdef_star (',' vfpdef_assign)* [',' [vfpdef_starstar [',']]] | vfpdef_starstar [',']]]" - + "| vfpdef_star (',' vfpdef_assign)* [',' [vfpdef_starstar [',']]] | vfpdef_starstar [','])" - ), - version=">=3.6,<=3.7", -) -@with_production( - "varargslist", - ( - "(vfpdef_assign (',' vfpdef_assign)* " - + "[',' [vfpdef_star (',' vfpdef_assign)* [',' vfpdef_starstar] | vfpdef_starstar]]" - + "| vfpdef_star (',' vfpdef_assign)* [',' vfpdef_starstar] | vfpdef_starstar)" - ), - version="<=3.5", -) -def convert_argslist( # noqa: C901 - config: ParserConfig, children: Sequence[Any] -) -> Any: - posonly_params: List[Param] = [] - posonly_ind: Union[ParamSlash, MaybeSentinel] = MaybeSentinel.DEFAULT - params: List[Param] = [] - seen_default: bool = False - star_arg: Union[Param, ParamStar, MaybeSentinel] = MaybeSentinel.DEFAULT - kwonly_params: List[Param] = [] - star_kwarg: Optional[Param] = None - - def add_param( - current_param: Optional[List[Param]], param: Union[Param, ParamStar] - ) -> Optional[List[Param]]: - nonlocal star_arg - nonlocal star_kwarg - nonlocal seen_default - nonlocal posonly_params - nonlocal posonly_ind - nonlocal params - - if isinstance(param, ParamStar): - # Only can add this if we don't already have a "*" or a "*param". 
- if current_param is params: - star_arg = param - current_param = kwonly_params - else: - # Example code: - # def fn(*abc, *): ... - # This should be unreachable, the grammar already disallows it. - raise ValueError( - "Cannot have multiple star ('*') markers in a single argument " - + "list." - ) - elif isinstance(param, ParamSlash): - # Only can add this if we don't already have a "/" or a "*" or a "*param". - if current_param is params and len(posonly_params) == 0: - posonly_ind = param - posonly_params = params - params = [] - current_param = params - else: - # Example code: - # def fn(foo, /, *, /, bar): ... - # This should be unreachable, the grammar already disallows it. - raise ValueError( - "Cannot have multiple slash ('/') markers in a single argument " - + "list." - ) - elif isinstance(param.star, str) and param.star == "" and param.default is None: - # Can only add this if we're in the params or kwonly_params section - if current_param is params and not seen_default: - params.append(param) - elif current_param is kwonly_params: - kwonly_params.append(param) - else: - # Example code: - # def fn(first=None, second): ... - # This code is reachable, so we should use a PartialParserSyntaxError. - raise PartialParserSyntaxError( - "Cannot have a non-default argument following a default argument." - ) - elif ( - isinstance(param.star, str) - and param.star == "" - and param.default is not None - ): - # Can only add this if we're not yet at star args. - if current_param is params: - seen_default = True - params.append(param) - elif current_param is kwonly_params: - kwonly_params.append(param) - else: - # Example code: - # def fn(**kwargs, trailing=None) - # This should be unreachable, the grammar already disallows it. 
- raise ValueError("Cannot have any arguments after a kwargs expansion.") - elif ( - isinstance(param.star, str) and param.star == "*" and param.default is None - ): - # Can only add this if we're in params, since we only allow one of - # "*" or "*param". - if current_param is params: - star_arg = param - current_param = kwonly_params - else: - # Example code: - # def fn(*first, *second): ... - # This should be unreachable, the grammar already disallows it. - raise ValueError( - "Expected a keyword argument but found a starred positional " - + "argument expansion." - ) - elif ( - isinstance(param.star, str) and param.star == "**" and param.default is None - ): - # Can add this in all cases where we don't have a star_kwarg - # yet. - if current_param is not None: - star_kwarg = param - current_param = None - else: - # Example code: - # def fn(**first, **second) - # This should be unreachable, the grammar already disallows it. - raise ValueError( - "Multiple starred keyword argument expansions are not allowed in a " - + "single argument list" - ) - else: - # The state machine should never end up here. - raise CSTLogicError("Logic error!") - - return current_param - - # The parameter list we are adding to - current: Optional[List[Param]] = params - - # We should have every other item in the group as a param or a comma by now, - # so split them up, add commas and then put them in the appropriate group. - for parameter, comma in grouper(children, 2): - if comma is None: - if isinstance(parameter, ParamStarPartial): - # Example: - # def fn(abc, *): ... - # - # There's also the case where we have bare * with a trailing comma. - # That's handled later. - # - # It's not valid to construct a ParamStar object without a comma, so we - # need to catch the non-comma case separately. - raise PartialParserSyntaxError( - "Named (keyword) arguments must follow a bare *." 
- ) - else: - current = add_param(current, parameter) - else: - comma = Comma( - whitespace_before=parse_parenthesizable_whitespace( - config, comma.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, comma.whitespace_after - ), - ) - if isinstance(parameter, ParamStarPartial): - current = add_param(current, ParamStar(comma=comma)) - else: - current = add_param(current, parameter.with_changes(comma=comma)) - - if isinstance(star_arg, ParamStar) and len(kwonly_params) == 0: - # Example: - # def fn(abc, *,): ... - # - # This will raise a validation error, but we want to make sure to raise a syntax - # error instead. - # - # The case where there's no trailing comma is already handled by this point, so - # this conditional is only for the case where we have a trailing comma. - raise PartialParserSyntaxError( - "Named (keyword) arguments must follow a bare *." - ) - - return Parameters( - posonly_params=tuple(posonly_params), - posonly_ind=posonly_ind, - params=tuple(params), - star_arg=star_arg, - kwonly_params=tuple(kwonly_params), - star_kwarg=star_kwarg, - ) - - -@with_production("tfpdef_star", "'*' [tfpdef]") -@with_production("vfpdef_star", "'*' [vfpdef]") -def convert_fpdef_star(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (star,) = children - return ParamStarPartial() - else: - star, param = children - return param.with_changes( - star=star.string, - whitespace_after_star=parse_parenthesizable_whitespace( - config, star.whitespace_after - ), - ) - - -@with_production("tfpdef_starstar", "'**' tfpdef") -@with_production("vfpdef_starstar", "'**' vfpdef") -def convert_fpdef_starstar(config: ParserConfig, children: Sequence[Any]) -> Any: - starstar, param = children - return param.with_changes( - star=starstar.string, - whitespace_after_star=parse_parenthesizable_whitespace( - config, starstar.whitespace_after - ), - ) - - -@with_production("tfpdef_assign", "tfpdef ['=' test]") 
-@with_production("vfpdef_assign", "vfpdef ['=' test]") -def convert_fpdef_assign(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (child,) = children - return child - - param, equal, default = children - return param.with_changes( - equal=AssignEqual( - whitespace_before=parse_parenthesizable_whitespace( - config, equal.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, equal.whitespace_after - ), - ), - default=default.value, - ) - - -@with_production("tfpdef", "NAME [':' test]") -@with_production("vfpdef", "NAME") -def convert_fpdef(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - # This is just a parameter - (child,) = children - namenode = Name(child.string) - annotation = None - else: - # This is a parameter with a type hint - name, colon, typehint = children - namenode = Name(name.string) - annotation = Annotation( - whitespace_before_indicator=parse_parenthesizable_whitespace( - config, colon.whitespace_before - ), - whitespace_after_indicator=parse_parenthesizable_whitespace( - config, colon.whitespace_after - ), - annotation=typehint.value, - ) - - return Param(star="", name=namenode, annotation=annotation, default=None) - - -@with_production("tfpdef_posind", "'/'") -@with_production("vfpdef_posind", "'/'") -def convert_fpdef_slash(config: ParserConfig, children: Sequence[Any]) -> Any: - return ParamSlash() diff --git a/libcst/_parser/conversions/statement.py b/libcst/_parser/conversions/statement.py deleted file mode 100644 index f96c6ea2..00000000 --- a/libcst/_parser/conversions/statement.py +++ /dev/null @@ -1,1381 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-# pyre-unsafe - -from typing import Any, Dict, List, Optional, Sequence, Tuple, Type - -from libcst import CSTLogicError -from libcst._exceptions import ParserSyntaxError, PartialParserSyntaxError -from libcst._maybe_sentinel import MaybeSentinel -from libcst._nodes.expression import ( - Annotation, - Arg, - Asynchronous, - Attribute, - Call, - From, - LeftParen, - Name, - Param, - Parameters, - RightParen, -) -from libcst._nodes.op import ( - AddAssign, - AssignEqual, - BaseAugOp, - BitAndAssign, - BitOrAssign, - BitXorAssign, - Comma, - DivideAssign, - Dot, - FloorDivideAssign, - ImportStar, - LeftShiftAssign, - MatrixMultiplyAssign, - ModuloAssign, - MultiplyAssign, - PowerAssign, - RightShiftAssign, - Semicolon, - SubtractAssign, -) -from libcst._nodes.statement import ( - AnnAssign, - AsName, - Assert, - Assign, - AssignTarget, - AugAssign, - Break, - ClassDef, - Continue, - Decorator, - Del, - Else, - ExceptHandler, - Expr, - Finally, - For, - FunctionDef, - Global, - If, - Import, - ImportAlias, - ImportFrom, - IndentedBlock, - NameItem, - Nonlocal, - Pass, - Raise, - Return, - SimpleStatementLine, - SimpleStatementSuite, - Try, - While, - With, - WithItem, -) -from libcst._nodes.whitespace import EmptyLine, SimpleWhitespace -from libcst._parser.custom_itertools import grouper -from libcst._parser.production_decorator import with_production -from libcst._parser.types.config import ParserConfig -from libcst._parser.types.partials import ( - AnnAssignPartial, - AssignPartial, - AugAssignPartial, - DecoratorPartial, - ExceptClausePartial, - FuncdefPartial, - ImportPartial, - ImportRelativePartial, - SimpleStatementPartial, - WithLeadingWhitespace, -) -from libcst._parser.types.token import Token -from libcst._parser.whitespace_parser import ( - parse_empty_lines, - parse_parenthesizable_whitespace, - parse_simple_whitespace, -) - -AUGOP_TOKEN_LUT: Dict[str, Type[BaseAugOp]] = { - "+=": AddAssign, - "-=": SubtractAssign, - "*=": MultiplyAssign, - "@=": 
MatrixMultiplyAssign, - "/=": DivideAssign, - "%=": ModuloAssign, - "&=": BitAndAssign, - "|=": BitOrAssign, - "^=": BitXorAssign, - "<<=": LeftShiftAssign, - ">>=": RightShiftAssign, - "**=": PowerAssign, - "//=": FloorDivideAssign, -} - - -@with_production("stmt_input", "stmt ENDMARKER") -def convert_stmt_input(config: ParserConfig, children: Sequence[Any]) -> Any: - (child, endmarker) = children - return child - - -@with_production("stmt", "simple_stmt_line | compound_stmt") -def convert_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (child,) = children - return child - - -@with_production("simple_stmt_partial", "small_stmt (';' small_stmt)* [';'] NEWLINE") -def convert_simple_stmt_partial(config: ParserConfig, children: Sequence[Any]) -> Any: - *statements, trailing_whitespace = children - - last_stmt = len(statements) / 2 - body = [] - for i, (stmt_body, semi) in enumerate(grouper(statements, 2)): - if semi is not None: - if i == (last_stmt - 1): - # Trailing semicolons only own the whitespace before. - semi = Semicolon( - whitespace_before=parse_simple_whitespace( - config, semi.whitespace_before - ), - whitespace_after=SimpleWhitespace(""), - ) - else: - # Middle semicolons own the whitespace before and after. 
- semi = Semicolon( - whitespace_before=parse_simple_whitespace( - config, semi.whitespace_before - ), - whitespace_after=parse_simple_whitespace( - config, semi.whitespace_after - ), - ) - else: - semi = MaybeSentinel.DEFAULT - body.append(stmt_body.value.with_changes(semicolon=semi)) - return SimpleStatementPartial( - body, - whitespace_before=statements[0].whitespace_before, - trailing_whitespace=trailing_whitespace, - ) - - -@with_production("simple_stmt_line", "simple_stmt_partial") -def convert_simple_stmt_line(config: ParserConfig, children: Sequence[Any]) -> Any: - """ - This function is similar to convert_simple_stmt_suite, but yields a different type - """ - (partial,) = children - return SimpleStatementLine( - partial.body, - leading_lines=parse_empty_lines(config, partial.whitespace_before), - trailing_whitespace=partial.trailing_whitespace, - ) - - -@with_production("simple_stmt_suite", "simple_stmt_partial") -def convert_simple_stmt_suite(config: ParserConfig, children: Sequence[Any]) -> Any: - """ - This function is similar to convert_simple_stmt_line, but yields a different type - """ - (partial,) = children - return SimpleStatementSuite( - partial.body, - leading_whitespace=parse_simple_whitespace(config, partial.whitespace_before), - trailing_whitespace=partial.trailing_whitespace, - ) - - -@with_production( - "small_stmt", - ( - "expr_stmt | del_stmt | pass_stmt | break_stmt | continue_stmt | return_stmt" - + "| raise_stmt | yield_stmt | import_stmt | global_stmt | nonlocal_stmt" - + "| assert_stmt" - ), -) -def convert_small_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - # Doesn't construct SmallStatement, because we don't know about semicolons yet. - # convert_simple_stmt will construct the SmallStatement nodes. 
- (small_stmt_body,) = children - return small_stmt_body - - -@with_production( - "expr_stmt", - "testlist_star_expr (annassign | augassign | assign* )", - version=">=3.6", -) -@with_production( - "expr_stmt", "testlist_star_expr (augassign | assign* )", version="<=3.5" -) -@with_production("yield_stmt", "yield_expr") -def convert_expr_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - # This is an unassigned expr statement (like a function call) - (test_node,) = children - return WithLeadingWhitespace( - Expr(value=test_node.value), test_node.whitespace_before - ) - elif len(children) == 2: - lhs, rhs = children - if isinstance(rhs, AnnAssignPartial): - return WithLeadingWhitespace( - AnnAssign( - target=lhs.value, - annotation=rhs.annotation, - equal=MaybeSentinel.DEFAULT if rhs.equal is None else rhs.equal, - value=rhs.value, - ), - lhs.whitespace_before, - ) - elif isinstance(rhs, AugAssignPartial): - return WithLeadingWhitespace( - AugAssign(target=lhs.value, operator=rhs.operator, value=rhs.value), - lhs.whitespace_before, - ) - # The only thing it could be at this point is an assign with one or more targets. - # So, walk the children moving the equals ownership back one and constructing a - # list of AssignTargets. 
- targets = [] - for i in range(len(children) - 1): - target = children[i].value - equal = children[i + 1].equal - - targets.append( - AssignTarget( - target=target, - whitespace_before_equal=equal.whitespace_before, - whitespace_after_equal=equal.whitespace_after, - ) - ) - - return WithLeadingWhitespace( - Assign(targets=tuple(targets), value=children[-1].value), - children[0].whitespace_before, - ) - - -@with_production("annassign", "':' test ['=' test]", version=">=3.6,<3.8") -@with_production( - "annassign", "':' test ['=' (yield_expr|testlist_star_expr)]", version=">=3.8" -) -def convert_annassign(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 2: - # Variable annotation only - colon, annotation = children - annotation = annotation.value - equal = None - value = None - elif len(children) == 4: - # Variable annotation and assignment - colon, annotation, equal, value = children - annotation = annotation.value - value = value.value - equal = AssignEqual( - whitespace_before=parse_simple_whitespace(config, equal.whitespace_before), - whitespace_after=parse_simple_whitespace(config, equal.whitespace_after), - ) - else: - raise ParserSyntaxError( - "Invalid parser state!", lines=config.lines, raw_line=0, raw_column=0 - ) - - return AnnAssignPartial( - annotation=Annotation( - whitespace_before_indicator=parse_simple_whitespace( - config, colon.whitespace_before - ), - whitespace_after_indicator=parse_simple_whitespace( - config, colon.whitespace_after - ), - annotation=annotation, - ), - equal=equal, - value=value, - ) - - -@with_production( - "augassign", - ( - "('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | " - + "'>>=' | '**=' | '//=') (yield_expr | testlist)" - ), - version=">=3.5", -) -@with_production( - "augassign", - ( - "('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | " - + "'>>=' | '**=' | '//=') (yield_expr | testlist)" - ), - version="<3.5", -) -def convert_augassign(config: 
ParserConfig, children: Sequence[Any]) -> Any: - op, expr = children - if op.string not in AUGOP_TOKEN_LUT: - raise ParserSyntaxError( - f"Unexpected token '{op.string}'!", - lines=config.lines, - raw_line=0, - raw_column=0, - ) - - return AugAssignPartial( - # pyre-ignore Pyre seems to think that the value of this LUT is CSTNode - operator=AUGOP_TOKEN_LUT[op.string]( - whitespace_before=parse_simple_whitespace(config, op.whitespace_before), - whitespace_after=parse_simple_whitespace(config, op.whitespace_after), - ), - value=expr.value, - ) - - -@with_production("assign", "'=' (yield_expr|testlist_star_expr)") -def convert_assign(config: ParserConfig, children: Sequence[Any]) -> Any: - equal, expr = children - return AssignPartial( - equal=AssignEqual( - whitespace_before=parse_simple_whitespace(config, equal.whitespace_before), - whitespace_after=parse_simple_whitespace(config, equal.whitespace_after), - ), - value=expr.value, - ) - - -@with_production("pass_stmt", "'pass'") -def convert_pass_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (name,) = children - return WithLeadingWhitespace(Pass(), name.whitespace_before) - - -@with_production("del_stmt", "'del' exprlist") -def convert_del_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (del_name, exprlist) = children - return WithLeadingWhitespace( - Del( - target=exprlist.value, - whitespace_after_del=parse_simple_whitespace( - config, del_name.whitespace_after - ), - ), - del_name.whitespace_before, - ) - - -@with_production("continue_stmt", "'continue'") -def convert_continue_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (name,) = children - return WithLeadingWhitespace(Continue(), name.whitespace_before) - - -@with_production("break_stmt", "'break'") -def convert_break_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (name,) = children - return WithLeadingWhitespace(Break(), name.whitespace_before) - - -@with_production("return_stmt", "'return' 
[testlist]", version="<=3.7") -@with_production("return_stmt", "'return' [testlist_star_expr]", version=">=3.8") -def convert_return_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (keyword,) = children - return WithLeadingWhitespace( - Return(whitespace_after_return=SimpleWhitespace("")), - keyword.whitespace_before, - ) - else: - (keyword, testlist) = children - return WithLeadingWhitespace( - Return( - value=testlist.value, - whitespace_after_return=parse_simple_whitespace( - config, keyword.whitespace_after - ), - ), - keyword.whitespace_before, - ) - - -@with_production("import_stmt", "import_name | import_from") -def convert_import_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (child,) = children - return child - - -@with_production("import_name", "'import' dotted_as_names") -def convert_import_name(config: ParserConfig, children: Sequence[Any]) -> Any: - importtoken, names = children - return WithLeadingWhitespace( - Import( - names=names.names, - whitespace_after_import=parse_simple_whitespace( - config, importtoken.whitespace_after - ), - ), - importtoken.whitespace_before, - ) - - -@with_production("import_relative", "('.' | '...')* dotted_name | ('.' 
| '...')+") -def convert_import_relative(config: ParserConfig, children: Sequence[Any]) -> Any: - dots = [] - dotted_name = None - for child in children: - if isinstance(child, Token): - # Special case for "...", which is part of the grammar - if child.string == "...": - dots.extend( - [ - Dot(), - Dot(), - Dot( - whitespace_after=parse_simple_whitespace( - config, child.whitespace_after - ) - ), - ] - ) - else: - dots.append( - Dot( - whitespace_after=parse_simple_whitespace( - config, child.whitespace_after - ) - ) - ) - else: - # This should be the dotted name, and we can't get more than - # one, but lets be sure anyway - if dotted_name is not None: - raise CSTLogicError() - dotted_name = child - - return ImportRelativePartial(relative=tuple(dots), module=dotted_name) - - -@with_production( - "import_from", - "'from' import_relative 'import' ('*' | '(' import_as_names ')' | import_as_names)", -) -def convert_import_from(config: ParserConfig, children: Sequence[Any]) -> Any: - fromtoken, import_relative, importtoken, *importlist = children - - if len(importlist) == 1: - (possible_star,) = importlist - if isinstance(possible_star, Token): - # Its a "*" import, so we must construct this node. - names = ImportStar() - else: - # Its an import as names partial, grab the names from that. - names = possible_star.names - lpar = None - rpar = None - else: - # Its an import as names partial with parens - lpartoken, namespartial, rpartoken = importlist - lpar = LeftParen( - whitespace_after=parse_parenthesizable_whitespace( - config, lpartoken.whitespace_after - ) - ) - names = namespartial.names - rpar = RightParen( - whitespace_before=parse_parenthesizable_whitespace( - config, rpartoken.whitespace_before - ) - ) - - # If we have a relative-only import, then we need to relocate the space - # after the final dot to be owned by the import token. 
- if len(import_relative.relative) > 0 and import_relative.module is None: - whitespace_before_import = import_relative.relative[-1].whitespace_after - relative = ( - *import_relative.relative[:-1], - import_relative.relative[-1].with_changes( - whitespace_after=SimpleWhitespace("") - ), - ) - else: - whitespace_before_import = parse_simple_whitespace( - config, importtoken.whitespace_before - ) - relative = import_relative.relative - - return WithLeadingWhitespace( - ImportFrom( - whitespace_after_from=parse_simple_whitespace( - config, fromtoken.whitespace_after - ), - relative=relative, - module=import_relative.module, - whitespace_before_import=whitespace_before_import, - whitespace_after_import=parse_simple_whitespace( - config, importtoken.whitespace_after - ), - lpar=lpar, - names=names, - rpar=rpar, - ), - fromtoken.whitespace_before, - ) - - -@with_production("import_as_name", "NAME ['as' NAME]") -def convert_import_as_name(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (dotted_name,) = children - return ImportAlias(name=Name(dotted_name.string), asname=None) - else: - dotted_name, astoken, name = children - return ImportAlias( - name=Name(dotted_name.string), - asname=AsName( - whitespace_before_as=parse_simple_whitespace( - config, astoken.whitespace_before - ), - whitespace_after_as=parse_simple_whitespace( - config, astoken.whitespace_after - ), - name=Name(name.string), - ), - ) - - -@with_production("dotted_as_name", "dotted_name ['as' NAME]") -def convert_dotted_as_name(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (dotted_name,) = children - return ImportAlias(name=dotted_name, asname=None) - else: - dotted_name, astoken, name = children - return ImportAlias( - name=dotted_name, - asname=AsName( - whitespace_before_as=parse_parenthesizable_whitespace( - config, astoken.whitespace_before - ), - whitespace_after_as=parse_parenthesizable_whitespace( - config, 
astoken.whitespace_after - ), - name=Name(name.string), - ), - ) - - -@with_production("import_as_names", "import_as_name (',' import_as_name)* [',']") -def convert_import_as_names(config: ParserConfig, children: Sequence[Any]) -> Any: - return _gather_import_names(config, children) - - -@with_production("dotted_as_names", "dotted_as_name (',' dotted_as_name)*") -def convert_dotted_as_names(config: ParserConfig, children: Sequence[Any]) -> Any: - return _gather_import_names(config, children) - - -def _gather_import_names( - config: ParserConfig, children: Sequence[Any] -) -> ImportPartial: - names = [] - for name, comma in grouper(children, 2): - if comma is None: - names.append(name) - else: - names.append( - name.with_changes( - comma=Comma( - whitespace_before=parse_parenthesizable_whitespace( - config, comma.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, comma.whitespace_after - ), - ) - ) - ) - - return ImportPartial(names=names) - - -@with_production("dotted_name", "NAME ('.' 
NAME)*") -def convert_dotted_name(config: ParserConfig, children: Sequence[Any]) -> Any: - left, *rest = children - node = Name(left.string) - - for dot, right in grouper(rest, 2): - node = Attribute( - value=node, - dot=Dot( - whitespace_before=parse_parenthesizable_whitespace( - config, dot.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, dot.whitespace_after - ), - ), - attr=Name(right.string), - ) - - return node - - -@with_production("raise_stmt", "'raise' [test ['from' test]]") -def convert_raise_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (raise_token,) = children - whitespace_after_raise = MaybeSentinel.DEFAULT - exc = None - cause = None - elif len(children) == 2: - (raise_token, test) = children - whitespace_after_raise = parse_simple_whitespace(config, test.whitespace_before) - exc = test.value - cause = None - elif len(children) == 4: - (raise_token, test, from_token, source) = children - whitespace_after_raise = parse_simple_whitespace(config, test.whitespace_before) - exc = test.value - cause = From( - whitespace_before_from=parse_simple_whitespace( - config, from_token.whitespace_before - ), - whitespace_after_from=parse_simple_whitespace( - config, source.whitespace_before - ), - item=source.value, - ) - else: - raise CSTLogicError() - - return WithLeadingWhitespace( - Raise(whitespace_after_raise=whitespace_after_raise, exc=exc, cause=cause), - raise_token.whitespace_before, - ) - - -def _construct_nameitems(config: ParserConfig, names: Sequence[Any]) -> List[NameItem]: - nameitems: List[NameItem] = [] - for name, maybe_comma in grouper(names, 2): - if maybe_comma is None: - nameitems.append(NameItem(Name(name.string))) - else: - nameitems.append( - NameItem( - Name(name.string), - comma=Comma( - whitespace_before=parse_simple_whitespace( - config, maybe_comma.whitespace_before - ), - whitespace_after=parse_simple_whitespace( - config, maybe_comma.whitespace_after - ), 
- ), - ) - ) - return nameitems - - -@with_production("global_stmt", "'global' NAME (',' NAME)*") -def convert_global_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (global_token, *names) = children - return WithLeadingWhitespace( - Global( - names=tuple(_construct_nameitems(config, names)), - whitespace_after_global=parse_simple_whitespace( - config, names[0].whitespace_before - ), - ), - global_token.whitespace_before, - ) - - -@with_production("nonlocal_stmt", "'nonlocal' NAME (',' NAME)*") -def convert_nonlocal_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (nonlocal_token, *names) = children - return WithLeadingWhitespace( - Nonlocal( - names=tuple(_construct_nameitems(config, names)), - whitespace_after_nonlocal=parse_simple_whitespace( - config, names[0].whitespace_before - ), - ), - nonlocal_token.whitespace_before, - ) - - -@with_production("assert_stmt", "'assert' test [',' test]") -def convert_assert_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 2: - (assert_token, test) = children - assert_node = Assert( - whitespace_after_assert=parse_simple_whitespace( - config, test.whitespace_before - ), - test=test.value, - msg=None, - ) - else: - (assert_token, test, comma_token, msg) = children - assert_node = Assert( - whitespace_after_assert=parse_simple_whitespace( - config, test.whitespace_before - ), - test=test.value, - comma=Comma( - whitespace_before=parse_simple_whitespace( - config, comma_token.whitespace_before - ), - whitespace_after=parse_simple_whitespace(config, msg.whitespace_before), - ), - msg=msg.value, - ) - - return WithLeadingWhitespace(assert_node, assert_token.whitespace_before) - - -@with_production( - "compound_stmt", - ("if_stmt | while_stmt | asyncable_stmt | try_stmt | classdef | decorated"), -) -def convert_compound_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (stmt,) = children - return stmt - - -@with_production( - "if_stmt", "'if' test ':' suite 
[if_stmt_elif|if_stmt_else]", version="<=3.7" -) -@with_production( - "if_stmt", - "'if' namedexpr_test ':' suite [if_stmt_elif|if_stmt_else]", - version=">=3.8", -) -def convert_if_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - if_tok, test, colon_tok, suite, *tail = children - - if len(tail) > 0: - (orelse,) = tail - else: - orelse = None - - return If( - leading_lines=parse_empty_lines(config, if_tok.whitespace_before), - whitespace_before_test=parse_simple_whitespace(config, if_tok.whitespace_after), - test=test.value, - whitespace_after_test=parse_simple_whitespace( - config, colon_tok.whitespace_before - ), - body=suite, - orelse=orelse, - ) - - -@with_production( - "if_stmt_elif", "'elif' test ':' suite [if_stmt_elif|if_stmt_else]", version="<=3.7" -) -@with_production( - "if_stmt_elif", - "'elif' namedexpr_test ':' suite [if_stmt_elif|if_stmt_else]", - version=">=3.8", -) -def convert_if_stmt_elif(config: ParserConfig, children: Sequence[Any]) -> Any: - # this behaves exactly the same as `convert_if_stmt`, except that the leading token - # has a different string value. 
- return convert_if_stmt(config, children) - - -@with_production("if_stmt_else", "'else' ':' suite") -def convert_if_stmt_else(config: ParserConfig, children: Sequence[Any]) -> Any: - else_tok, colon_tok, suite = children - return Else( - leading_lines=parse_empty_lines(config, else_tok.whitespace_before), - whitespace_before_colon=parse_simple_whitespace( - config, colon_tok.whitespace_before - ), - body=suite, - ) - - -@with_production( - "while_stmt", "'while' test ':' suite ['else' ':' suite]", version="<=3.7" -) -@with_production( - "while_stmt", "'while' namedexpr_test ':' suite ['else' ':' suite]", version=">=3.8" -) -def convert_while_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - while_token, test, while_colon_token, while_suite, *else_block = children - - if len(else_block) > 0: - (else_token, else_colon_token, else_suite) = else_block - orelse = Else( - leading_lines=parse_empty_lines(config, else_token.whitespace_before), - whitespace_before_colon=parse_simple_whitespace( - config, else_colon_token.whitespace_before - ), - body=else_suite, - ) - else: - orelse = None - - return While( - leading_lines=parse_empty_lines(config, while_token.whitespace_before), - whitespace_after_while=parse_simple_whitespace( - config, while_token.whitespace_after - ), - test=test.value, - whitespace_before_colon=parse_simple_whitespace( - config, while_colon_token.whitespace_before - ), - body=while_suite, - orelse=orelse, - ) - - -@with_production( - "for_stmt", "'for' exprlist 'in' testlist ':' suite ['else' ':' suite]" -) -def convert_for_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - ( - for_token, - expr, - in_token, - test, - for_colon_token, - for_suite, - *else_block, - ) = children - - if len(else_block) > 0: - (else_token, else_colon_token, else_suite) = else_block - orelse = Else( - leading_lines=parse_empty_lines(config, else_token.whitespace_before), - whitespace_before_colon=parse_simple_whitespace( - config, 
else_colon_token.whitespace_before - ), - body=else_suite, - ) - else: - orelse = None - - return WithLeadingWhitespace( - For( - whitespace_after_for=parse_simple_whitespace( - config, for_token.whitespace_after - ), - target=expr.value, - whitespace_before_in=parse_simple_whitespace( - config, in_token.whitespace_before - ), - whitespace_after_in=parse_simple_whitespace( - config, in_token.whitespace_after - ), - iter=test.value, - whitespace_before_colon=parse_simple_whitespace( - config, for_colon_token.whitespace_before - ), - body=for_suite, - orelse=orelse, - ), - for_token.whitespace_before, - ) - - -@with_production( - "try_stmt", - "('try' ':' suite ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite))", -) -def convert_try_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - trytoken, try_colon_token, try_suite, *rest = children - handlers: List[ExceptHandler] = [] - orelse: Optional[Else] = None - finalbody: Optional[Finally] = None - - for clause, colon_token, suite in grouper(rest, 3): - if isinstance(clause, Token): - if clause.string == "else": - if orelse is not None: - raise CSTLogicError("Logic error!") - orelse = Else( - leading_lines=parse_empty_lines(config, clause.whitespace_before), - whitespace_before_colon=parse_simple_whitespace( - config, colon_token.whitespace_before - ), - body=suite, - ) - elif clause.string == "finally": - if finalbody is not None: - raise CSTLogicError("Logic error!") - finalbody = Finally( - leading_lines=parse_empty_lines(config, clause.whitespace_before), - whitespace_before_colon=parse_simple_whitespace( - config, colon_token.whitespace_before - ), - body=suite, - ) - else: - raise CSTLogicError("Logic error!") - elif isinstance(clause, ExceptClausePartial): - handlers.append( - ExceptHandler( - body=suite, - type=clause.type, - name=clause.name, - leading_lines=clause.leading_lines, - whitespace_after_except=clause.whitespace_after_except, - 
whitespace_before_colon=parse_simple_whitespace( - config, colon_token.whitespace_before - ), - ) - ) - else: - raise CSTLogicError("Logic error!") - - return Try( - leading_lines=parse_empty_lines(config, trytoken.whitespace_before), - whitespace_before_colon=parse_simple_whitespace( - config, try_colon_token.whitespace_before - ), - body=try_suite, - handlers=tuple(handlers), - orelse=orelse, - finalbody=finalbody, - ) - - -@with_production("except_clause", "'except' [test ['as' NAME]]") -def convert_except_clause(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (except_token,) = children - whitespace_after_except = SimpleWhitespace("") - test = None - name = None - elif len(children) == 2: - (except_token, test_node) = children - whitespace_after_except = parse_simple_whitespace( - config, except_token.whitespace_after - ) - test = test_node.value - name = None - else: - (except_token, test_node, as_token, name_token) = children - whitespace_after_except = parse_simple_whitespace( - config, except_token.whitespace_after - ) - test = test_node.value - name = AsName( - whitespace_before_as=parse_simple_whitespace( - config, as_token.whitespace_before - ), - whitespace_after_as=parse_simple_whitespace( - config, as_token.whitespace_after - ), - name=Name(name_token.string), - ) - - return ExceptClausePartial( - leading_lines=parse_empty_lines(config, except_token.whitespace_before), - whitespace_after_except=whitespace_after_except, - type=test, - name=name, - ) - - -@with_production( - "with_stmt", "'with' with_item (',' with_item)* ':' suite", version=">=3.1" -) -@with_production("with_stmt", "'with' with_item ':' suite", version="<3.1") -def convert_with_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - (with_token, *items, colon_token, suite) = children - item_nodes: List[WithItem] = [] - - for with_item, maybe_comma in grouper(items, 2): - if maybe_comma is not None: - item_nodes.append( - with_item.with_changes( - 
comma=Comma( - whitespace_before=parse_parenthesizable_whitespace( - config, maybe_comma.whitespace_before - ), - whitespace_after=parse_parenthesizable_whitespace( - config, maybe_comma.whitespace_after - ), - ) - ) - ) - else: - item_nodes.append(with_item) - - return WithLeadingWhitespace( - With( - whitespace_after_with=parse_simple_whitespace( - config, with_token.whitespace_after - ), - items=tuple(item_nodes), - whitespace_before_colon=parse_simple_whitespace( - config, colon_token.whitespace_before - ), - body=suite, - ), - with_token.whitespace_before, - ) - - -@with_production("with_item", "test ['as' expr]") -def convert_with_item(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 3: - (test, as_token, expr_node) = children - test_node = test.value - asname = AsName( - whitespace_before_as=parse_simple_whitespace( - config, as_token.whitespace_before - ), - whitespace_after_as=parse_simple_whitespace( - config, as_token.whitespace_after - ), - name=expr_node.value, - ) - else: - (test,) = children - test_node = test.value - asname = None - - return WithItem(item=test_node, asname=asname) - - -def _extract_async( - config: ParserConfig, children: Sequence[Any] -) -> Tuple[List[EmptyLine], Optional[Asynchronous], Any]: - if len(children) == 1: - (stmt,) = children - - whitespace_before = stmt.whitespace_before - asyncnode = None - else: - asynctoken, stmt = children - - whitespace_before = asynctoken.whitespace_before - asyncnode = Asynchronous( - whitespace_after=parse_simple_whitespace( - config, asynctoken.whitespace_after - ) - ) - - return (parse_empty_lines(config, whitespace_before), asyncnode, stmt.value) - - -@with_production("asyncable_funcdef", "[ASYNC] funcdef", version=">=3.5") -@with_production("asyncable_funcdef", "funcdef", version="<3.5") -def convert_asyncable_funcdef(config: ParserConfig, children: Sequence[Any]) -> Any: - leading_lines, asyncnode, funcdef = _extract_async(config, children) - - return 
funcdef.with_changes( - asynchronous=asyncnode, leading_lines=leading_lines, lines_after_decorators=() - ) - - -@with_production("funcdef", "'def' NAME parameters [funcdef_annotation] ':' suite") -def convert_funcdef(config: ParserConfig, children: Sequence[Any]) -> Any: - defnode, namenode, param_partial, *annotation, colon, suite = children - - # If the trailing paremeter doesn't have a comma, then it owns the trailing - # whitespace before the rpar. Otherwise, the comma owns it (and will have - # already parsed it). We don't check/update ParamStar because if it exists - # then we are guaranteed have at least one kwonly_param. - parameters = param_partial.params - if parameters.star_kwarg is not None: - if parameters.star_kwarg.comma == MaybeSentinel.DEFAULT: - parameters = parameters.with_changes( - star_kwarg=parameters.star_kwarg.with_changes( - whitespace_after_param=param_partial.rpar.whitespace_before - ) - ) - elif parameters.kwonly_params: - if parameters.kwonly_params[-1].comma == MaybeSentinel.DEFAULT: - parameters = parameters.with_changes( - kwonly_params=( - *parameters.kwonly_params[:-1], - parameters.kwonly_params[-1].with_changes( - whitespace_after_param=param_partial.rpar.whitespace_before - ), - ) - ) - elif isinstance(parameters.star_arg, Param): - if parameters.star_arg.comma == MaybeSentinel.DEFAULT: - parameters = parameters.with_changes( - star_arg=parameters.star_arg.with_changes( - whitespace_after_param=param_partial.rpar.whitespace_before - ) - ) - elif parameters.params: - if parameters.params[-1].comma == MaybeSentinel.DEFAULT: - parameters = parameters.with_changes( - params=( - *parameters.params[:-1], - parameters.params[-1].with_changes( - whitespace_after_param=param_partial.rpar.whitespace_before - ), - ) - ) - - return WithLeadingWhitespace( - FunctionDef( - whitespace_after_def=parse_simple_whitespace( - config, defnode.whitespace_after - ), - name=Name(namenode.string), - whitespace_after_name=parse_simple_whitespace( - 
config, namenode.whitespace_after - ), - whitespace_before_params=param_partial.lpar.whitespace_after, - params=parameters, - returns=None if not annotation else annotation[0], - whitespace_before_colon=parse_simple_whitespace( - config, colon.whitespace_before - ), - body=suite, - ), - defnode.whitespace_before, - ) - - -@with_production("parameters", "'(' [typedargslist] ')'") -def convert_parameters(config: ParserConfig, children: Sequence[Any]) -> Any: - lpar, *paramlist, rpar = children - return FuncdefPartial( - lpar=LeftParen( - whitespace_after=parse_parenthesizable_whitespace( - config, lpar.whitespace_after - ) - ), - params=Parameters() if not paramlist else paramlist[0], - rpar=RightParen( - whitespace_before=parse_parenthesizable_whitespace( - config, rpar.whitespace_before - ) - ), - ) - - -@with_production("funcdef_annotation", "'->' test") -def convert_funcdef_annotation(config: ParserConfig, children: Sequence[Any]) -> Any: - arrow, typehint = children - return Annotation( - whitespace_before_indicator=parse_parenthesizable_whitespace( - config, arrow.whitespace_before - ), - whitespace_after_indicator=parse_parenthesizable_whitespace( - config, arrow.whitespace_after - ), - annotation=typehint.value, - ) - - -@with_production("classdef", "'class' NAME ['(' [arglist] ')'] ':' suite") -def convert_classdef(config: ParserConfig, children: Sequence[Any]) -> Any: - classdef, name, *arglist, colon, suite = children - - # First, parse out the comments and empty lines before the statement. 
- leading_lines = parse_empty_lines(config, classdef.whitespace_before) - - # Compute common whitespace and nodes - whitespace_after_class = parse_simple_whitespace(config, classdef.whitespace_after) - namenode = Name(name.string) - whitespace_after_name = parse_simple_whitespace(config, name.whitespace_after) - - # Now, construct the classdef node itself - if not arglist: - # No arglist, so no arguments to this class - return ClassDef( - leading_lines=leading_lines, - lines_after_decorators=(), - whitespace_after_class=whitespace_after_class, - name=namenode, - whitespace_after_name=whitespace_after_name, - body=suite, - ) - else: - # Unwrap arglist partial, because its valid to not have any - lpar, *args, rpar = arglist - args = args[0].args if args else [] - - bases: List[Arg] = [] - keywords: List[Arg] = [] - - current_arg = bases - for arg in args: - if arg.star == "**" or arg.keyword is not None: - current_arg = keywords - # Some quick validation - if current_arg is keywords and ( - arg.star == "*" or (arg.star == "" and arg.keyword is None) - ): - raise PartialParserSyntaxError( - "Positional argument follows keyword argument." 
- ) - current_arg.append(arg) - - return ClassDef( - leading_lines=leading_lines, - lines_after_decorators=(), - whitespace_after_class=whitespace_after_class, - name=namenode, - whitespace_after_name=whitespace_after_name, - lpar=LeftParen( - whitespace_after=parse_parenthesizable_whitespace( - config, lpar.whitespace_after - ) - ), - bases=bases, - keywords=keywords, - rpar=RightParen( - whitespace_before=parse_parenthesizable_whitespace( - config, rpar.whitespace_before - ) - ), - whitespace_before_colon=parse_simple_whitespace( - config, colon.whitespace_before - ), - body=suite, - ) - - -@with_production("decorator", "'@' dotted_name [ '(' [arglist] ')' ] NEWLINE") -def convert_decorator(config: ParserConfig, children: Sequence[Any]) -> Any: - atsign, name, *arglist, newline = children - if not arglist: - # This is either a name or an attribute node, so just extract it. - decoratornode = name - else: - # This needs to be converted into a call node, and we have the - # arglist partial. - lpar, *args, rpar = arglist - args = args[0].args if args else [] - - # If the trailing argument doesn't have a comma, then it owns the - # trailing whitespace before the rpar. Otherwise, the comma owns - # it. 
- if len(args) > 0 and args[-1].comma == MaybeSentinel.DEFAULT: - args[-1] = args[-1].with_changes( - whitespace_after_arg=parse_parenthesizable_whitespace( - config, rpar.whitespace_before - ) - ) - - decoratornode = Call( - func=name, - whitespace_after_func=parse_simple_whitespace( - config, lpar.whitespace_before - ), - whitespace_before_args=parse_parenthesizable_whitespace( - config, lpar.whitespace_after - ), - args=tuple(args), - ) - - return Decorator( - leading_lines=parse_empty_lines(config, atsign.whitespace_before), - whitespace_after_at=parse_simple_whitespace(config, atsign.whitespace_after), - decorator=decoratornode, - trailing_whitespace=newline, - ) - - -@with_production("decorators", "decorator+") -def convert_decorators(config: ParserConfig, children: Sequence[Any]) -> Any: - return DecoratorPartial(decorators=children) - - -@with_production("decorated", "decorators (classdef | asyncable_funcdef)") -def convert_decorated(config: ParserConfig, children: Sequence[Any]) -> Any: - partial, class_or_func = children - - # First, split up the spacing on the first decorator - leading_lines = partial.decorators[0].leading_lines - - # Now, redistribute ownership of the whitespace - decorators = ( - partial.decorators[0].with_changes(leading_lines=()), - *partial.decorators[1:], - ) - - # Now, modify the original function or class to add the decorators. - return class_or_func.with_changes( - leading_lines=leading_lines, - # pyre-fixme[60]: Concatenation not yet support for multiple variadic - # tuples: `*class_or_func.leading_lines, - # *class_or_func.lines_after_decorators`. - # pyre-fixme[60]: Expected to unpack an iterable, but got `unknown`. 
- lines_after_decorators=( - *class_or_func.leading_lines, - *class_or_func.lines_after_decorators, - ), - decorators=decorators, - ) - - -@with_production( - "asyncable_stmt", "[ASYNC] (funcdef | with_stmt | for_stmt)", version=">=3.5" -) -@with_production("asyncable_stmt", "funcdef | with_stmt | for_stmt", version="<3.5") -def convert_asyncable_stmt(config: ParserConfig, children: Sequence[Any]) -> Any: - leading_lines, asyncnode, stmtnode = _extract_async(config, children) - if isinstance(stmtnode, FunctionDef): - return stmtnode.with_changes( - asynchronous=asyncnode, - leading_lines=leading_lines, - lines_after_decorators=(), - ) - elif isinstance(stmtnode, With): - return stmtnode.with_changes( - asynchronous=asyncnode, leading_lines=leading_lines - ) - elif isinstance(stmtnode, For): - return stmtnode.with_changes( - asynchronous=asyncnode, leading_lines=leading_lines - ) - else: - raise CSTLogicError("Logic error!") - - -@with_production("suite", "simple_stmt_suite | indented_suite") -def convert_suite(config: ParserConfig, children: Sequence[Any]) -> Any: - (suite,) = children - return suite - - -@with_production("indented_suite", "NEWLINE INDENT stmt+ DEDENT") -def convert_indented_suite(config: ParserConfig, children: Sequence[Any]) -> Any: - newline, indent, *stmts, dedent = children - return IndentedBlock( - header=newline, - indent=( - None - if indent.relative_indent == config.default_indent - else indent.relative_indent - ), - body=stmts, - # We want to be able to only keep comments in the footer that are actually for - # this IndentedBlock. We do so by assuming that lines which are indented to the - # same level as the block itself are comments that go at the footer of the - # block. Comments that are indented to less than this indent are assumed to - # belong to the next line of code. We override the indent here because the - # dedent node's absolute indent is the resulting indentation after the dedent - # is performed. 
Its this way because the whitespace state for both the dedent's - # whitespace_after and the next BaseCompoundStatement's whitespace_before is - # shared. This allows us to partially parse here and parse the rest of the - # whitespace and comments on the next line, effectively making sure that - # comments are attached to the correct node. - footer=parse_empty_lines( - config, - dedent.whitespace_after, - override_absolute_indent=indent.whitespace_before.absolute_indent, - ), - ) diff --git a/libcst/_parser/conversions/terminals.py b/libcst/_parser/conversions/terminals.py deleted file mode 100644 index f5697229..00000000 --- a/libcst/_parser/conversions/terminals.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -# pyre-unsafe - -from typing import Any - -from libcst._nodes.expression import SimpleString -from libcst._parser.types.config import ParserConfig -from libcst._parser.types.partials import WithLeadingWhitespace -from libcst._parser.types.token import Token -from libcst._parser.whitespace_parser import ( - parse_empty_lines, - parse_trailing_whitespace, -) - - -def convert_NAME(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_NUMBER(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_STRING(config: ParserConfig, token: Token) -> Any: - return WithLeadingWhitespace(SimpleString(token.string), token.whitespace_before) - - -def convert_OP(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_NEWLINE(config: ParserConfig, token: Token) -> Any: - # A NEWLINE token is only emitted for semantic newlines, which means that this - # corresponds to a TrailingWhitespace, since that's the only semantic - # newline-containing node. - - # N.B. 
Because this token is whitespace, and because the whitespace parser doesn't - # try to prevent overflows, `token.whitespace_before` will end up overflowing into - # the value of this newline token, so `parse_trailing_whitespace` will include - # token.string's value. This is expected and desired behavior. - return parse_trailing_whitespace(config, token.whitespace_before) - - -def convert_INDENT(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_DEDENT(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_ENDMARKER(config: ParserConfig, token: Token) -> Any: - # Parse any and all empty lines with an indent similar to the header. That is, - # indent of nothing and including all indents. In some cases, like when the - # footer parser follows an indented suite, the state's indent can be wrong - # due to the fact that it is shared with the _DEDENT node. We know that if - # we're parsing the end of a file, we will have no indent. - return parse_empty_lines( - config, token.whitespace_before, override_absolute_indent="" - ) - - -def convert_FSTRING_START(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_FSTRING_END(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_FSTRING_STRING(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_ASYNC(config: ParserConfig, token: Token) -> Any: - return token - - -def convert_AWAIT(config: ParserConfig, token: Token) -> Any: - return token diff --git a/libcst/_parser/entrypoints.py b/libcst/_parser/entrypoints.py index 965b8e22..d034258c 100644 --- a/libcst/_parser/entrypoints.py +++ b/libcst/_parser/entrypoints.py @@ -12,6 +12,7 @@ from functools import partial from typing import Union +from libcst import native from libcst._nodes.base import CSTNode from libcst._nodes.expression import BaseExpression from libcst._nodes.module import Module @@ -22,9 +23,6 @@ _DEFAULT_PARTIAL_PARSER_CONFIG: PartialParserConfig = 
PartialParserConfig() -from libcst import native - - def _parse( entrypoint: str, source: Union[str, bytes], diff --git a/libcst/_parser/types/__init__.py b/libcst/_parser/types/__init__.py deleted file mode 100644 index 7bec24cb..00000000 --- a/libcst/_parser/types/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. diff --git a/libcst/_parser/types/conversions.py b/libcst/_parser/types/conversions.py deleted file mode 100644 index 4c589c52..00000000 --- a/libcst/_parser/types/conversions.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Any, Callable, Sequence - -from libcst._parser.types.config import ParserConfig -from libcst._parser.types.token import Token - -# pyre-fixme[33]: Aliased annotation cannot contain `Any`. -NonterminalConversion = Callable[[ParserConfig, Sequence[Any]], Any] -# pyre-fixme[33]: Aliased annotation cannot contain `Any`. -TerminalConversion = Callable[[ParserConfig, Token], Any] diff --git a/libcst/_parser/types/partials.py b/libcst/_parser/types/partials.py deleted file mode 100644 index 4db89fab..00000000 --- a/libcst/_parser/types/partials.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- - -from dataclasses import dataclass -from typing import Generic, Optional, Sequence, TypeVar, Union - -from libcst._add_slots import add_slots -from libcst._nodes.expression import ( - Annotation, - Arg, - Attribute, - BaseExpression, - BaseFormattedStringContent, - Index, - LeftParen, - LeftSquareBracket, - Name, - Parameters, - RightParen, - RightSquareBracket, - Slice, - SubscriptElement, -) -from libcst._nodes.op import AssignEqual, BaseAugOp, Colon, Dot -from libcst._nodes.statement import AsName, BaseSmallStatement, Decorator, ImportAlias -from libcst._nodes.whitespace import EmptyLine, SimpleWhitespace, TrailingWhitespace -from libcst._parser.types.whitespace_state import WhitespaceState - -_T = TypeVar("_T") - - -@add_slots -@dataclass(frozen=True) -class WithLeadingWhitespace(Generic[_T]): - value: _T - whitespace_before: WhitespaceState - - -@add_slots -@dataclass(frozen=True) -class SimpleStatementPartial: - body: Sequence[BaseSmallStatement] - whitespace_before: WhitespaceState - trailing_whitespace: TrailingWhitespace - - -@add_slots -@dataclass(frozen=True) -class SlicePartial: - second_colon: Colon - step: Optional[BaseExpression] - - -@add_slots -@dataclass(frozen=True) -class AttributePartial: - dot: Dot - attr: Name - - -@add_slots -@dataclass(frozen=True) -class ArglistPartial: - args: Sequence[Arg] - - -@add_slots -@dataclass(frozen=True) -class CallPartial: - lpar: WithLeadingWhitespace[LeftParen] - args: Sequence[Arg] - rpar: RightParen - - -@add_slots -@dataclass(frozen=True) -class SubscriptPartial: - slice: Union[Index, Slice, Sequence[SubscriptElement]] - lbracket: LeftSquareBracket - rbracket: RightSquareBracket - whitespace_before: WhitespaceState - - -@add_slots -@dataclass(frozen=True) -class AnnAssignPartial: - annotation: Annotation - equal: Optional[AssignEqual] - value: Optional[BaseExpression] - - -@add_slots -@dataclass(frozen=True) -class AugAssignPartial: - operator: BaseAugOp - value: BaseExpression - - -@add_slots 
-@dataclass(frozen=True) -class AssignPartial: - equal: AssignEqual - value: BaseExpression - - -class ParamStarPartial: - pass - - -@add_slots -@dataclass(frozen=True) -class FuncdefPartial: - lpar: LeftParen - params: Parameters - rpar: RightParen - - -@add_slots -@dataclass(frozen=True) -class DecoratorPartial: - decorators: Sequence[Decorator] - - -@add_slots -@dataclass(frozen=True) -class ImportPartial: - names: Sequence[ImportAlias] - - -@add_slots -@dataclass(frozen=True) -class ImportRelativePartial: - relative: Sequence[Dot] - module: Optional[Union[Attribute, Name]] - - -@add_slots -@dataclass(frozen=True) -class FormattedStringConversionPartial: - value: str - whitespace_before: WhitespaceState - - -@add_slots -@dataclass(frozen=True) -class FormattedStringFormatSpecPartial: - values: Sequence[BaseFormattedStringContent] - whitespace_before: WhitespaceState - - -@add_slots -@dataclass(frozen=True) -class ExceptClausePartial: - leading_lines: Sequence[EmptyLine] - whitespace_after_except: SimpleWhitespace - type: Optional[BaseExpression] = None - name: Optional[AsName] = None diff --git a/libcst/_parser/types/production.py b/libcst/_parser/types/production.py deleted file mode 100644 index dfeffe7b..00000000 --- a/libcst/_parser/types/production.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - - -from dataclasses import dataclass -from typing import Optional - - -@dataclass(frozen=True) -class Production: - name: str - children: str - version: Optional[str] - future: Optional[str] - - def __str__(self) -> str: - return f"{self.name}: {self.children}" diff --git a/libcst/_parser/types/py_token.py b/libcst/_parser/types/py_token.py deleted file mode 100644 index d2f9b537..00000000 --- a/libcst/_parser/types/py_token.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. 
and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - - -from dataclasses import dataclass -from typing import Optional, Tuple - -from libcst._add_slots import add_slots -from libcst._parser.parso.python.token import TokenType -from libcst._parser.types.whitespace_state import WhitespaceState - - -@add_slots -@dataclass(frozen=True) -class Token: - type: TokenType - string: str - # The start of where `string` is in the source, not including leading whitespace. - start_pos: Tuple[int, int] - # The end of where `string` is in the source, not including trailing whitespace. - end_pos: Tuple[int, int] - whitespace_before: WhitespaceState - whitespace_after: WhitespaceState - # The relative indent this token adds. - relative_indent: Optional[str] diff --git a/libcst/_parser/types/py_whitespace_state.py b/libcst/_parser/types/py_whitespace_state.py deleted file mode 100644 index 6359e83e..00000000 --- a/libcst/_parser/types/py_whitespace_state.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -from dataclasses import dataclass - -from libcst._add_slots import add_slots - - -@add_slots -@dataclass(frozen=False) -class WhitespaceState: - """ - A frequently mutated store of the whitespace parser's current state. This object - must be cloned prior to speculative parsing. - - This is in contrast to the `config` object each whitespace parser function takes, - which is frozen and never mutated. - - Whitespace parsing works by mutating this state object. By encapsulating saving, and - re-using state objects inside the top-level python parser, the whitespace parser is - able to be reentrant. 
One 'convert' function can consume part of the whitespace, and - another 'convert' function can consume the rest, depending on who owns what - whitespace. - - This is similar to the approach you might take to parse nested languages (e.g. - JavaScript inside of HTML). We're treating whitespace as a separate language and - grammar from the rest of Python's grammar. - """ - - line: int # one-indexed (to match parso's behavior) - column: int # zero-indexed (to match parso's behavior) - # What to look for when executing `_parse_indent`. - absolute_indent: str - is_parenthesized: bool diff --git a/libcst/_parser/types/token.py b/libcst/_parser/types/token.py deleted file mode 100644 index 32c85ccf..00000000 --- a/libcst/_parser/types/token.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - - -try: - from libcst_native import tokenize - - Token = tokenize.Token -except ImportError: - from libcst._parser.types.py_token import Token # noqa F401 diff --git a/libcst/_parser/types/whitespace_state.py b/libcst/_parser/types/whitespace_state.py deleted file mode 100644 index 7eaeab32..00000000 --- a/libcst/_parser/types/whitespace_state.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -""" -Defines the state object used by the whitespace parser. 
-""" - -try: - from libcst_native import whitespace_state as mod -except ImportError: - from libcst._parser.types import py_whitespace_state as mod - -WhitespaceState = mod.WhitespaceState diff --git a/libcst/tests/__main__.py b/libcst/tests/__main__.py index 61403c27..5f0697f2 100644 --- a/libcst/tests/__main__.py +++ b/libcst/tests/__main__.py @@ -7,5 +7,5 @@ if __name__ == "__main__": - print(f"running tests with native parser") + print("running tests with native parser") main(module=None, verbosity=2) From 9060d7f98b5ca06ed4ac3da930b4b53f66342e8f Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 20:38:21 +0000 Subject: [PATCH 10/12] fix: overdid it a bit --- libcst/_parser/types/whitespace_state.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 libcst/_parser/types/whitespace_state.py diff --git a/libcst/_parser/types/whitespace_state.py b/libcst/_parser/types/whitespace_state.py new file mode 100644 index 00000000..7eaeab32 --- /dev/null +++ b/libcst/_parser/types/whitespace_state.py @@ -0,0 +1,15 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Defines the state object used by the whitespace parser. 
+""" + +try: + from libcst_native import whitespace_state as mod +except ImportError: + from libcst._parser.types import py_whitespace_state as mod + +WhitespaceState = mod.WhitespaceState From 98b1434fd104d7596c39ceb73c8002ebfc2e61fb Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 16:42:00 -0400 Subject: [PATCH 11/12] fix: overdid it again --- libcst/_parser/types/py_whitespace_state.py | 36 +++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 libcst/_parser/types/py_whitespace_state.py diff --git a/libcst/_parser/types/py_whitespace_state.py b/libcst/_parser/types/py_whitespace_state.py new file mode 100644 index 00000000..6359e83e --- /dev/null +++ b/libcst/_parser/types/py_whitespace_state.py @@ -0,0 +1,36 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from dataclasses import dataclass + +from libcst._add_slots import add_slots + + +@add_slots +@dataclass(frozen=False) +class WhitespaceState: + """ + A frequently mutated store of the whitespace parser's current state. This object + must be cloned prior to speculative parsing. + + This is in contrast to the `config` object each whitespace parser function takes, + which is frozen and never mutated. + + Whitespace parsing works by mutating this state object. By encapsulating saving, and + re-using state objects inside the top-level python parser, the whitespace parser is + able to be reentrant. One 'convert' function can consume part of the whitespace, and + another 'convert' function can consume the rest, depending on who owns what + whitespace. + + This is similar to the approach you might take to parse nested languages (e.g. + JavaScript inside of HTML). We're treating whitespace as a separate language and + grammar from the rest of Python's grammar. 
+ """ + + line: int # one-indexed (to match parso's behavior) + column: int # zero-indexed (to match parso's behavior) + # What to look for when executing `_parse_indent`. + absolute_indent: str + is_parenthesized: bool From bc7607e0efd75bc237e74ce80f83bba44b86682b Mon Sep 17 00:00:00 2001 From: thereversiblewheel Date: Thu, 3 Jul 2025 20:47:07 +0000 Subject: [PATCH 12/12] fix: remove combine step from ci --- .github/workflows/ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8389b608..4b0fa767 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,9 +40,7 @@ jobs: - name: Native Parser Tests run: uv run poe test - name: Coverage - run: | - uv run coverage combine .coverage.pure - uv run coverage report + run: uv run coverage report # Run linters lint: