From abdb16a6fe53f1cb7dd1b8b294e1b305a961a147 Mon Sep 17 00:00:00 2001 From: Jason Allen Date: Fri, 6 Dec 2024 07:55:21 +0000 Subject: [PATCH 1/7] First commit: add named groups for python --- python/cucumber_expressions/argument.py | 35 ++++---- python/cucumber_expressions/ast.py | 66 ++++----------- ...binatorial_generated_expression_factory.py | 8 +- python/cucumber_expressions/expression.py | 50 ++++++------ .../expression_factory.py | 38 +++++++++ .../expression_generator.py | 12 +-- .../cucumber_expressions/expression_parser.py | 32 ++++---- .../expression_tokenizer.py | 8 +- .../generated_expression.py | 6 +- python/cucumber_expressions/group.py | 32 ++------ python/cucumber_expressions/group_builder.py | 50 ++++-------- python/cucumber_expressions/parameter_type.py | 80 +++++++++---------- .../parameter_type_registry.py | 21 +++-- .../regular_expression.py | 59 +++++++++++--- python/cucumber_expressions/tree_regexp.py | 64 +++++++++------ python/pyproject.toml | 2 +- python/tests/test_argument.py | 2 +- python/tests/test_expression.py | 69 ++++++++-------- python/tests/test_expression_factory.py | 31 +++++++ python/tests/test_regular_expression.py | 2 +- python/tests/test_tree_regex.py | 16 +++- 21 files changed, 369 insertions(+), 314 deletions(-) create mode 100644 python/cucumber_expressions/expression_factory.py create mode 100644 python/tests/test_expression_factory.py diff --git a/python/cucumber_expressions/argument.py b/python/cucumber_expressions/argument.py index 040c55456..759e5aed3 100644 --- a/python/cucumber_expressions/argument.py +++ b/python/cucumber_expressions/argument.py @@ -1,42 +1,45 @@ from __future__ import annotations -from typing import Optional, List +from typing import Optional -from cucumber_expressions.group import Group from cucumber_expressions.parameter_type import ParameterType -from cucumber_expressions.tree_regexp import TreeRegexp +from cucumber_expressions.tree_regexp import TreeRegexp, Group from cucumber_expressions.errors import CucumberExpressionError class Argument: - def __init__(self, group, parameter_type): - self._group: Group = group - self.parameter_type: ParameterType = parameter_type + def __init__(self, group: Group, parameter_type: ParameterType, name: Optional[str]): + self.group = group + self.parameter_type = parameter_type + self.name = name @staticmethod def build( - tree_regexp: TreeRegexp, text: str, parameter_types: List - ) -> Optional[List[Argument]]: + tree_regexp: TreeRegexp, text: str, parameter_types_and_names: list[tuple[ParameterType, Optional[str]]] + ) -> Optional[list[Argument]]: + # Check if all elements in parameter_types_and_names are tuples + for item in parameter_types_and_names: + if not isinstance(item, tuple) or len(item) != 2: + raise CucumberExpressionError( + f"Expected a tuple of (ParameterType, Optional[str]), but got {type(item)}: {item}" + ) + match_group = tree_regexp.match(text) if not match_group: return None arg_groups = match_group.children - if len(arg_groups) != len(parameter_types): + if len(arg_groups) != len(parameter_types_and_names): raise CucumberExpressionError( - f"Group has {len(arg_groups)} capture groups, but there were {len(parameter_types)} parameter types" + f"Group has {len(arg_groups)} capture groups, but there were {len(parameter_types_and_names)} parameter types/names" ) return [ - Argument(arg_group, parameter_type) - for parameter_type, arg_group in zip(parameter_types, arg_groups) + Argument(arg_group, parameter_type, parameter_name) + for (parameter_type, parameter_name), arg_group in zip(parameter_types_and_names, arg_groups) ] @property def value(self): return self.parameter_type.transform(self.group.values if self.group else None) - - @property - def group(self): - return self._group diff --git a/python/cucumber_expressions/ast.py b/python/cucumber_expressions/ast.py index f6ddb390f..10ac73b85 100644 --- a/python/cucumber_expressions/ast.py +++ b/python/cucumber_expressions/ast.py @@ -1,7 +1,7 @@ from __future__ import annotations from enum import Enum -from typing import Optional, List +from typing import Optional, Any class NodeType(Enum): @@ -41,44 +41,24 @@ class Node: def __init__( self, ast_type: NodeType, - nodes: Optional[List[Node]], + nodes: Optional[list[Node]], token: Optional[str], start: int, end: int, ): if nodes is None and token is None: raise Exception("Either nodes or token must be defined") - self._ast_type = ast_type - self._nodes = nodes - self._token = token - self._start = start - self._end = end - - @property - def ast_type(self) -> NodeType: - return self._ast_type - - @property - def nodes(self) -> List[Node]: - return self._nodes - - @property - def token(self) -> str: - return self._token - - @property - def start(self) -> int: - return self._start - - @property - def end(self) -> int: - return self._end + self.ast_type = ast_type + self.nodes = nodes + self.token = token + self.start = start + self.end = end @property def text(self) -> str: return self.token or "".join([node_value.text for node_value in self.nodes]) - def to_json(self): + def to_json(self) -> dict[str, Any]: json_obj = {"type": self.ast_type.value} if self.nodes is not None: json_obj["nodes"] = [node_value.to_json() for node_value in self.nodes] @@ -91,26 +71,10 @@ def to_json(self): class Token: def __init__(self, ast_type: TokenType, text: str, start: int, end: int): - self._ast_type = ast_type - self._text = text - self._start = start - self._end = end - - @property - def ast_type(self): - return self._ast_type - - @property - def text(self): - return self._text - - @property - def start(self): - return self._start - - @property - def end(self): - return self._end + self.ast_type = ast_type + self.text = text + self.start = start + self.end = end @staticmethod def is_escape_character(char: str) -> bool: @@ -140,7 +104,7 @@ def type_of(char: str) -> TokenType: return TokenType.TEXT @staticmethod - def symbol_of(token: TokenType): + def symbol_of(token: TokenType) -> str: possible_token_character_key = token.name + "_CHARACTER" if any( e.name @@ -151,7 +115,7 @@ def symbol_of(token: TokenType): return "" @staticmethod - def purpose_of(token: TokenType): + def purpose_of(token: TokenType) -> str: if token in [TokenType.BEGIN_OPTIONAL, TokenType.END_OPTIONAL]: return "optional text" if token in [TokenType.BEGIN_PARAMETER, TokenType.END_PARAMETER]: @@ -160,7 +124,7 @@ def purpose_of(token: TokenType): return "alternation" return "" - def to_json(self): + def to_json(self) -> dict[str, Any]: return { "type": self.ast_type.value, "text": self.text, diff --git a/python/cucumber_expressions/combinatorial_generated_expression_factory.py b/python/cucumber_expressions/combinatorial_generated_expression_factory.py index 9b8679ac3..1346c9af9 100644 --- a/python/cucumber_expressions/combinatorial_generated_expression_factory.py +++ b/python/cucumber_expressions/combinatorial_generated_expression_factory.py @@ -1,5 +1,3 @@ -from typing import List - from cucumber_expressions.generated_expression import GeneratedExpression from cucumber_expressions.parameter_type import ParameterType @@ -12,16 +10,16 @@ def __init__(self, expression_template, parameter_type_combinations): self.expression_template = expression_template self.parameter_type_combinations = parameter_type_combinations - def generate_expressions(self) -> List[GeneratedExpression]: + def generate_expressions(self) -> list[GeneratedExpression]: generated_expressions = [] self.generate_permutations(generated_expressions, 0, []) return generated_expressions def generate_permutations( self, - generated_expressions: List[GeneratedExpression], + generated_expressions: list[GeneratedExpression], depth: int, - current_parameter_types: List[ParameterType], + current_parameter_types: list[ParameterType], ): if len(generated_expressions) >= MAX_EXPRESSIONS: return diff --git a/python/cucumber_expressions/expression.py b/python/cucumber_expressions/expression.py index 8dbe75b01..c267cb577 100644 --- a/python/cucumber_expressions/expression.py +++ b/python/cucumber_expressions/expression.py @@ -1,33 +1,33 @@ -from typing import Optional, List +from typing import Optional from cucumber_expressions.argument import Argument from cucumber_expressions.ast import Node, NodeType from cucumber_expressions.expression_parser import CucumberExpressionParser from cucumber_expressions.parameter_type import ParameterType +from cucumber_expressions.parameter_type_registry import ParameterTypeRegistry from cucumber_expressions.tree_regexp import TreeRegexp from cucumber_expressions.errors import ( - UndefinedParameterTypeError, ParameterIsNotAllowedInOptional, OptionalIsNotAllowedInOptional, OptionalMayNotBeEmpty, AlternativeMayNotBeEmpty, - AlternativeMayNotExclusivelyContainOptionals, + AlternativeMayNotExclusivelyContainOptionals, UndefinedParameterTypeError, ) ESCAPE_PATTERN = rb"([\\^\[({$.|?*+})\]])" class CucumberExpression: - def __init__(self, expression, parameter_type_registry): + def __init__(self, expression: str, parameter_type_registry: ParameterTypeRegistry): self.expression = expression self.parameter_type_registry = parameter_type_registry - self.parameter_types: List[ParameterType] = [] + self.parameter_types_and_names: list[tuple[ParameterType, Optional[str]]] = [] self.tree_regexp = TreeRegexp( self.rewrite_to_regex(CucumberExpressionParser().parse(self.expression)) ) - def match(self, text: str) -> Optional[List[Argument]]: - return Argument.build(self.tree_regexp, text, self.parameter_types) + def match(self, text: str) -> Optional[list[Argument]]: + return Argument.build(self.tree_regexp, text, self.parameter_types_and_names) @property def source(self): @@ -57,23 +57,17 @@ def rewrite_to_regex(self, node: Node): def escape_regex(expression) -> str: return expression.translate({i: "\\" + chr(i) for i in ESCAPE_PATTERN}) - def rewrite_optional(self, node: Node): - _possible_node_with_params = self.get_possible_node_with_parameters(node) - if _possible_node_with_params: - raise ParameterIsNotAllowedInOptional( - _possible_node_with_params, self.expression - ) - _possible_node_with_optionals = self.get_possible_node_with_optionals(node) - if _possible_node_with_optionals: - raise OptionalIsNotAllowedInOptional( - _possible_node_with_optionals, self.expression - ) + def rewrite_optional(self, node: Node) -> str: + if self.get_possible_node_with_parameters(node): + raise ParameterIsNotAllowedInOptional(self.get_possible_node_with_parameters(node), self.expression) + if self.get_possible_node_with_optionals(node): + raise OptionalIsNotAllowedInOptional(self.get_possible_node_with_optionals(node), self.expression) if self.are_nodes_empty(node): raise OptionalMayNotBeEmpty(node, self.expression) regex = "".join([self.rewrite_to_regex(_node) for _node in node.nodes]) return rf"(?:{regex})?" - def rewrite_alternation(self, node: Node): + def rewrite_alternation(self, node: Node) -> str: for alternative in node.nodes: if not alternative.nodes: raise AlternativeMayNotBeEmpty(alternative, self.expression) @@ -87,20 +81,30 @@ def rewrite_alternation(self, node: Node): def rewrite_alternative(self, node: Node): return "".join([self.rewrite_to_regex(_node) for _node in node.nodes]) - def rewrite_parameter(self, node: Node): + def rewrite_parameter(self, node: Node) -> str: name = node.text - parameter_type = self.parameter_type_registry.lookup_by_type_name(name) + group_name, parameter_type = self.parse_parameter_name(name) if not parameter_type: raise UndefinedParameterTypeError(node, self.expression, name) - self.parameter_types.append(parameter_type) + self.parameter_types_and_names.append((parameter_type, group_name)) regexps = parameter_type.regexps if len(regexps) == 1: return rf"({regexps[0]})" return rf"((?:{')|(?:'.join(regexps)}))" + def parse_parameter_name(self, name: str) -> tuple[Optional[str], Optional[ParameterType]]: + """Helper function to parse the parameter name and return group_name and parameter_type.""" + if ":" in name: + group_name, parameter_type_name = name.split(":") + parameter_type = self.parameter_type_registry.lookup_by_type_name(parameter_type_name) + else: + group_name = None + parameter_type = self.parameter_type_registry.lookup_by_type_name(name) + return group_name, parameter_type + def rewrite_expression(self, node: Node): regex = "".join([self.rewrite_to_regex(_node) for _node in node.nodes]) return rf"^{regex}$" @@ -117,5 +121,5 @@ def get_possible_node_with_optionals(self, node: Node) -> Optional[Node]: return results[0] if results else None @staticmethod - def get_nodes_with_ast_type(node: Node, ast_type: NodeType) -> List[Node]: + def get_nodes_with_ast_type(node: Node, ast_type: NodeType) -> list[Node]: return [ast_node for ast_node in node.nodes if ast_node.ast_type == ast_type] diff --git a/python/cucumber_expressions/expression_factory.py b/python/cucumber_expressions/expression_factory.py new file mode 100644 index 000000000..5753bb8ba --- /dev/null +++ b/python/cucumber_expressions/expression_factory.py @@ -0,0 +1,38 @@ +import re + +from cucumber_expressions.expression import CucumberExpression +from cucumber_expressions.parameter_type_registry import ParameterTypeRegistry +from cucumber_expressions.regular_expression import RegularExpression + +CURLY_BRACKET_PATTERN = re.compile(r"{(.*?)}") +INVALID_CURLY_PATTERN = re.compile(r"^\d+(?:,\d+)?$") + + +class ExpressionFactory: + def __init__(self, parameter_type_registry: ParameterTypeRegistry = ParameterTypeRegistry()): + self.parameter_type_registry = parameter_type_registry + + @staticmethod + def _has_curly_brackets(string: str) -> bool: + return "{" in string and "}" in string + + @staticmethod + def _extract_text_in_curly_brackets(string: str) -> list: + return CURLY_BRACKET_PATTERN.findall(string) + + def is_cucumber_expression(self, expression_string: str): + if not self._has_curly_brackets(expression_string): + return False + bracket_texts = self._extract_text_in_curly_brackets(expression_string) + # Check if any match does not contain an integer or an integer and a comma + for text in bracket_texts: + # Check if the match is a regex pattern (matches integer or integer-comma pattern) + if INVALID_CURLY_PATTERN.match(text): + return False # Found a form of curly bracket + return True # All curly brackets are valid + + + def create_expression(self, expression_string: str): + if self.is_cucumber_expression(expression_string): + return CucumberExpression(expression_string, self.parameter_type_registry) + return RegularExpression(expression_string, self.parameter_type_registry) diff --git a/python/cucumber_expressions/expression_generator.py b/python/cucumber_expressions/expression_generator.py index 116e4523f..c76bd38d4 100644 --- a/python/cucumber_expressions/expression_generator.py +++ b/python/cucumber_expressions/expression_generator.py @@ -1,6 +1,5 @@ import functools import re -from typing import List from cucumber_expressions.generated_expression import GeneratedExpression from cucumber_expressions.parameter_type import ParameterType @@ -8,16 +7,17 @@ from cucumber_expressions.combinatorial_generated_expression_factory import ( CombinatorialGeneratedExpressionFactory, ) +from cucumber_expressions.parameter_type_registry import ParameterTypeRegistry class CucumberExpressionGenerator: - def __init__(self, parameter_type_registry): + def __init__(self, parameter_type_registry: ParameterTypeRegistry): self.parameter_type_registry = parameter_type_registry - def generate_expressions(self, text: str) -> List[GeneratedExpression]: + def generate_expressions(self, text: str) -> list[GeneratedExpression]: parameter_type_combinations = [] parameter_type_matchers = self.create_parameter_type_matchers(text) - expression_template: List[str] = [] + expression_template: list[str] = [] pos = 0 while True: @@ -80,7 +80,7 @@ def escape(string: str) -> str: .replace(r"/", "\\/") ) - def create_parameter_type_matchers(self, text) -> List[ParameterTypeMatcher]: + def create_parameter_type_matchers(self, text) -> list[ParameterTypeMatcher]: parameter_type_matchers = [] for parameter_type in self.parameter_type_registry.parameter_types: if parameter_type.use_for_snippets: @@ -92,7 +92,7 @@ def create_parameter_type_matchers(self, text) -> List[ParameterTypeMatcher]: @staticmethod def create_parameter_type_matchers_with_type( parameter_type, text - ) -> List[ParameterTypeMatcher]: + ) -> list[ParameterTypeMatcher]: return [ ParameterTypeMatcher(parameter_type, re.compile(f"({regexp})"), text, 0) for regexp in parameter_type.regexps diff --git a/python/cucumber_expressions/expression_parser.py b/python/cucumber_expressions/expression_parser.py index bf755fdbd..722c40d4c 100644 --- a/python/cucumber_expressions/expression_parser.py +++ b/python/cucumber_expressions/expression_parser.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import NamedTuple, Optional, Callable, List +from typing import NamedTuple, Optional, Callable from cucumber_expressions.ast import Token, TokenType, Node, NodeType from cucumber_expressions.errors import ( @@ -18,7 +18,7 @@ class Result(NamedTuple): class Parser(NamedTuple): expression: str - tokens: List[Token] + tokens: list[Token] current: int @@ -75,7 +75,7 @@ def parse(self, expression: str) -> Node: # optional := '(' + option* + ')' # option := optional | parameter | text - optional_sub_parsers = [] + optional_sub_parsers: list = [] parse_optional = self.parse_between( NodeType.OPTIONAL, TokenType.BEGIN_OPTIONAL, @@ -170,7 +170,7 @@ def parse_between( ast_type: NodeType, begin_token: TokenType, end_token: TokenType, - parsers: List, + parsers: list, ) -> Callable[[Parser], Result | tuple[int, Node]]: def _parse_between(parser: Parser): if not self.looking_at(parser.tokens, parser.current, begin_token): @@ -205,7 +205,7 @@ def _parse_between(parser: Parser): @staticmethod def parse_token( - expression, parsers: List, tokens: List[Token], start_at: int + expression, parsers: list, tokens: list[Token], start_at: int ) -> Result: for parser in parsers: consumed, ast = parser(Parser(expression, tokens, start_at)) @@ -217,14 +217,14 @@ def parse_token( def parse_tokens_until( self, expression, - parsers: List, - tokens: List[Token], + parsers: list, + tokens: list[Token], start_at: int, - end_tokens: List[TokenType], - ) -> tuple[int, List[Node]]: + end_tokens: list[TokenType], + ) -> tuple[int, list[Node]]: current = start_at size = len(tokens) - ast: List[Node] = [] + ast: list[Node] = [] while current < size: if self.looking_at_any(tokens, current, end_tokens): break @@ -238,14 +238,14 @@ def parse_tokens_until( return current - start_at, ast def looking_at_any( - self, tokens: List[Token], position: int, token_types: List[TokenType] + self, tokens: list[Token], position: int, token_types: list[TokenType] ) -> bool: return any( self.looking_at(tokens, position, token_type) for token_type in token_types ) @staticmethod - def looking_at(tokens: List[Token], position: int, token_type: TokenType) -> bool: + def looking_at(tokens: list[Token], position: int, token_type: TokenType) -> bool: if position < 0: # If configured correctly this will never happen # Keep for completeness @@ -255,8 +255,8 @@ def looking_at(tokens: List[Token], position: int, token_type: TokenType) -> boo return tokens[position].ast_type == token_type def split_alternatives( - self, start: int, end: int, alternation: List[Node] - ) -> List[Node]: + self, start: int, end: int, alternation: list[Node] + ) -> list[Node]: separators = [] alternatives = [] alternative = [] @@ -272,8 +272,8 @@ def split_alternatives( @staticmethod def create_alternative_nodes( - start: int, end: int, separators: List, alternatives: List - ) -> List[Node]: + start: int, end: int, separators: list, alternatives: list + ) -> list[Node]: for index, alternative in enumerate(alternatives): if index == 0: right_separator = separators[index] diff --git a/python/cucumber_expressions/expression_tokenizer.py b/python/cucumber_expressions/expression_tokenizer.py index 92dc3c391..a1ccf11e9 100644 --- a/python/cucumber_expressions/expression_tokenizer.py +++ b/python/cucumber_expressions/expression_tokenizer.py @@ -1,5 +1,3 @@ -from typing import List - from cucumber_expressions.ast import TokenType, Token from cucumber_expressions.errors import ( TheEndOfLineCannotBeEscaped, @@ -10,11 +8,11 @@ class CucumberExpressionTokenizer: def __init__(self): self.expression: str = "" - self.buffer: List[str] = [] + self.buffer: list[str] = [] self.escaped: int = 0 self.buffer_start_index: int = 0 - def tokenize(self, expression: str, to_json: bool = False) -> List[Token]: + def tokenize(self, expression: str, to_json: bool = False) -> list[Token]: self.expression = expression tokens = [] previous_token_type = TokenType.START_OF_LINE @@ -50,7 +48,7 @@ def tokenize(self, expression: str, to_json: bool = False) -> List[Token]: tokens.append(Token(TokenType.END_OF_LINE, "", len(chars), len(chars))) - def convert_to_json_format(_tokens: List[Token]) -> List: + def convert_to_json_format(_tokens: list[Token]) -> list: return [ { "type": t.ast_type.value, diff --git a/python/cucumber_expressions/generated_expression.py b/python/cucumber_expressions/generated_expression.py index 09b9a37bc..54dbb16db 100644 --- a/python/cucumber_expressions/generated_expression.py +++ b/python/cucumber_expressions/generated_expression.py @@ -5,14 +5,14 @@ def __init__(self, expression_template: str, parameter_types): self.usage_by_type_name = {} @property - def source(self): + def source(self) -> str: return self.expression_template % tuple(p.name for p in self.parameter_types) @property - def parameter_names(self): + def parameter_names(self) -> list[str]: return [self.get_parameter_name(t.name) for t in self.parameter_types] - def get_parameter_name(self, type_name): + def get_parameter_name(self, type_name: str) -> str: count = self.usage_by_type_name.get(type_name) or 0 count = count + 1 self.usage_by_type_name[type_name] = count diff --git a/python/cucumber_expressions/group.py b/python/cucumber_expressions/group.py index 06c1a88d2..664a59b44 100644 --- a/python/cucumber_expressions/group.py +++ b/python/cucumber_expressions/group.py @@ -1,30 +1,10 @@ -from __future__ import annotations - -from typing import List - - class Group: - def __init__(self, value: str, start: int, end: int, children: List[Group]): - self._children = children - self._value = value - self._start = start - self._end = end - - @property - def value(self): - return self._value - - @property - def start(self): - return self._start - - @property - def end(self): - return self._end - - @property - def children(self): - return self._children + def __init__(self, value: str, start: int, end: int, children: list["Group"], name: str | None = None): + self.children = children + self.name = name + self.value = value + self.start = start + self.end = end @property def values(self): diff --git a/python/cucumber_expressions/group_builder.py b/python/cucumber_expressions/group_builder.py index 793151105..1d25df129 100644 --- a/python/cucumber_expressions/group_builder.py +++ b/python/cucumber_expressions/group_builder.py @@ -1,53 +1,35 @@ -from __future__ import annotations - -from typing import List - from cucumber_expressions.group import Group class GroupBuilder: def __init__(self): - self._group_builders: List[GroupBuilder] = [] - self._capturing = True - self._source: str = "" - self._end_index = None - self._children: List[GroupBuilder] = [] + self.group_builders: list[GroupBuilder] = [] + self.capturing: bool = True + self.source: str | None = None + self.end_index: int | None = None - def add(self, group_builder: GroupBuilder): - self._group_builders.append(group_builder) + def add(self, group_builder: "GroupBuilder"): + self.group_builders.append(group_builder) - def build(self, match, group_indices) -> Group: + def build(self, match, group_indices, group_name_map: dict) -> Group: group_index = next(group_indices) - children: List[Group] = [ - gb.build(match, group_indices) for gb in self._group_builders + group_name = group_name_map.get(group_index, None) + + children = [ + gb.build(match, group_indices, group_name_map) for gb in self.group_builders ] return Group( + name=group_name, value=match.group(group_index), start=match.regs[group_index][0], end=match.regs[group_index][1], children=children, ) - def move_children_to(self, group_builder: GroupBuilder) -> None: - for child in self._group_builders: + def move_children_to(self, group_builder: "GroupBuilder") -> None: + for child in self.group_builders: group_builder.add(child) @property - def capturing(self): - return self._capturing - - @capturing.setter - def capturing(self, value: bool): - self._capturing = value - - @property - def children(self) -> list[GroupBuilder]: - return self._group_builders - - @property - def source(self) -> str: - return self._source - - @source.setter - def source(self, source: str): - self._source = source + def children(self) -> list["GroupBuilder"]: + return self.group_builders diff --git a/python/cucumber_expressions/parameter_type.py b/python/cucumber_expressions/parameter_type.py index 751835ec1..c89c7f15d 100644 --- a/python/cucumber_expressions/parameter_type.py +++ b/python/cucumber_expressions/parameter_type.py @@ -1,15 +1,49 @@ from __future__ import annotations import re -from typing import Callable, Optional, Pattern +from typing import Callable, Optional, Pattern, TypeVar, Union -from cucumber_expressions.errors import CucumberExpressionError +from .errors import CucumberExpressionError ILLEGAL_PARAMETER_NAME_PATTERN = re.compile(r"([\[\]()$.|?*+])") +T = TypeVar('T') + + class ParameterType: """Creates a new Parameter Type""" + def __init__( + self, + name: str | None, + regexp: list[str] | str | list[Pattern] | Pattern, + type: T, + transformer: Optional[Callable] = None, + use_for_snippets: bool = True, + prefer_for_regexp_match: bool = False, + ): + """Creates a new Parameter + :param name: name of the parameter type + :type name: Optional[str] + :param regexp: regexp or list of regexps for capture groups + :type regexp: list[str], str, list[Pattern] or Pattern + :param type: the return type of the transformed + :type type: class + :param transformer: transforms a str to (possibly) another type + :type transformer: lambda + :param use_for_snippets: if this should be used for snippet generation + :type use_for_snippets: bool + :param prefer_for_regexp_match: if this should be preferred over similar types + :type prefer_for_regexp_match: bool + """ + self.name = name + if self.name: + self._check_parameter_type_name(self.name) + self.type = type + self.transformer = transformer or (lambda value: type(value)) + self.use_for_snippets = use_for_snippets + self.prefer_for_regexp_match = prefer_for_regexp_match + self.regexps = self.to_array(regexp) def _check_parameter_type_name(self, type_name): """Checks if a parameter type name is allowed""" @@ -42,46 +76,6 @@ def compare(pt1: ParameterType, pt2: ParameterType): return 1 return 0 - def __init__( - self, - name, - regexp, - type, - transformer: Optional[Callable] = None, - use_for_snippets: bool = True, - prefer_for_regexp_match: bool = False, - ): - """Creates a new Parameter - :param name: name of the parameter type - :type name: Optional[str] - :param regexp: regexp or list of regexps for capture groups - :type regexp: list[str], str, list[Pattern] or Pattern - :param type: the return type of the transformed - :type type: class - :param transformer: transforms a str to (possibly) another type - :type transformer: lambda - :param use_for_snippets: if this should be used for snippet generation - :type use_for_snippets: bool - :param prefer_for_regexp_match: if this should be preferred over similar types - :type prefer_for_regexp_match: bool - """ - self.name = name - if self.name: - self._check_parameter_type_name(self.name) - self.type = type - self.transformer = transformer or (lambda value: type(value)) - self._use_for_snippets = use_for_snippets - self._prefer_for_regexp_match = prefer_for_regexp_match - self.regexps = self.to_array(regexp) - - @property - def prefer_for_regexp_match(self): - return self._prefer_for_regexp_match - - @property - def use_for_snippets(self): - return self._use_for_snippets - @staticmethod def _get_regexp_source(regexp_pattern: Pattern) -> str: invalid_flags = [re.I, re.M] @@ -96,7 +90,7 @@ def _get_regexp_source(regexp_pattern: Pattern) -> str: ) return regexp_pattern.pattern - def to_array(self, regexps: list[str] | str | list[Pattern] | Pattern) -> list[str]: + def to_array(self, regexps: Union[list[str], str, list[Pattern], Pattern]) -> list[str]: """Make a list of regexps if not already""" array: list = regexps if isinstance(regexps, list) else [regexps] return [ diff --git a/python/cucumber_expressions/parameter_type_registry.py b/python/cucumber_expressions/parameter_type_registry.py index e8fe20c4e..6c84bac81 100644 --- a/python/cucumber_expressions/parameter_type_registry.py +++ b/python/cucumber_expressions/parameter_type_registry.py @@ -1,9 +1,8 @@ import functools import re from decimal import Decimal -from typing import Optional, List +from typing import Optional, Union -from cucumber_expressions.expression_generator import CucumberExpressionGenerator from cucumber_expressions.parameter_type import ParameterType from cucumber_expressions.errors import ( CucumberExpressionError, @@ -78,23 +77,23 @@ def __init__(self): ) @property - def parameter_types(self) -> List: + def parameter_types(self) -> list: return list(self.parameter_type_by_name.values()) def lookup_by_type_name(self, name: str) -> Optional[ParameterType]: return self.parameter_type_by_name.get(name) - def lookup_by_regexp( - self, parameter_type_regexp: str, expression_regexp, text: str - ): - raw_regex = rf"{parameter_type_regexp}" - parameter_types = self.parameter_types_by_regexp.get(raw_regex) + def lookup_by_regexp(self, parameter_type_regexp: str, expression_regexp: Union[str, re.Pattern], text: str): + """ + Lookup and match the text using parameter types, then transform the results. + Supports both named and unnamed capture groups. + """ + parameter_types = self.parameter_types_by_regexp.get(parameter_type_regexp) if not parameter_types: return None if len(parameter_types) > 1 and not parameter_types[0].prefer_for_regexp_match: - generated_expressions = CucumberExpressionGenerator( - self - ).generate_expressions(text) + from cucumber_expressions.expression_generator import CucumberExpressionGenerator + generated_expressions = CucumberExpressionGenerator(self).generate_expressions(text) raise AmbiguousParameterTypeError( parameter_type_regexp, expression_regexp, diff --git a/python/cucumber_expressions/regular_expression.py b/python/cucumber_expressions/regular_expression.py index dd8e7a3d8..5bd304ec7 100644 --- a/python/cucumber_expressions/regular_expression.py +++ b/python/cucumber_expressions/regular_expression.py @@ -1,11 +1,14 @@ import re -from typing import Optional, List +from collections.abc import Generator +from typing import Optional, Union, AnyStr from cucumber_expressions.argument import Argument from cucumber_expressions.parameter_type import ParameterType from cucumber_expressions.parameter_type_registry import ParameterTypeRegistry from cucumber_expressions.tree_regexp import TreeRegexp +NAMED_CAPTURE_GROUP_REGEX = re.compile(r"\?P<([^>]+)>") + class RegularExpression: """Creates a new instance. Use this when the transform types are not known in advance, @@ -13,7 +16,7 @@ class RegularExpression: dynamically typed languages.""" def __init__( - self, expression_regexp, parameter_type_registry: ParameterTypeRegistry + self, expression_regexp: Union[re.Pattern, str], parameter_type_registry: ParameterTypeRegistry ): """Creates a new instance. Use this when the transform types are not known in advance, and should be determined by the regular expression's capture groups. Use this with @@ -27,21 +30,57 @@ def __init__( self.parameter_type_registry = parameter_type_registry self.tree_regexp: TreeRegexp = TreeRegexp(self.expression_regexp.pattern) - def match(self, text) -> Optional[List[Argument]]: - return Argument.build( - self.tree_regexp, text, list(self.generate_parameter_types(text)) + def match(self, text) -> Optional[list[Argument]]: + # Convert the generator to a list before passing it to Argument.build + parameter_types_and_names = list( + (parameter_type, capture_name) + for parameter_type, capture_name in self.generate_parameter_types(text) ) + return Argument.build(self.tree_regexp, text, parameter_types_and_names) + + @staticmethod + def _remove_named_groups(pattern: str) -> str: + """ + Remove named capture groups from the regex pattern using precompiled regex. + """ + return NAMED_CAPTURE_GROUP_REGEX.sub("", pattern) - def generate_parameter_types(self, text): + def _process_capture_group(self, group_source: str): + """ + Check if the capture group is named and extract the name. + If it's a named capture group, return the name and the modified regex. + """ + # Check for named capture group using the precompiled regex + match = NAMED_CAPTURE_GROUP_REGEX.match(group_source) + + if match: + # Extract the name of the capture group + capture_group_name = match.group(1) + # Remove the named group part using the precompiled regex + cleaned_pattern = self._remove_named_groups(group_source) + return capture_group_name, cleaned_pattern + else: + # No named group, just return the original pattern + return None, group_source + + def generate_parameter_types(self, text) -> Generator[tuple[ParameterType, Optional[str]]]: for group_builder in self.tree_regexp.group_builder.children: + # Extract the raw source for the group parameter_type_regexp = group_builder.source + + # Process the capture group (check if it's named and clean the pattern) + capture_name, cleaned_pattern = self._process_capture_group(parameter_type_regexp) + + # Lookup the parameter type using the stripped capture group possible_regexp = self.parameter_type_registry.lookup_by_regexp( - parameter_type_regexp, self.expression_regexp, text + cleaned_pattern, self.expression_regexp, text ) - yield possible_regexp or ParameterType( - None, parameter_type_regexp, str, lambda *s: s[0], False, False + + parameter_type = possible_regexp or ParameterType( + capture_name, cleaned_pattern, str, lambda *s: s[0], False, False ) + yield parameter_type, capture_name @property - def regexp(self): + def regexp(self) -> AnyStr: return self.expression_regexp.pattern diff --git a/python/cucumber_expressions/tree_regexp.py b/python/cucumber_expressions/tree_regexp.py index cda2997aa..17e25f3d5 100644 --- a/python/cucumber_expressions/tree_regexp.py +++ b/python/cucumber_expressions/tree_regexp.py @@ -1,30 +1,31 @@ import re -from typing import List, Pattern +from typing import Pattern, Union, Optional from cucumber_expressions.ast import EscapeCharacters +from cucumber_expressions.group import Group from cucumber_expressions.group_builder import GroupBuilder class TreeRegexp: - def __init__(self, regexp: str): + def __init__(self, regexp: Union[Pattern[str], str]): self.regexp = regexp if isinstance(regexp, Pattern) else re.compile(regexp) - self._group_builder = None - if not self._group_builder: - self._group_builder = self.create_group_builder(self.regexp) + self.group_builder = self.create_group_builder(self.regexp) - def match(self, string: str): + def match(self, string: str) -> Optional[Group]: matches = self.regexp.match(string) if not matches: return None group_indices = range(len(matches.groups()) + 1) - return self.group_builder.build(matches, iter(group_indices)) + group_names_map = {v: k for k, v in self.regexp.groupindex.items()} + return self.group_builder.build(matches, iter(group_indices), group_names_map) def create_group_builder(self, regexp): source = regexp.pattern - stack: List[GroupBuilder] = [GroupBuilder()] + stack: list[GroupBuilder] = [GroupBuilder()] group_start_stack = [] escaping: bool = False char_class: bool = False + for index, char in enumerate(source): if char == "[" and not escaping: char_class = True @@ -35,6 +36,11 @@ def create_group_builder(self, regexp): group_builder = GroupBuilder() if self.is_non_capturing(source, index): group_builder.capturing = False + elif self.is_named_group(source, index): + group_builder.capturing = True + # Handle named groups here (mark their names) + group_name = self.extract_named_group_name(source, index) + group_builder.name = group_name stack.append(group_builder) elif char == ")" and not escaping and not char_class: group_builder = stack.pop() @@ -43,7 +49,7 @@ def create_group_builder(self, regexp): group_start = group_start_stack.pop() group_start = group_start or 0 if group_builder.capturing: - group_builder.source = source[(group_start + 1) : index] + group_builder.source = source[(group_start + 1): index] stack[-1].add(group_builder) else: group_builder.move_children_to(stack[-1]) @@ -51,22 +57,30 @@ def create_group_builder(self, regexp): return stack.pop() @staticmethod - def is_non_capturing(source, index): - # Regex is valid. Bounds check not required. + def is_named_group(source: str, index: int) -> bool: + """ + Check if the group at the given index is a named capturing group, e.g. (?P...). + """ + return source[index + 1: index + 3] == "P<" and source[index + 3] != "?" + + @staticmethod + def extract_named_group_name(source: str, index: int) -> str: + """ + Extract the name of a named capturing group, e.g., (?P...) returns "name". + """ + group_name_start = index + 3 + group_name_end = source.find(">", group_name_start) + return source[group_name_start:group_name_end] + + @staticmethod + def is_non_capturing(source: str, index: int) -> bool: + # Check if it's a non-capturing group like (?:...) if source[index + 1] != "?": - # (X) return False - if source[index + 2] != "<": - # (?:X) - # (?idmsuxU-idmsuxU) - # (?idmsux-idmsux:X) - # (?=X) - # (?!X) - # (?>X) - return True - # (?<=X) or (?X) - return source[index + 3] in ["=", "!"] - @property - def group_builder(self): - return self._group_builder + # If it's a named group (e.g., (?P...)), it's still a capturing group + if source[index + 2] == "P" and source[index + 3] == "<": + return False # Named capturing group, should return False (it's capturing) + + # Otherwise, it's a non-capturing group (e.g., (?:...), (?=...), etc.) + return True diff --git a/python/pyproject.toml b/python/pyproject.toml index ea894fa60..094cbcccd 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -39,7 +39,7 @@ classifiers = [ ] [tool.poetry.dependencies] -python = "^3.8" +python = "^3.9" [tool.poetry.dev-dependencies] pre-commit = "^3.3" diff --git a/python/tests/test_argument.py b/python/tests/test_argument.py index 44413b57c..3f836d831 100644 --- a/python/tests/test_argument.py +++ b/python/tests/test_argument.py @@ -10,6 +10,6 @@ def test_exposes_parameter_type(self): arguments = Argument.build( tree_regexp, "three blind mice", - [parameter_type_registry.lookup_by_type_name("string")], + [(parameter_type_registry.lookup_by_type_name("string"), None)], ) assert arguments[0].parameter_type.name == "string" diff --git a/python/tests/test_expression.py b/python/tests/test_expression.py index 0e5f40e2e..3a7b54c6d 100644 --- a/python/tests/test_expression.py +++ b/python/tests/test_expression.py @@ -1,5 +1,6 @@ from decimal import Decimal from pathlib import Path +from typing import Optional, Any from tests.definitions import TESTDATA_ROOT_DIR @@ -12,18 +13,14 @@ def get_expectation_yamls(): yaml_dir = Path(TESTDATA_ROOT_DIR) / "cucumber-expression" / "matching" - return [ - Path(yaml_dir) / file - for file in Path(yaml_dir).iterdir() - if file.suffix == ".yaml" - ] + return [yaml_dir / file for file in yaml_dir.iterdir() if file.suffix == ".yaml"] def match( expression: str, match_text: str, parameter_registry: ParameterTypeRegistry = ParameterTypeRegistry(), -): +) -> Optional[tuple[Any, str]]: cucumber_expression = CucumberExpression(expression, parameter_registry) matches = cucumber_expression.match(match_text) @@ -34,8 +31,7 @@ def transform_value(value): return str(value) return value - return matches and [transform_value(arg.value) for arg in matches] - + return matches and [(transform_value(arg.value), arg.name) for arg in matches] class TestCucumberExpression: @pytest.mark.parametrize("load_test_yamls", get_expectation_yamls(), indirect=True) @@ -47,11 +43,16 @@ def test_cucumber_expression_matches(self, load_test_yamls: dict): assert excinfo.value.args[0] == expectation["exception"] else: values = match(expectation["expression"], expectation["text"]) - assert values == expectation["expected_args"] + actual_result = None if values is None else [value[0] for value in values] + assert actual_result == expectation["expected_args"] def test_documents_match_arguments(self): values = match("I have {int} cuke(s)", "I have 7 cukes") - assert values[0] == 7 + assert values[0] == (7, None) + + def test_documents_match_arguments_with_names(self): + values = match("I have {cuke_count:int} cuke(s)", "I have 7 cukes") + assert values[0] == (7, "cuke_count") def test_matches_float(self): assert match("{float}", "") is None @@ -63,40 +64,44 @@ def test_matches_float(self): assert match("{float}", ",1") is None assert match("{float}", "1.") is None - assert match("{float}", "1") == [1] - assert match("{float}", "-1") == [-1] - assert match("{float}", "1.1") == [1.1] + assert match("{float}", "1") == [(1, None)] + assert match("{float}", "-1") == [(-1, None)] + assert match("{float}", "1.1") == [(1.1, None)] assert match("{float}", "1,000") is None assert match("{float}", "1,000,0") is None assert match("{float}", "1,000.1") is None assert match("{float}", "1,000,10") is None assert match("{float}", "1,0.1") is None assert match("{float}", "1,000,000.1") is None - assert match("{float}", "-1.1") == [-1.1] + assert match("{float}", "-1.1") == [(-1.1, None)] - assert match("{float}", ".1") == [0.1] - assert match("{float}", "-.1") == [-0.1] - assert match("{float}", "-.1000001") == [-0.1000001] - assert match("{float}", "1E1") == [10.0] - assert match("{float}", ".1E1") == [1] + assert match("{float}", ".1") == [(0.1, None)] + assert match("{float}", "-.1") == [(-0.1, None)] + assert match("{float}", "-.1000001") == [(-0.1000001, None)] + assert match("{float}", "1E1") == [(10.0, None)] + assert match("{float}", ".1E1") == [(1, None)] assert match("{float}", "E1") is None - assert match("{float}", "-.1E-1") == [-0.01] - assert match("{float}", "-.1E-2") == [-0.001] - assert match("{float}", "-.1E+1") == [-1] - assert match("{float}", "-.1E+2") == [-10] - assert match("{float}", "-.1E1") == [-1] - assert match("{float}", "-.1E2") == [-10] + assert match("{float}", "-.1E-1") == [(-0.01, None)] + assert match("{float}", "-.1E-2") == [(-0.001, None)] + assert match("{float}", "-.1E+1") == [(-1, None)] + assert match("{float}", "-.1E+2") == [(-10, None)] + assert match("{float}", "-.1E1") == [(-1, None)] + assert match("{float}", "-.1E2") == [(-10, None)] def test_float_with_zero(self): - assert match("{float}", "0") == [0.0] + assert match("{float}", "0") == [(0.0, None)] def test_matches_anonymous(self): - assert match("{}", "0.22") == ["0.22"] + assert match("{}", "0.22") == [("0.22", None)] def test_exposes_source(self): expr = "I have {int} cuke(s)" assert CucumberExpression(expr, ParameterTypeRegistry()).source == expr + def test_with_name_exposes_source(self): + expr = "I have {cuke_count:int} cuke(s)" + assert CucumberExpression(expr, ParameterTypeRegistry()).source == expr + def test_unmatched_optional_groups_have_undefined_values(self): parameter_type_registry = ParameterTypeRegistry() parameter_type_registry.define_parameter_type( @@ -110,11 +115,5 @@ def test_unmatched_optional_groups_have_undefined_values(self): ) ) - assert match("{textAndOrNumber}", "TLA", parameter_type_registry)[0] == [ - "TLA", - None, - ] - assert match("{textAndOrNumber}", "123", parameter_type_registry)[0] == [ - None, - "123", - ] + assert match("{textAndOrNumber}", "TLA", parameter_type_registry)[0] == (["TLA", None], None) + assert match("{textAndOrNumber}", "123", parameter_type_registry)[0] == ([None, "123"], None) diff --git a/python/tests/test_expression_factory.py b/python/tests/test_expression_factory.py new file mode 100644 index 000000000..e237d1aba --- /dev/null +++ b/python/tests/test_expression_factory.py @@ -0,0 +1,31 @@ +from cucumber_expressions.expression import CucumberExpression +from cucumber_expressions.expression_factory import ExpressionFactory +from cucumber_expressions.regular_expression import RegularExpression + + +def test_expression_factory_regex(): + input_str = r"I have (?P\d+) cukes? in my (?P\w+) now" + expression = ExpressionFactory().create_expression(input_str) + assert isinstance(expression, RegularExpression) + matches = expression.match('I have 4 cukes in my belly now') + assert matches[0].value == 4 + assert matches[0].name == "cuke_count" + assert matches[1].value == "belly" + assert matches[1].name == "word" + + +def test_expression_factory_cucumber_expression(): + input_str = "I have {name:int} cukes in my {string} now" + expression = ExpressionFactory().create_expression(input_str) + assert isinstance(expression, CucumberExpression) + matches = expression.match("I have 4 cukes in my \"belly\" now") + assert matches[0].value == 4 + assert matches[0].name == "name" + assert matches[1].value == "belly" + assert matches[1].name is None + + +def test_expression_factory_invalid(): + input_str = "^(?:(\d{2,4})-)?(\d{1,3})\s*([A-Za-z]{3})\s*(?:\{(\d+,\d+|\d+)\})?(\d{1,2})(?:\{[A-Za-z0-9]+\})?$" + expression = ExpressionFactory().create_expression(input_str) + assert isinstance(expression, RegularExpression) diff --git a/python/tests/test_regular_expression.py b/python/tests/test_regular_expression.py index b5d641a7b..b50cad746 100644 --- a/python/tests/test_regular_expression.py +++ b/python/tests/test_regular_expression.py @@ -75,7 +75,7 @@ def test_works_with_escaped_parentheses(self): assert self._match(r"Across the line\(s\)", "Across the line(s)") == [] def test_exposes_regexp(self): - regexp = r"I have (\d+) cukes? in my (\+) now" + regexp = "I have (\d+) cukes? in my (\+) now" expression = RegularExpression(regexp, ParameterTypeRegistry()) assert expression.regexp == regexp diff --git a/python/tests/test_tree_regex.py b/python/tests/test_tree_regex.py index 2865ff50d..87941ad37 100644 --- a/python/tests/test_tree_regex.py +++ b/python/tests/test_tree_regex.py @@ -58,13 +58,25 @@ def test_ignores_atomic_non_capturing_group(self): tree_regexp = TreeRegexp("a(?=(?Pb))(?P=tmp)c") group = tree_regexp.match("abc") assert "abc" == group.value - assert 0 == len(group.children) + assert 1 == len(group.children) def test_matches_named_capturing_group(self): tree_regexp = TreeRegexp("a(?Pb)c$") group = tree_regexp.match("abc") assert "abc" == group.value - assert 0 == len(group.children) + assert 1 == len(group.children) + + def test_matches_named_capturing_group_returns_name(self): + tree_regexp = TreeRegexp(r"(a)(?Pb)(c)(?Pd)") + group = tree_regexp.match("abcd") + assert "abcd" == group.value + assert len(group.children) == 4 + assert group.children[0].value == "a" + assert group.children[1].value == "b" + assert group.children[1].name == "name" + assert group.children[2].value == "c" + assert group.children[3].value == "d" + assert group.children[3].name == "other" def test_matches_optional_group(self): tree_regexp = TreeRegexp("^Something( with an optional argument)?") From f9a7afb9892d64d5e92520fa777bd8a46568fbef Mon Sep 17 00:00:00 2001 From: Jason Allen Date: Fri, 6 Dec 2024 08:22:48 +0000 Subject: [PATCH 2/7] Fix flake8 --- python/cucumber_expressions/argument.py | 15 +++++++++++---- python/cucumber_expressions/expression.py | 19 ++++++++++++++----- .../expression_factory.py | 5 +++-- python/cucumber_expressions/group.py | 9 ++++++++- python/cucumber_expressions/parameter_type.py | 7 +++++-- .../parameter_type_registry.py | 16 +++++++++++++--- .../regular_expression.py | 12 +++++++++--- python/cucumber_expressions/tree_regexp.py | 4 ++-- python/tests/test_expression.py | 11 +++++++++-- python/tests/test_expression_factory.py | 6 +++--- python/tests/test_regular_expression.py | 2 +- 11 files changed, 78 insertions(+), 28 deletions(-) diff --git a/python/cucumber_expressions/argument.py b/python/cucumber_expressions/argument.py index 759e5aed3..c5bd4e2ff 100644 --- a/python/cucumber_expressions/argument.py +++ b/python/cucumber_expressions/argument.py @@ -8,14 +8,18 @@ class Argument: - def __init__(self, group: Group, parameter_type: ParameterType, name: Optional[str]): + def __init__( + self, group: Group, parameter_type: ParameterType, name: Optional[str] + ): self.group = group self.parameter_type = parameter_type self.name = name @staticmethod def build( - tree_regexp: TreeRegexp, text: str, parameter_types_and_names: list[tuple[ParameterType, Optional[str]]] + tree_regexp: TreeRegexp, + text: str, + parameter_types_and_names: list[tuple[ParameterType, Optional[str]]], ) -> Optional[list[Argument]]: # Check if all elements in parameter_types_and_names are tuples for item in parameter_types_and_names: @@ -31,13 +35,16 @@ def build( arg_groups = match_group.children if len(arg_groups) != len(parameter_types_and_names): + param_count = len(parameter_types_and_names) raise CucumberExpressionError( - f"Group has {len(arg_groups)} capture groups, but there were {len(parameter_types_and_names)} parameter types/names" + f"Group has {len(arg_groups)} capture groups, but there were {param_count} parameter types/names" ) return [ Argument(arg_group, parameter_type, parameter_name) - for (parameter_type, parameter_name), arg_group in zip(parameter_types_and_names, arg_groups) + for (parameter_type, parameter_name), arg_group in zip( + parameter_types_and_names, arg_groups + ) ] @property diff --git a/python/cucumber_expressions/expression.py b/python/cucumber_expressions/expression.py index c267cb577..84313bde2 100644 --- a/python/cucumber_expressions/expression.py +++ b/python/cucumber_expressions/expression.py @@ -11,7 +11,8 @@ OptionalIsNotAllowedInOptional, OptionalMayNotBeEmpty, AlternativeMayNotBeEmpty, - AlternativeMayNotExclusivelyContainOptionals, UndefinedParameterTypeError, + AlternativeMayNotExclusivelyContainOptionals, + UndefinedParameterTypeError, ) ESCAPE_PATTERN = rb"([\\^\[({$.|?*+})\]])" @@ -59,9 +60,13 @@ def escape_regex(expression) -> str: def rewrite_optional(self, node: Node) -> str: if self.get_possible_node_with_parameters(node): - raise ParameterIsNotAllowedInOptional(self.get_possible_node_with_parameters(node), self.expression) + raise ParameterIsNotAllowedInOptional( + self.get_possible_node_with_parameters(node), self.expression + ) if self.get_possible_node_with_optionals(node): - raise OptionalIsNotAllowedInOptional(self.get_possible_node_with_optionals(node), self.expression) + raise OptionalIsNotAllowedInOptional( + self.get_possible_node_with_optionals(node), self.expression + ) if self.are_nodes_empty(node): raise OptionalMayNotBeEmpty(node, self.expression) regex = "".join([self.rewrite_to_regex(_node) for _node in node.nodes]) @@ -95,11 +100,15 @@ def rewrite_parameter(self, node: Node) -> str: return rf"({regexps[0]})" return rf"((?:{')|(?:'.join(regexps)}))" - def parse_parameter_name(self, name: str) -> tuple[Optional[str], Optional[ParameterType]]: + def parse_parameter_name( + self, name: str + ) -> tuple[Optional[str], Optional[ParameterType]]: """Helper function to parse the parameter name and return group_name and parameter_type.""" if ":" in name: group_name, parameter_type_name = name.split(":") - parameter_type = self.parameter_type_registry.lookup_by_type_name(parameter_type_name) + parameter_type = self.parameter_type_registry.lookup_by_type_name( + parameter_type_name + ) else: group_name = None parameter_type = self.parameter_type_registry.lookup_by_type_name(name) diff --git a/python/cucumber_expressions/expression_factory.py b/python/cucumber_expressions/expression_factory.py index 5753bb8ba..58cfa1e6a 100644 --- a/python/cucumber_expressions/expression_factory.py +++ b/python/cucumber_expressions/expression_factory.py @@ -9,7 +9,9 @@ class ExpressionFactory: - def __init__(self, parameter_type_registry: ParameterTypeRegistry = ParameterTypeRegistry()): + def __init__( + self, parameter_type_registry: ParameterTypeRegistry = ParameterTypeRegistry() + ): self.parameter_type_registry = parameter_type_registry @staticmethod @@ -31,7 +33,6 @@ def is_cucumber_expression(self, expression_string: str): return False # Found a form of curly bracket return True # All curly brackets are valid - def create_expression(self, expression_string: str): if self.is_cucumber_expression(expression_string): return CucumberExpression(expression_string, self.parameter_type_registry) diff --git a/python/cucumber_expressions/group.py b/python/cucumber_expressions/group.py index 664a59b44..5ccac1804 100644 --- a/python/cucumber_expressions/group.py +++ b/python/cucumber_expressions/group.py @@ -1,5 +1,12 @@ class Group: - def __init__(self, value: str, start: int, end: int, children: list["Group"], name: str | None = None): + def __init__( + self, + value: str, + start: int, + end: int, + children: list["Group"], + name: str | None = None, + ): self.children = children self.name = name self.value = value diff --git a/python/cucumber_expressions/parameter_type.py b/python/cucumber_expressions/parameter_type.py index c89c7f15d..765136ca1 100644 --- a/python/cucumber_expressions/parameter_type.py +++ b/python/cucumber_expressions/parameter_type.py @@ -8,11 +8,12 @@ ILLEGAL_PARAMETER_NAME_PATTERN = re.compile(r"([\[\]()$.|?*+])") -T = TypeVar('T') +T = TypeVar("T") class ParameterType: """Creates a new Parameter Type""" + def __init__( self, name: str | None, @@ -90,7 +91,9 @@ def _get_regexp_source(regexp_pattern: Pattern) -> str: ) return regexp_pattern.pattern - def to_array(self, regexps: Union[list[str], str, list[Pattern], Pattern]) -> list[str]: + def to_array( + self, regexps: Union[list[str], str, list[Pattern], Pattern] + ) -> list[str]: """Make a list of regexps if not already""" array: list = regexps if isinstance(regexps, list) else [regexps] return [ diff --git a/python/cucumber_expressions/parameter_type_registry.py b/python/cucumber_expressions/parameter_type_registry.py index 6c84bac81..44208bdad 100644 --- a/python/cucumber_expressions/parameter_type_registry.py +++ b/python/cucumber_expressions/parameter_type_registry.py @@ -83,7 +83,12 @@ def parameter_types(self) -> list: def lookup_by_type_name(self, name: str) -> Optional[ParameterType]: return self.parameter_type_by_name.get(name) - def lookup_by_regexp(self, parameter_type_regexp: str, expression_regexp: Union[str, re.Pattern], text: str): + def lookup_by_regexp( + self, + parameter_type_regexp: str, + expression_regexp: Union[str, re.Pattern], + text: str, + ): """ Lookup and match the text using parameter types, then transform the results. Supports both named and unnamed capture groups. @@ -92,8 +97,13 @@ def lookup_by_regexp(self, parameter_type_regexp: str, expression_regexp: Union[ if not parameter_types: return None if len(parameter_types) > 1 and not parameter_types[0].prefer_for_regexp_match: - from cucumber_expressions.expression_generator import CucumberExpressionGenerator - generated_expressions = CucumberExpressionGenerator(self).generate_expressions(text) + from cucumber_expressions.expression_generator import ( + CucumberExpressionGenerator, + ) + + generated_expressions = CucumberExpressionGenerator( + self + ).generate_expressions(text) raise AmbiguousParameterTypeError( parameter_type_regexp, expression_regexp, diff --git a/python/cucumber_expressions/regular_expression.py b/python/cucumber_expressions/regular_expression.py index 5bd304ec7..26fbc14ce 100644 --- a/python/cucumber_expressions/regular_expression.py +++ b/python/cucumber_expressions/regular_expression.py @@ -16,7 +16,9 @@ class RegularExpression: dynamically typed languages.""" def __init__( - self, expression_regexp: Union[re.Pattern, str], parameter_type_registry: ParameterTypeRegistry + self, + expression_regexp: Union[re.Pattern, str], + parameter_type_registry: ParameterTypeRegistry, ): """Creates a new instance. Use this when the transform types are not known in advance, and should be determined by the regular expression's capture groups. Use this with @@ -63,13 +65,17 @@ def _process_capture_group(self, group_source: str): # No named group, just return the original pattern return None, group_source - def generate_parameter_types(self, text) -> Generator[tuple[ParameterType, Optional[str]]]: + def generate_parameter_types( + self, text + ) -> Generator[tuple[ParameterType, Optional[str]]]: for group_builder in self.tree_regexp.group_builder.children: # Extract the raw source for the group parameter_type_regexp = group_builder.source # Process the capture group (check if it's named and clean the pattern) - capture_name, cleaned_pattern = self._process_capture_group(parameter_type_regexp) + capture_name, cleaned_pattern = self._process_capture_group( + parameter_type_regexp + ) # Lookup the parameter type using the stripped capture group possible_regexp = self.parameter_type_registry.lookup_by_regexp( diff --git a/python/cucumber_expressions/tree_regexp.py b/python/cucumber_expressions/tree_regexp.py index 17e25f3d5..7f9f06a63 100644 --- a/python/cucumber_expressions/tree_regexp.py +++ b/python/cucumber_expressions/tree_regexp.py @@ -49,7 +49,7 @@ def create_group_builder(self, regexp): group_start = group_start_stack.pop() group_start = group_start or 0 if group_builder.capturing: - group_builder.source = source[(group_start + 1): index] + group_builder.source = source[(group_start + 1) : index] stack[-1].add(group_builder) else: group_builder.move_children_to(stack[-1]) @@ -61,7 +61,7 @@ def is_named_group(source: str, index: int) -> bool: """ Check if the group at the given index is a named capturing group, e.g. (?P...). """ - return source[index + 1: index + 3] == "P<" and source[index + 3] != "?" + return source[index + 1 : index + 3] == "P<" and source[index + 3] != "?" @staticmethod def extract_named_group_name(source: str, index: int) -> str: diff --git a/python/tests/test_expression.py b/python/tests/test_expression.py index 3a7b54c6d..cdc9d88af 100644 --- a/python/tests/test_expression.py +++ b/python/tests/test_expression.py @@ -33,6 +33,7 @@ def transform_value(value): return matches and [(transform_value(arg.value), arg.name) for arg in matches] + class TestCucumberExpression: @pytest.mark.parametrize("load_test_yamls", get_expectation_yamls(), indirect=True) def test_cucumber_expression_matches(self, load_test_yamls: dict): @@ -115,5 +116,11 @@ def test_unmatched_optional_groups_have_undefined_values(self): ) ) - assert match("{textAndOrNumber}", "TLA", parameter_type_registry)[0] == (["TLA", None], None) - assert match("{textAndOrNumber}", "123", parameter_type_registry)[0] == ([None, "123"], None) + assert match("{textAndOrNumber}", "TLA", parameter_type_registry)[0] == ( + ["TLA", None], + None, + ) + assert match("{textAndOrNumber}", "123", parameter_type_registry)[0] == ( + [None, "123"], + None, + ) diff --git a/python/tests/test_expression_factory.py b/python/tests/test_expression_factory.py index e237d1aba..74a5f79a9 100644 --- a/python/tests/test_expression_factory.py +++ b/python/tests/test_expression_factory.py @@ -7,7 +7,7 @@ def test_expression_factory_regex(): input_str = r"I have (?P\d+) cukes? in my (?P\w+) now" expression = ExpressionFactory().create_expression(input_str) assert isinstance(expression, RegularExpression) - matches = expression.match('I have 4 cukes in my belly now') + matches = expression.match("I have 4 cukes in my belly now") assert matches[0].value == 4 assert matches[0].name == "cuke_count" assert matches[1].value == "belly" @@ -18,7 +18,7 @@ def test_expression_factory_cucumber_expression(): input_str = "I have {name:int} cukes in my {string} now" expression = ExpressionFactory().create_expression(input_str) assert isinstance(expression, CucumberExpression) - matches = expression.match("I have 4 cukes in my \"belly\" now") + matches = expression.match('I have 4 cukes in my "belly" now') assert matches[0].value == 4 assert matches[0].name == "name" assert matches[1].value == "belly" @@ -26,6 +26,6 @@ def test_expression_factory_cucumber_expression(): def test_expression_factory_invalid(): - input_str = "^(?:(\d{2,4})-)?(\d{1,3})\s*([A-Za-z]{3})\s*(?:\{(\d+,\d+|\d+)\})?(\d{1,2})(?:\{[A-Za-z0-9]+\})?$" + input_str = r"^(?:(\d{2,4})-)?(\d{1,3})\s*([A-Za-z]{3})\s*(?:\{(\d+,\d+|\d+)\})?(\d{1,2})(?:\{[A-Za-z0-9]+\})?$" expression = ExpressionFactory().create_expression(input_str) assert isinstance(expression, RegularExpression) diff --git a/python/tests/test_regular_expression.py b/python/tests/test_regular_expression.py index b50cad746..b5d641a7b 100644 --- a/python/tests/test_regular_expression.py +++ b/python/tests/test_regular_expression.py @@ -75,7 +75,7 @@ def test_works_with_escaped_parentheses(self): assert self._match(r"Across the line\(s\)", "Across the line(s)") == [] def test_exposes_regexp(self): - regexp = "I have (\d+) cukes? in my (\+) now" + regexp = r"I have (\d+) cukes? in my (\+) now" expression = RegularExpression(regexp, ParameterTypeRegistry()) assert expression.regexp == regexp From ea7cef3fcafca5513575b6e79fe160aa658743c1 Mon Sep 17 00:00:00 2001 From: Jason Allen Date: Fri, 6 Dec 2024 09:07:07 +0000 Subject: [PATCH 3/7] Remove removing support for py 3.8 --- python/cucumber_expressions/argument.py | 6 +- python/cucumber_expressions/ast.py | 58 ++++++++++--- ...binatorial_generated_expression_factory.py | 8 +- python/cucumber_expressions/expression.py | 8 +- .../expression_generator.py | 7 +- .../cucumber_expressions/expression_parser.py | 32 +++---- .../expression_tokenizer.py | 8 +- .../generated_expression.py | 6 +- python/cucumber_expressions/group.py | 33 +++++-- python/cucumber_expressions/group_builder.py | 44 +++++++--- python/cucumber_expressions/parameter_type.py | 85 ++++++++++--------- .../parameter_type_registry.py | 4 +- .../regular_expression.py | 4 +- python/cucumber_expressions/tree_regexp.py | 4 +- python/pyproject.toml | 2 +- 15 files changed, 199 insertions(+), 110 deletions(-) diff --git a/python/cucumber_expressions/argument.py b/python/cucumber_expressions/argument.py index c5bd4e2ff..93823c956 100644 --- a/python/cucumber_expressions/argument.py +++ b/python/cucumber_expressions/argument.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Optional +from typing import Optional, List from cucumber_expressions.parameter_type import ParameterType from cucumber_expressions.tree_regexp import TreeRegexp, Group @@ -19,8 +19,8 @@ def __init__( def build( tree_regexp: TreeRegexp, text: str, - parameter_types_and_names: list[tuple[ParameterType, Optional[str]]], - ) -> Optional[list[Argument]]: + parameter_types_and_names: List[tuple[ParameterType, Optional[str]]], + ) -> Optional[List[Argument]]: # Check if all elements in parameter_types_and_names are tuples for item in parameter_types_and_names: if not isinstance(item, tuple) or len(item) != 2: diff --git a/python/cucumber_expressions/ast.py b/python/cucumber_expressions/ast.py index 10ac73b85..9db38f026 100644 --- a/python/cucumber_expressions/ast.py +++ b/python/cucumber_expressions/ast.py @@ -1,7 +1,7 @@ from __future__ import annotations from enum import Enum -from typing import Optional, Any +from typing import Optional, List, Any class NodeType(Enum): @@ -41,18 +41,38 @@ class Node: def __init__( self, ast_type: NodeType, - nodes: Optional[list[Node]], + nodes: Optional[List[Node]], token: Optional[str], start: int, end: int, ): if nodes is None and token is None: raise Exception("Either nodes or token must be defined") - self.ast_type = ast_type - self.nodes = nodes - self.token = token - self.start = start - self.end = end + self._ast_type = ast_type + self._nodes = nodes + self._token = token + self._start = start + self._end = end + + @property + def ast_type(self) -> NodeType: + return self._ast_type + + @property + def nodes(self) -> List[Node]: + return self._nodes + + @property + def token(self) -> str: + return self._token + + @property + def start(self) -> int: + return self._start + + @property + def end(self) -> int: + return self._end @property def text(self) -> str: @@ -71,10 +91,26 @@ def to_json(self) -> dict[str, Any]: class Token: def __init__(self, ast_type: TokenType, text: str, start: int, end: int): - self.ast_type = ast_type - self.text = text - self.start = start - self.end = end + self._ast_type = ast_type + self._text = text + self._start = start + self._end = end + + @property + def ast_type(self): + return self._ast_type + + @property + def text(self): + return self._text + + @property + def start(self): + return self._start + + @property + def end(self): + return self._end @staticmethod def is_escape_character(char: str) -> bool: diff --git a/python/cucumber_expressions/combinatorial_generated_expression_factory.py b/python/cucumber_expressions/combinatorial_generated_expression_factory.py index 1346c9af9..9b8679ac3 100644 --- a/python/cucumber_expressions/combinatorial_generated_expression_factory.py +++ b/python/cucumber_expressions/combinatorial_generated_expression_factory.py @@ -1,3 +1,5 @@ +from typing import List + from cucumber_expressions.generated_expression import GeneratedExpression from cucumber_expressions.parameter_type import ParameterType @@ -10,16 +12,16 @@ def __init__(self, expression_template, parameter_type_combinations): self.expression_template = expression_template self.parameter_type_combinations = parameter_type_combinations - def generate_expressions(self) -> list[GeneratedExpression]: + def generate_expressions(self) -> List[GeneratedExpression]: generated_expressions = [] self.generate_permutations(generated_expressions, 0, []) return generated_expressions def generate_permutations( self, - generated_expressions: list[GeneratedExpression], + generated_expressions: List[GeneratedExpression], depth: int, - current_parameter_types: list[ParameterType], + current_parameter_types: List[ParameterType], ): if len(generated_expressions) >= MAX_EXPRESSIONS: return diff --git a/python/cucumber_expressions/expression.py b/python/cucumber_expressions/expression.py index 84313bde2..c25e7a8f5 100644 --- a/python/cucumber_expressions/expression.py +++ b/python/cucumber_expressions/expression.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, List from cucumber_expressions.argument import Argument from cucumber_expressions.ast import Node, NodeType @@ -22,12 +22,12 @@ class CucumberExpression: def __init__(self, expression: str, parameter_type_registry: ParameterTypeRegistry): self.expression = expression self.parameter_type_registry = parameter_type_registry - self.parameter_types_and_names: list[tuple[ParameterType, Optional[str]]] = [] + self.parameter_types_and_names: List[tuple[ParameterType, Optional[str]]] = [] self.tree_regexp = TreeRegexp( self.rewrite_to_regex(CucumberExpressionParser().parse(self.expression)) ) - def match(self, text: str) -> Optional[list[Argument]]: + def match(self, text: str) -> Optional[List[Argument]]: return Argument.build(self.tree_regexp, text, self.parameter_types_and_names) @property @@ -130,5 +130,5 @@ def get_possible_node_with_optionals(self, node: Node) -> Optional[Node]: return results[0] if results else None @staticmethod - def get_nodes_with_ast_type(node: Node, ast_type: NodeType) -> list[Node]: + def get_nodes_with_ast_type(node: Node, ast_type: NodeType) -> List[Node]: return [ast_node for ast_node in node.nodes if ast_node.ast_type == ast_type] diff --git a/python/cucumber_expressions/expression_generator.py b/python/cucumber_expressions/expression_generator.py index c76bd38d4..7d613e8bd 100644 --- a/python/cucumber_expressions/expression_generator.py +++ b/python/cucumber_expressions/expression_generator.py @@ -1,5 +1,6 @@ import functools import re +from typing import List from cucumber_expressions.generated_expression import GeneratedExpression from cucumber_expressions.parameter_type import ParameterType @@ -14,10 +15,10 @@ class CucumberExpressionGenerator: def __init__(self, parameter_type_registry: ParameterTypeRegistry): self.parameter_type_registry = parameter_type_registry - def generate_expressions(self, text: str) -> list[GeneratedExpression]: + def generate_expressions(self, text: str) -> List[GeneratedExpression]: parameter_type_combinations = [] parameter_type_matchers = self.create_parameter_type_matchers(text) - expression_template: list[str] = [] + expression_template: List[str] = [] pos = 0 while True: @@ -92,7 +93,7 @@ def create_parameter_type_matchers(self, text) -> list[ParameterTypeMatcher]: @staticmethod def create_parameter_type_matchers_with_type( parameter_type, text - ) -> list[ParameterTypeMatcher]: + ) -> List[ParameterTypeMatcher]: return [ ParameterTypeMatcher(parameter_type, re.compile(f"({regexp})"), text, 0) for regexp in parameter_type.regexps diff --git a/python/cucumber_expressions/expression_parser.py b/python/cucumber_expressions/expression_parser.py index 722c40d4c..bf755fdbd 100644 --- a/python/cucumber_expressions/expression_parser.py +++ b/python/cucumber_expressions/expression_parser.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import NamedTuple, Optional, Callable +from typing import NamedTuple, Optional, Callable, List from cucumber_expressions.ast import Token, TokenType, Node, NodeType from cucumber_expressions.errors import ( @@ -18,7 +18,7 @@ class Result(NamedTuple): class Parser(NamedTuple): expression: str - tokens: list[Token] + tokens: List[Token] current: int @@ -75,7 +75,7 @@ def parse(self, expression: str) -> Node: # optional := '(' + option* + ')' # option := optional | parameter | text - optional_sub_parsers: list = [] + optional_sub_parsers = [] parse_optional = self.parse_between( NodeType.OPTIONAL, TokenType.BEGIN_OPTIONAL, @@ -170,7 +170,7 @@ def parse_between( ast_type: NodeType, begin_token: TokenType, end_token: TokenType, - parsers: list, + parsers: List, ) -> Callable[[Parser], Result | tuple[int, Node]]: def _parse_between(parser: Parser): if not self.looking_at(parser.tokens, parser.current, begin_token): @@ -205,7 +205,7 @@ def _parse_between(parser: Parser): @staticmethod def parse_token( - expression, parsers: list, tokens: list[Token], start_at: int + expression, parsers: List, tokens: List[Token], start_at: int ) -> Result: for parser in parsers: consumed, ast = parser(Parser(expression, tokens, start_at)) @@ -217,14 +217,14 @@ def parse_token( def parse_tokens_until( self, expression, - parsers: list, - tokens: list[Token], + parsers: List, + tokens: List[Token], start_at: int, - end_tokens: list[TokenType], - ) -> tuple[int, list[Node]]: + end_tokens: List[TokenType], + ) -> tuple[int, List[Node]]: current = start_at size = len(tokens) - ast: list[Node] = [] + ast: List[Node] = [] while current < size: if self.looking_at_any(tokens, current, end_tokens): break @@ -238,14 +238,14 @@ def parse_tokens_until( return current - start_at, ast def looking_at_any( - self, tokens: list[Token], position: int, token_types: list[TokenType] + self, tokens: List[Token], position: int, token_types: List[TokenType] ) -> bool: return any( self.looking_at(tokens, position, token_type) for token_type in token_types ) @staticmethod - def looking_at(tokens: list[Token], position: int, token_type: TokenType) -> bool: + def looking_at(tokens: List[Token], position: int, token_type: TokenType) -> bool: if position < 0: # If configured correctly this will never happen # Keep for completeness @@ -255,8 +255,8 @@ def looking_at(tokens: list[Token], position: int, token_type: TokenType) -> boo return tokens[position].ast_type == token_type def split_alternatives( - self, start: int, end: int, alternation: list[Node] - ) -> list[Node]: + self, start: int, end: int, alternation: List[Node] + ) -> List[Node]: separators = [] alternatives = [] alternative = [] @@ -272,8 +272,8 @@ def split_alternatives( @staticmethod def create_alternative_nodes( - start: int, end: int, separators: list, alternatives: list - ) -> list[Node]: + start: int, end: int, separators: List, alternatives: List + ) -> List[Node]: for index, alternative in enumerate(alternatives): if index == 0: right_separator = separators[index] diff --git a/python/cucumber_expressions/expression_tokenizer.py b/python/cucumber_expressions/expression_tokenizer.py index a1ccf11e9..92dc3c391 100644 --- a/python/cucumber_expressions/expression_tokenizer.py +++ b/python/cucumber_expressions/expression_tokenizer.py @@ -1,3 +1,5 @@ +from typing import List + from cucumber_expressions.ast import TokenType, Token from cucumber_expressions.errors import ( TheEndOfLineCannotBeEscaped, @@ -8,11 +10,11 @@ class CucumberExpressionTokenizer: def __init__(self): self.expression: str = "" - self.buffer: list[str] = [] + self.buffer: List[str] = [] self.escaped: int = 0 self.buffer_start_index: int = 0 - def tokenize(self, expression: str, to_json: bool = False) -> list[Token]: + def tokenize(self, expression: str, to_json: bool = False) -> List[Token]: self.expression = expression tokens = [] previous_token_type = TokenType.START_OF_LINE @@ -48,7 +50,7 @@ def tokenize(self, expression: str, to_json: bool = False) -> list[Token]: tokens.append(Token(TokenType.END_OF_LINE, "", len(chars), len(chars))) - def convert_to_json_format(_tokens: list[Token]) -> list: + def convert_to_json_format(_tokens: List[Token]) -> List: return [ { "type": t.ast_type.value, diff --git a/python/cucumber_expressions/generated_expression.py b/python/cucumber_expressions/generated_expression.py index 54dbb16db..09b9a37bc 100644 --- a/python/cucumber_expressions/generated_expression.py +++ b/python/cucumber_expressions/generated_expression.py @@ -5,14 +5,14 @@ def __init__(self, expression_template: str, parameter_types): self.usage_by_type_name = {} @property - def source(self) -> str: + def source(self): return self.expression_template % tuple(p.name for p in self.parameter_types) @property - def parameter_names(self) -> list[str]: + def parameter_names(self): return [self.get_parameter_name(t.name) for t in self.parameter_types] - def get_parameter_name(self, type_name: str) -> str: + def get_parameter_name(self, type_name): count = self.usage_by_type_name.get(type_name) or 0 count = count + 1 self.usage_by_type_name[type_name] = count diff --git a/python/cucumber_expressions/group.py b/python/cucumber_expressions/group.py index 5ccac1804..22ace3752 100644 --- a/python/cucumber_expressions/group.py +++ b/python/cucumber_expressions/group.py @@ -1,17 +1,38 @@ +from __future__ import annotations + +from typing import List, Optional + + class Group: def __init__( self, value: str, start: int, end: int, - children: list["Group"], - name: str | None = None, + children: List[Group], + name: Optional[str] = None, ): - self.children = children + self._children = children + self._value = value + self._start = start + self._end = end self.name = name - self.value = value - self.start = start - self.end = end + + @property + def value(self): + return self._value + + @property + def start(self): + return self._start + + @property + def end(self): + return self._end + + @property + def children(self): + return self._children @property def values(self): diff --git a/python/cucumber_expressions/group_builder.py b/python/cucumber_expressions/group_builder.py index 1d25df129..543efaeb3 100644 --- a/python/cucumber_expressions/group_builder.py +++ b/python/cucumber_expressions/group_builder.py @@ -1,22 +1,28 @@ +from __future__ import annotations + +from typing import List + from cucumber_expressions.group import Group class GroupBuilder: def __init__(self): - self.group_builders: list[GroupBuilder] = [] - self.capturing: bool = True - self.source: str | None = None - self.end_index: int | None = None + self._group_builders: List[GroupBuilder] = [] + self._capturing = True + self._source: str = "" + self._end_index = None + self._children: List[GroupBuilder] = [] - def add(self, group_builder: "GroupBuilder"): - self.group_builders.append(group_builder) + def add(self, group_builder: GroupBuilder): + self._group_builders.append(group_builder) def build(self, match, group_indices, group_name_map: dict) -> Group: group_index = next(group_indices) group_name = group_name_map.get(group_index, None) children = [ - gb.build(match, group_indices, group_name_map) for gb in self.group_builders + gb.build(match, group_indices, group_name_map) + for gb in self._group_builders ] return Group( name=group_name, @@ -26,10 +32,26 @@ def build(self, match, group_indices, group_name_map: dict) -> Group: children=children, ) - def move_children_to(self, group_builder: "GroupBuilder") -> None: - for child in self.group_builders: + def move_children_to(self, group_builder: GroupBuilder) -> None: + for child in self._group_builders: group_builder.add(child) @property - def children(self) -> list["GroupBuilder"]: - return self.group_builders + def capturing(self): + return self._capturing + + @capturing.setter + def capturing(self, value: bool): + self._capturing = value + + @property + def children(self) -> list[GroupBuilder]: + return self._group_builders + + @property + def source(self) -> str: + return self._source + + @source.setter + def source(self, source: str): + self._source = source diff --git a/python/cucumber_expressions/parameter_type.py b/python/cucumber_expressions/parameter_type.py index 765136ca1..7b5588ce1 100644 --- a/python/cucumber_expressions/parameter_type.py +++ b/python/cucumber_expressions/parameter_type.py @@ -1,51 +1,16 @@ from __future__ import annotations import re -from typing import Callable, Optional, Pattern, TypeVar, Union +from typing import Callable, Optional, Pattern, Union, List -from .errors import CucumberExpressionError +from cucumber_expressions.errors import CucumberExpressionError ILLEGAL_PARAMETER_NAME_PATTERN = re.compile(r"([\[\]()$.|?*+])") -T = TypeVar("T") - - class ParameterType: """Creates a new Parameter Type""" - def __init__( - self, - name: str | None, - regexp: list[str] | str | list[Pattern] | Pattern, - type: T, - transformer: Optional[Callable] = None, - use_for_snippets: bool = True, - prefer_for_regexp_match: bool = False, - ): - """Creates a new Parameter - :param name: name of the parameter type - :type name: Optional[str] - :param regexp: regexp or list of regexps for capture groups - :type regexp: list[str], str, list[Pattern] or Pattern - :param type: the return type of the transformed - :type type: class - :param transformer: transforms a str to (possibly) another type - :type transformer: lambda - :param use_for_snippets: if this should be used for snippet generation - :type use_for_snippets: bool - :param prefer_for_regexp_match: if this should be preferred over similar types - :type prefer_for_regexp_match: bool - """ - self.name = name - if self.name: - self._check_parameter_type_name(self.name) - self.type = type - self.transformer = transformer or (lambda value: type(value)) - self.use_for_snippets = use_for_snippets - self.prefer_for_regexp_match = prefer_for_regexp_match - self.regexps = self.to_array(regexp) - def _check_parameter_type_name(self, type_name): """Checks if a parameter type name is allowed""" if not self._is_valid_parameter_type_name(type_name): @@ -77,6 +42,46 @@ def compare(pt1: ParameterType, pt2: ParameterType): return 1 return 0 + def __init__( + self, + name: str | None, + regexp: Union[List[str], str, List[Pattern], Pattern], + type, + transformer: Optional[Callable] = None, + use_for_snippets: bool = True, + prefer_for_regexp_match: bool = False, + ): + """Creates a new Parameter + :param name: name of the parameter type + :type name: Optional[str] + :param regexp: regexp or list of regexps for capture groups + :type regexp: list[str], str, list[Pattern] or Pattern + :param type: the return type of the transformed + :type type: class + :param transformer: transforms a str to (possibly) another type + :type transformer: lambda + :param use_for_snippets: if this should be used for snippet generation + :type use_for_snippets: bool + :param prefer_for_regexp_match: if this should be preferred over similar types + :type prefer_for_regexp_match: bool + """ + self.name = name + if self.name: + self._check_parameter_type_name(self.name) + self.type = type + self.transformer = transformer or (lambda value: type(value)) + self._use_for_snippets = use_for_snippets + self._prefer_for_regexp_match = prefer_for_regexp_match + self.regexps = self.to_array(regexp) + + @property + def prefer_for_regexp_match(self): + return self._prefer_for_regexp_match + + @property + def use_for_snippets(self): + return self._use_for_snippets + @staticmethod def _get_regexp_source(regexp_pattern: Pattern) -> str: invalid_flags = [re.I, re.M] @@ -92,10 +97,10 @@ def _get_regexp_source(regexp_pattern: Pattern) -> str: return regexp_pattern.pattern def to_array( - self, regexps: Union[list[str], str, list[Pattern], Pattern] - ) -> list[str]: + self, regexps: Union[List[str], str, List[Pattern], Pattern] + ) -> List[str]: """Make a list of regexps if not already""" - array: list = regexps if isinstance(regexps, list) else [regexps] + array: List = regexps if isinstance(regexps, list) else [regexps] return [ regexp if isinstance(regexp, str) else self._get_regexp_source(regexp) for regexp in array diff --git a/python/cucumber_expressions/parameter_type_registry.py b/python/cucumber_expressions/parameter_type_registry.py index 44208bdad..7dff14ee3 100644 --- a/python/cucumber_expressions/parameter_type_registry.py +++ b/python/cucumber_expressions/parameter_type_registry.py @@ -1,7 +1,7 @@ import functools import re from decimal import Decimal -from typing import Optional, Union +from typing import Optional, List, Union from cucumber_expressions.parameter_type import ParameterType from cucumber_expressions.errors import ( @@ -77,7 +77,7 @@ def __init__(self): ) @property - def parameter_types(self) -> list: + def parameter_types(self) -> List: return list(self.parameter_type_by_name.values()) def lookup_by_type_name(self, name: str) -> Optional[ParameterType]: diff --git a/python/cucumber_expressions/regular_expression.py b/python/cucumber_expressions/regular_expression.py index 26fbc14ce..27ffa4fb4 100644 --- a/python/cucumber_expressions/regular_expression.py +++ b/python/cucumber_expressions/regular_expression.py @@ -1,6 +1,6 @@ import re from collections.abc import Generator -from typing import Optional, Union, AnyStr +from typing import Optional, Union from cucumber_expressions.argument import Argument from cucumber_expressions.parameter_type import ParameterType @@ -88,5 +88,5 @@ def generate_parameter_types( yield parameter_type, capture_name @property - def regexp(self) -> AnyStr: + def regexp(self): return self.expression_regexp.pattern diff --git a/python/cucumber_expressions/tree_regexp.py b/python/cucumber_expressions/tree_regexp.py index 7f9f06a63..a4d6fca7d 100644 --- a/python/cucumber_expressions/tree_regexp.py +++ b/python/cucumber_expressions/tree_regexp.py @@ -1,5 +1,5 @@ import re -from typing import Pattern, Union, Optional +from typing import Pattern, Union, Optional, List from cucumber_expressions.ast import EscapeCharacters from cucumber_expressions.group import Group @@ -21,7 +21,7 @@ def match(self, string: str) -> Optional[Group]: def create_group_builder(self, regexp): source = regexp.pattern - stack: list[GroupBuilder] = [GroupBuilder()] + stack: List[GroupBuilder] = [GroupBuilder()] group_start_stack = [] escaping: bool = False char_class: bool = False diff --git a/python/pyproject.toml b/python/pyproject.toml index 094cbcccd..ea894fa60 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -39,7 +39,7 @@ classifiers = [ ] [tool.poetry.dependencies] -python = "^3.9" +python = "^3.8" [tool.poetry.dev-dependencies] pre-commit = "^3.3" From 0efab69191caaeeb04e05db13a803b0c6791bf8d Mon Sep 17 00:00:00 2001 From: Jason Allen Date: Fri, 6 Dec 2024 09:10:10 +0000 Subject: [PATCH 4/7] Remove removing support for py 3.8 --- python/cucumber_expressions/argument.py | 4 ++-- python/cucumber_expressions/expression.py | 6 +++--- python/cucumber_expressions/expression_parser.py | 6 +++--- python/cucumber_expressions/regular_expression.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/cucumber_expressions/argument.py b/python/cucumber_expressions/argument.py index 93823c956..5f79483b7 100644 --- a/python/cucumber_expressions/argument.py +++ b/python/cucumber_expressions/argument.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Optional, List +from typing import Optional, List, Tuple from cucumber_expressions.parameter_type import ParameterType from cucumber_expressions.tree_regexp import TreeRegexp, Group @@ -19,7 +19,7 @@ def __init__( def build( tree_regexp: TreeRegexp, text: str, - parameter_types_and_names: List[tuple[ParameterType, Optional[str]]], + parameter_types_and_names: List[Tuple[ParameterType, Optional[str]]], ) -> Optional[List[Argument]]: # Check if all elements in parameter_types_and_names are tuples for item in parameter_types_and_names: diff --git a/python/cucumber_expressions/expression.py b/python/cucumber_expressions/expression.py index c25e7a8f5..f03a0ac62 100644 --- a/python/cucumber_expressions/expression.py +++ b/python/cucumber_expressions/expression.py @@ -1,4 +1,4 @@ -from typing import Optional, List +from typing import Optional, List, Tuple from cucumber_expressions.argument import Argument from cucumber_expressions.ast import Node, NodeType @@ -22,7 +22,7 @@ class CucumberExpression: def __init__(self, expression: str, parameter_type_registry: ParameterTypeRegistry): self.expression = expression self.parameter_type_registry = parameter_type_registry - self.parameter_types_and_names: List[tuple[ParameterType, Optional[str]]] = [] + self.parameter_types_and_names: List[Tuple[ParameterType, Optional[str]]] = [] self.tree_regexp = TreeRegexp( self.rewrite_to_regex(CucumberExpressionParser().parse(self.expression)) ) @@ -102,7 +102,7 @@ def rewrite_parameter(self, node: Node) -> str: def parse_parameter_name( self, name: str - ) -> tuple[Optional[str], Optional[ParameterType]]: + ) -> Tuple[Optional[str], Optional[ParameterType]]: """Helper function to parse the parameter name and return group_name and parameter_type.""" if ":" in name: group_name, parameter_type_name = name.split(":") diff --git a/python/cucumber_expressions/expression_parser.py b/python/cucumber_expressions/expression_parser.py index bf755fdbd..8fd71c4f9 100644 --- a/python/cucumber_expressions/expression_parser.py +++ b/python/cucumber_expressions/expression_parser.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import NamedTuple, Optional, Callable, List +from typing import NamedTuple, Optional, Callable, List, Tuple, Union from cucumber_expressions.ast import Token, TokenType, Node, NodeType from cucumber_expressions.errors import ( @@ -171,7 +171,7 @@ def parse_between( begin_token: TokenType, end_token: TokenType, parsers: List, - ) -> Callable[[Parser], Result | tuple[int, Node]]: + ) -> Callable[[Parser], Union[Result, Tuple[int, Node]]]: def _parse_between(parser: Parser): if not self.looking_at(parser.tokens, parser.current, begin_token): return Result(0, None) @@ -221,7 +221,7 @@ def parse_tokens_until( tokens: List[Token], start_at: int, end_tokens: List[TokenType], - ) -> tuple[int, List[Node]]: + ) -> Tuple[int, List[Node]]: current = start_at size = len(tokens) ast: List[Node] = [] diff --git a/python/cucumber_expressions/regular_expression.py b/python/cucumber_expressions/regular_expression.py index 27ffa4fb4..a9228e063 100644 --- a/python/cucumber_expressions/regular_expression.py +++ b/python/cucumber_expressions/regular_expression.py @@ -1,6 +1,6 @@ import re from collections.abc import Generator -from typing import Optional, Union +from typing import Optional, Union, Tuple from cucumber_expressions.argument import Argument from cucumber_expressions.parameter_type import ParameterType @@ -67,7 +67,7 @@ def _process_capture_group(self, group_source: str): def generate_parameter_types( self, text - ) -> Generator[tuple[ParameterType, Optional[str]]]: + ) -> Generator[Tuple[ParameterType, Optional[str]]]: for group_builder in self.tree_regexp.group_builder.children: # Extract the raw source for the group parameter_type_regexp = group_builder.source From 51a3c34278c7c127a6d59e235b95a76e8df21d8b Mon Sep 17 00:00:00 2001 From: Jason Allen Date: Fri, 6 Dec 2024 09:13:08 +0000 Subject: [PATCH 5/7] Remove removing support for py 3.8 --- python/cucumber_expressions/expression_generator.py | 2 +- python/cucumber_expressions/regular_expression.py | 4 ++-- python/tests/test_expression.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/cucumber_expressions/expression_generator.py b/python/cucumber_expressions/expression_generator.py index 7d613e8bd..34595ad22 100644 --- a/python/cucumber_expressions/expression_generator.py +++ b/python/cucumber_expressions/expression_generator.py @@ -81,7 +81,7 @@ def escape(string: str) -> str: .replace(r"/", "\\/") ) - def create_parameter_type_matchers(self, text) -> list[ParameterTypeMatcher]: + def create_parameter_type_matchers(self, text) -> List[ParameterTypeMatcher]: parameter_type_matchers = [] for parameter_type in self.parameter_type_registry.parameter_types: if parameter_type.use_for_snippets: diff --git a/python/cucumber_expressions/regular_expression.py b/python/cucumber_expressions/regular_expression.py index a9228e063..9ed01a41f 100644 --- a/python/cucumber_expressions/regular_expression.py +++ b/python/cucumber_expressions/regular_expression.py @@ -1,6 +1,6 @@ import re from collections.abc import Generator -from typing import Optional, Union, Tuple +from typing import Optional, Union, Tuple, List from cucumber_expressions.argument import Argument from cucumber_expressions.parameter_type import ParameterType @@ -32,7 +32,7 @@ def __init__( self.parameter_type_registry = parameter_type_registry self.tree_regexp: TreeRegexp = TreeRegexp(self.expression_regexp.pattern) - def match(self, text) -> Optional[list[Argument]]: + def match(self, text) -> Optional[List[Argument]]: # Convert the generator to a list before passing it to Argument.build parameter_types_and_names = list( (parameter_type, capture_name) diff --git a/python/tests/test_expression.py b/python/tests/test_expression.py index cdc9d88af..558346c23 100644 --- a/python/tests/test_expression.py +++ b/python/tests/test_expression.py @@ -1,6 +1,6 @@ from decimal import Decimal from pathlib import Path -from typing import Optional, Any +from typing import Optional, Any, Tuple from tests.definitions import TESTDATA_ROOT_DIR @@ -20,7 +20,7 @@ def match( expression: str, match_text: str, parameter_registry: ParameterTypeRegistry = ParameterTypeRegistry(), -) -> Optional[tuple[Any, str]]: +) -> Optional[Tuple[Any, str]]: cucumber_expression = CucumberExpression(expression, parameter_registry) matches = cucumber_expression.match(match_text) From 8272b1247af54ecf466f4569577b1472af4da1cb Mon Sep 17 00:00:00 2001 From: Jason Allen Date: Fri, 6 Dec 2024 11:23:37 +0000 Subject: [PATCH 6/7] Fix type hint for Generator --- python/cucumber_expressions/regular_expression.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/cucumber_expressions/regular_expression.py b/python/cucumber_expressions/regular_expression.py index 9ed01a41f..847e7f3c1 100644 --- a/python/cucumber_expressions/regular_expression.py +++ b/python/cucumber_expressions/regular_expression.py @@ -1,6 +1,6 @@ import re from collections.abc import Generator -from typing import Optional, Union, Tuple, List +from typing import Optional, Union, List from cucumber_expressions.argument import Argument from cucumber_expressions.parameter_type import ParameterType @@ -65,9 +65,7 @@ def _process_capture_group(self, group_source: str): # No named group, just return the original pattern return None, group_source - def generate_parameter_types( - self, text - ) -> Generator[Tuple[ParameterType, Optional[str]]]: + def generate_parameter_types(self, text) -> Generator: for group_builder in self.tree_regexp.group_builder.children: # Extract the raw source for the group parameter_type_regexp = group_builder.source From a51f9dea7d3c8dc2241908daf2bbb55dcf02c1a0 Mon Sep 17 00:00:00 2001 From: Jason Allen Date: Sat, 7 Dec 2024 08:17:05 +0000 Subject: [PATCH 7/7] Implement trimming whitespace for named args in cucumber expression --- python/cucumber_expressions/expression.py | 2 +- python/tests/test_expression.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/python/cucumber_expressions/expression.py b/python/cucumber_expressions/expression.py index f03a0ac62..9fc4e743b 100644 --- a/python/cucumber_expressions/expression.py +++ b/python/cucumber_expressions/expression.py @@ -105,7 +105,7 @@ def parse_parameter_name( ) -> Tuple[Optional[str], Optional[ParameterType]]: """Helper function to parse the parameter name and return group_name and parameter_type.""" if ":" in name: - group_name, parameter_type_name = name.split(":") + group_name, parameter_type_name = [part.strip() for part in name.split(":")] parameter_type = self.parameter_type_registry.lookup_by_type_name( parameter_type_name ) diff --git a/python/tests/test_expression.py b/python/tests/test_expression.py index 558346c23..acc18866c 100644 --- a/python/tests/test_expression.py +++ b/python/tests/test_expression.py @@ -55,6 +55,14 @@ def test_documents_match_arguments_with_names(self): values = match("I have {cuke_count:int} cuke(s)", "I have 7 cukes") assert values[0] == (7, "cuke_count") + def test_documents_match_arguments_with_names_and_spaces(self): + values = match( + "I have { cuke_count : int } cuke(s) and {gherkin_count: int} gherkin(s)", + "I have 7 cukes and 4 gherkins", + ) + assert values[0] == (7, "cuke_count") + assert values[1] == (4, "gherkin_count") + def test_matches_float(self): assert match("{float}", "") is None assert match("{float}", ".") is None