Skip to content

Commit 8f2bc62

Browse files
authored
Python enable markdown acceptance tests (#64)
1 parent c6a4605 commit 8f2bc62

File tree

9 files changed

+339
-60
lines changed

9 files changed

+339
-60
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ This document is formatted according to the principles of [Keep A CHANGELOG](htt
1515
- [cpp] Actually allow comment inside descriptions ([#414](https://github.com/cucumber/gherkin/pull/414))
1616
- [cpp] Add missing translations for Rule ([#415](https://github.com/cucumber/gherkin/pull/415))
1717
- [cpp] Prefer the longest step keyword ([#416](https://github.com/cucumber/gherkin/pull/416))
18+
- [Python] Fix acceptance tests ([#64](https://github.com/cucumber/gherkin/pull/64))
1819

1920
## [32.1.2] - 2025-05-25
2021
### Fixed

python/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ SOURCE_FILES = $(shell find . -name "*.py" | grep -v $(GHERKIN_PARSER))
88
GHERKIN_GENERATE_EVENTS = python -m scripts.generate_events
99
GHERKIN_GENERATE_TOKENS = python -m scripts.generate_tokens
1010

11-
GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature")
12-
BAD_FEATURE_FILES = $(shell find ../testdata/bad -name "*.feature")
11+
GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature" -o -name "*.feature.md")
12+
BAD_FEATURE_FILES = $(shell find ../testdata/bad -name "*.feature" -o -name "*.feature.md")
1313

1414
TOKENS = $(patsubst ../testdata/%,acceptance/testdata/%.tokens,$(GOOD_FEATURE_FILES))
1515
ASTS = $(patsubst ../testdata/%,acceptance/testdata/%.ast.ndjson,$(GOOD_FEATURE_FILES))

python/gherkin/stream/gherkin_events.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from gherkin.stream.id_generator import IdGenerator
1212
from gherkin.stream.source_events import Event
1313
from gherkin.token import Location
14+
from gherkin.token_matcher import TokenMatcher
15+
from gherkin.token_matcher_markdown import GherkinInMarkdownTokenMatcher
1416

1517

1618
class Source(TypedDict):
@@ -65,7 +67,12 @@ def enum(
6567
source = source_event["source"]["data"]
6668

6769
try:
68-
gherkin_document = self.parser.parse(source)
70+
matcher=None
71+
if source_event["source"]["mediaType"] == 'text/x.cucumber.gherkin+plain':
72+
matcher = TokenMatcher()
73+
elif source_event["source"]["mediaType"] == 'text/x.cucumber.gherkin+markdown':
74+
matcher = GherkinInMarkdownTokenMatcher()
75+
gherkin_document = self.parser.parse(source, matcher)
6976
gherkin_document_with_uri: GherkinDocumentWithURI = {
7077
**gherkin_document,
7178
"uri": uri,

python/gherkin/stream/source_events.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,18 @@ class Event(TypedDict):
1414
source: Source
1515

1616

17+
def _media_type(path) -> String:
18+
if(path.endswith(".feature")):
19+
return 'text/x.cucumber.gherkin+plain'
20+
if(path.endswith(".feature.md")):
21+
return 'text/x.cucumber.gherkin+markdown'
22+
1723
def source_event(path: str) -> Event:
    """Build the ``source`` event for the feature file at *path*.

    The event carries the path as ``uri``, the file's UTF-8 text as ``data``
    and the media type derived from the path by ``_media_type``.
    """
    # newline="" hands the text back with its line endings untranslated.
    # The with-block closes the handle instead of leaving it to the GC.
    with open(path, encoding="utf8", newline="") as feature_file:
        data = feature_file.read()
    event: Event = {
        "source": {
            "uri": path,
            "data": data,
            "mediaType": _media_type(path),
        }
    }
    return event

python/gherkin/token_matcher_markdown.py

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from __future__ import annotations
22

33
import re
4+
from collections import defaultdict
45
from collections.abc import Iterable
6+
from typing import TypedDict
57

68
from .gherkin_line import Cell
79
from .token import Token
810
from .token_matcher import TokenMatcher, MatchedItems
11+
from .dialect import Dialect
912

1013
KEYWORD_PREFIX_BULLET = "^(\\s*[*+-]\\s*)"
1114
KEYWORD_PREFIX_HEADER = "^(#{1,6}\\s)"
@@ -23,7 +26,7 @@ def reset(self) -> None:
2326
def match_FeatureLine(self, token: Token) -> bool:
2427

2528
if self.matched_feature_line:
26-
self._set_token_matched(token, None)
29+
return False
2730

2831
# We first try to match "# Feature: blah"
2932
result = self._match_title_line(
@@ -39,8 +42,8 @@ def match_FeatureLine(self, token: Token) -> bool:
3942

4043
if not result:
4144
self._set_token_matched(token, "FeatureLine", token.line.get_line_text())
42-
self.matched_feature_line = result
43-
return result
45+
self.matched_feature_line = True
46+
return True
4447

4548
def match_RuleLine(self, token: Token) -> bool:
4649
return self._match_title_line(
@@ -103,26 +106,19 @@ def _is_gfm_table_separator(self, table_cells: list[Cell]) -> bool:
103106
return len(separator_values) > 0
104107

105108
def match_StepLine(self, token: Token) -> bool:
    """Match a bullet-prefixed step line and record it as a ``StepLine`` token.

    Delegates to ``_match_title_line`` with the bullet prefix pattern, the
    matcher's sorted step keywords and an empty keyword suffix.
    """
    step_keywords = self._sorted_step_keywords
    return self._match_title_line(
        KEYWORD_PREFIX_BULLET,
        step_keywords,
        "",
        token,
        "StepLine",
    )
116112

117113
def match_Comment(self, token: Token) -> bool:
    """Return True only for a GFM table separator row.

    Despite the method name, a separator row is recorded on the token as
    type ``"Empty"`` with indent 0. Every other line is left untouched and
    reported as not matched.
    """
    if not token.line.startswith("|"):
        return False
    if not self._is_gfm_table_separator(token.line.table_cells):
        return False
    self._set_token_matched(token, "Empty", indent=0)
    return True
123120

124121
def match_Empty(self, token: Token) -> bool:
125-
126122
result = False
127123
if token.line.is_empty():
128124
result = True
@@ -199,18 +195,35 @@ def _match_title_line(
199195
token: Token,
200196
token_type: str,
201197
) -> bool:
202-
keywords_or_list = "|".join(map(lambda x: re.escape(x), keywords))
203-
match = re.search(
204-
f"{prefix}({keywords_or_list}){keywordSuffix}(.*)",
205-
token.line.get_line_text(),
206-
)
207-
indent = token.line.indent
208-
209-
if match:
210-
matchedKeyword = match.group(2)
211-
indent += len(match.group(1))
212-
self._set_token_matched(
213-
token, token_type, match.group(3).strip(), matchedKeyword, indent=indent
198+
text = token.line.get_line_text()
199+
for keyword in keywords:
200+
match = re.search(
201+
f"{prefix}({re.escape(keyword)}){keywordSuffix}(.*)",
202+
text
214203
)
215-
return True
204+
if match:
205+
indent = token.line.indent + len(match.group(1))
206+
matchedKeyword = match.group(2)
207+
# only set the keyword type if this is a step keyword
208+
if( matchedKeyword in self.keyword_types ):
209+
matchedKeywordType = self.keyword_types[matchedKeyword][0]
210+
else:
211+
matchedKeywordType = None
212+
self._set_token_matched(
213+
token,
214+
token_type,
215+
match.group(3).strip(),
216+
matchedKeyword,
217+
keyword_type=matchedKeywordType,
218+
indent=indent
219+
)
220+
return True
221+
216222
return False
223+
224+
def _change_dialect(self, dialect_name, location=None) -> None:
    """Switch dialect, then drop the ``'* '`` keyword from the step keywords.

    The parent implementation runs first; afterwards ``'* '`` is filtered
    out of ``_sorted_step_keywords``.
    """
    super()._change_dialect(dialect_name, location)
    # NOTE(review): presumably '*' is excluded because it doubles as a
    # Markdown bullet marker -- confirm against the Markdown grammar.
    self._sorted_step_keywords = [
        keyword for keyword in self._sorted_step_keywords if keyword != '* '
    ]

python/scripts/generate_tokens.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
import sys
2-
32
from gherkin.token_scanner import TokenScanner
43
from gherkin.token_formatter_builder import TokenFormatterBuilder
54
from gherkin.parser import Parser
5+
from gherkin.token_matcher_markdown import GherkinInMarkdownTokenMatcher
66

77

88
def main() -> None:
    """Print the formatted token stream of every file named on the command line.

    Files whose name ends in ``.md`` are parsed with the Markdown token
    matcher; all others use the parser's default matcher.
    """
    parser = Parser(TokenFormatterBuilder())
    for path in sys.argv[1:]:
        scanner = TokenScanner(path)
        if path.endswith('.md'):
            tokens = parser.parse(scanner, GherkinInMarkdownTokenMatcher())
        else:
            tokens = parser.parse(scanner)
        print(tokens)
1418

1519

1620
if __name__ == "__main__":

python/test/gherkin_in_markdown_token_matcher_test.py

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,18 @@
55
location = {"line": 1, "column": 1}
66

77

8-
def test_it_matches_FeatureLine():
9-
tm = GherkinInMarkdownTokenMatcher("en")
10-
line = GherkinLine("""## Feature: hello""", location["line"])
8+
def test_it_matches_FeatureLineH1():
    """A level-1 Markdown header with the Feature keyword is a FeatureLine."""
    matcher = GherkinInMarkdownTokenMatcher("en")
    header = GherkinLine("# Feature: hello", location["line"])
    token = Token(gherkin_line=header, location=location)

    assert matcher.match_FeatureLine(token)
    assert token.matched_type == "FeatureLine"
    assert token.matched_keyword == "Feature"
    assert token.matched_text == "hello"
16+
17+
def test_it_matches_FeatureLineH2():
18+
tm = GherkinInMarkdownTokenMatcher('en')
19+
line = GherkinLine('''## Feature: hello''',location['line'])
1120
token = Token(gherkin_line=line, location=location)
1221
assert tm.match_FeatureLine(token)
1322
assert token.matched_type == "FeatureLine"
@@ -25,6 +34,15 @@ def test_it_matches_FeatureLine_in_French():
2534
assert token.matched_text == "hello"
2635

2736

37+
def test_it_matches_FeatureLine_without_the_Feature_keyword():
    """A header without the Feature keyword still matches as the FeatureLine.

    In that case no keyword is recorded and the whole line becomes the text.
    """
    tm = GherkinInMarkdownTokenMatcher("en")
    line = GherkinLine("# hello", location["line"])
    token = Token(gherkin_line=line, location=location)
    assert tm.match_FeatureLine(token)
    assert token.matched_type == "FeatureLine"
    # Identity comparison: an overloaded __eq__ can never fake a None.
    assert token.matched_keyword is None
    assert token.matched_text == "# hello"
45+
2846
def test_it_matches_bullet_Step():
2947
tm = GherkinInMarkdownTokenMatcher("en")
3048
line = GherkinLine(""" * Given I have 3 cukes""", location["line"])
@@ -41,21 +59,33 @@ def test_it_matches_plus_Step():
4159
line = GherkinLine(""" + Given I have 3 cukes""", location["line"])
4260
token = Token(gherkin_line=line, location=location)
4361
assert tm.match_StepLine(token)
44-
assert token.matched_type == "StepLine"
45-
assert token.matched_keyword == "Given "
46-
assert token.matched_text == "I have 3 cukes"
47-
assert token.location["column"] == 6
48-
62+
assert token.matched_type == 'StepLine'
63+
assert token.matched_keyword == 'Given '
64+
assert token.matched_keyword_type == 'Context'
65+
assert token.matched_text == 'I have 3 cukes'
66+
assert token.location['column'] == 6
4967

5068
def test_it_matches_hyphen_Step():
5169
tm = GherkinInMarkdownTokenMatcher("en")
5270
line = GherkinLine(""" - Given I have 3 cukes""", location["line"])
5371
token = Token(gherkin_line=line, location=location)
5472
assert tm.match_StepLine(token)
55-
assert token.matched_type == "StepLine"
56-
assert token.matched_keyword == "Given "
57-
assert token.matched_text == "I have 3 cukes"
58-
assert token.location["column"] == 6
73+
assert token.matched_type == 'StepLine'
74+
assert token.matched_keyword == 'Given '
75+
assert token.matched_keyword_type == 'Context'
76+
assert token.matched_text == 'I have 3 cukes'
77+
assert token.location['column'] == 6
78+
79+
def test_it_matches_a_when_Step():
80+
tm = GherkinInMarkdownTokenMatcher('en')
81+
line = GherkinLine(''' - When I do something''',location['line'])
82+
token = Token(gherkin_line=line, location=location)
83+
assert tm.match_StepLine(token)
84+
assert token.matched_type == 'StepLine'
85+
assert token.matched_keyword == 'When '
86+
assert token.matched_keyword_type == 'Action'
87+
assert token.matched_text == 'I do something'
88+
assert token.location['column'] == 6
5989

6090

6191
def test_it_matches_arbitrary_text_as_Other():
@@ -156,19 +186,6 @@ def test_it_does_not_match_table_row_indented_6_space():
156186
assert not tm.match_TableRow(token)
157187

158188

159-
def test_it_matches_table_separator_row_as_comment():
160-
tm = GherkinInMarkdownTokenMatcher("en")
161-
162-
l1 = GherkinLine(" | h1 | h2 |", location["line"])
163-
t1 = Token(l1, location)
164-
assert tm.match_TableRow(t1)
165-
166-
l2 = GherkinLine(" | --- | --- |", location["line"])
167-
t2 = Token(l2, location)
168-
assert not tm.match_TableRow(t2)
169-
assert tm.match_Comment(t2)
170-
171-
172189
def test_it_matches_indented_tags():
173190
tm = GherkinInMarkdownTokenMatcher("en")
174191

@@ -238,6 +255,34 @@ def test_it_matches_ExamplesLine():
238255
line = GherkinLine("""## Examples: """, location["line"])
239256
token = Token(gherkin_line=line, location=location)
240257
assert tm.match_ExamplesLine(token)
241-
assert token.matched_type == "ExamplesLine"
242-
assert token.matched_keyword == "Examples"
243-
assert token.matched_text == ""
258+
assert token.matched_type == 'ExamplesLine'
259+
assert token.matched_keyword == 'Examples'
260+
assert token.matched_text == ''
261+
262+
def test_it_matches_Empty():
    """An empty line matches as Empty, with no keyword and no text."""
    tm = GherkinInMarkdownTokenMatcher("en")
    line = GherkinLine("", location["line"])
    token = Token(gherkin_line=line, location=location)
    assert tm.match_Empty(token)
    assert token.matched_type == "Empty"
    # Identity comparison is the correct way to test for None.
    assert token.matched_keyword is None
    assert token.matched_text is None
270+
271+
def test_it_matches_arbitrary_text_as_Empty_after_the_FeatureLine_has_already_been_matched():
    """Once the FeatureLine has been matched, arbitrary text matches as Empty.

    White-box test: it relies on the matcher remembering that a feature
    line was already seen.
    """
    # Given the FeatureLine has already been matched
    tm = GherkinInMarkdownTokenMatcher("en")

    line = GherkinLine("# something arbitrary", location["line"])
    token = Token(gherkin_line=line, location=location)
    assert tm.match_FeatureLine(token)

    line = GherkinLine("arbitrary text", location["line"])
    token = Token(gherkin_line=line, location=location)

    assert tm.match_Empty(token)
    assert token.matched_type == "Empty"
    assert token.matched_items == []
    assert token.matched_keyword is None
    assert token.matched_text is None

0 commit comments

Comments
 (0)