@@ -3,7 +3,298 @@ package Gherkin::MarkdownTokenMatcher;
33use strict;
44use warnings;
55
6- use base ' Gherkin::TokenMatcher' ;
# Regex source strings; they are compiled with qr// at the point of use.

# DocString fence: three or more backticks at the start of the line
# (capture 1), followed by the rest of the line (capture 2).
our $DEFAULT_DOC_STRING_SEPARATOR = q/^(```[`]*)(.*)/;

# Step prefix: a '*', '+' or '-' bullet with optional whitespace on either
# side, captured as group 1.
our $KEYWORD_PREFIX_BULLET = q/^(\\s*[*+-]\\s*)/;

# Title prefix: one to six '#' characters followed by a single whitespace
# character, captured as group 1.
our $KEYWORD_PREFIX_HEADER = q/^(#{1,6}\\s)/;
9+
10+ use Class::XSAccessor accessors => [
11+ qw/ dialect _default_dialect_name _indent_to_remove _active_doc_string_separator _keyword_types
12+ _matched_FeatureLine _non_star_step_keywords/ ,
13+ ];
14+
15+ use Gherkin::Dialect;
16+
# Constructor.
#
# $options - optional hash reference. It is blessed in place and becomes the
#            object itself. When it carries no true 'dialect' entry, a
#            Gherkin::Dialect for 'en' is created.
#
# Returns the new matcher after remembering the default dialect name,
# collecting the step keywords and calling reset().
sub new {
    my ( $class, $options ) = @_;

    $options->{'dialect'} ||= Gherkin::Dialect->new( { dialect => 'en' } );
    my $self = bless $options, $class;
    $self->_default_dialect_name( $self->dialect_name );

    # Gather the keywords of every step kind, leaving out the '* ' keyword.
    my @non_star_step_keywords;
    for my $step_kind (qw/Given When Then And But/) {
        for my $keyword ( @{ $self->dialect->$step_kind } ) {
            next if $keyword eq '* ';
            push @non_star_step_keywords, $keyword;
        }
    }
    $self->_non_star_step_keywords( \@non_star_step_keywords );

    $self->reset();
    return $self;
}
30+
# Appends $type to the list of types recorded for each keyword.
#
# $keyword_types - hash reference mapping keyword => array reference of types;
#                  modified in place.
# $keywords      - array reference of keywords to register.
# $type          - the type value to append for each of those keywords.
#
# Returns nothing.
sub _add_keyword_type_mappings {
    my ( $keyword_types, $keywords, $type ) = @_;

    # push autovivifies the array reference for a keyword not seen before.
    push @{ $keyword_types->{$_} }, $type for @{$keywords};

    return;
}
42+
# Returns the value of the 'dialect' method of the current dialect object.
sub dialect_name {
    my ($self) = @_;
    my $dialect = $self->dialect;
    return $dialect->dialect;
}
44+
# Switches the underlying dialect object and rebuilds the keyword => types
# lookup stored in _keyword_types.
#
# All arguments after $self are forwarded unchanged to the dialect's own
# change_dialect method.
#
# Returns nothing.
#
# NOTE(review): the Cucumber::Messages::Step constants are referenced here but
# no 'use Cucumber::Messages' is visible in this file - confirm it is loaded
# before this module is compiled.
sub change_dialect {
    my ( $self, @dialect_args ) = @_;
    $self->dialect->change_dialect(@dialect_args);

    my $dialect = $self->dialect;

    # Pairs of [ keyword list, keyword type ], registered in this order.
    my @registrations = (
        [ $dialect->Given, Cucumber::Messages::Step::KEYWORDTYPE_CONTEXT ],
        [ $dialect->When,  Cucumber::Messages::Step::KEYWORDTYPE_ACTION ],
        [ $dialect->Then,  Cucumber::Messages::Step::KEYWORDTYPE_OUTCOME ],
        [
            [ @{ $dialect->And }, @{ $dialect->But } ],
            Cucumber::Messages::Step::KEYWORDTYPE_CONJUNCTION
        ],
    );

    my $keyword_types = {};
    for my $registration (@registrations) {
        my ( $keywords, $type ) = @{$registration};
        _add_keyword_type_mappings( $keyword_types, $keywords, $type );
    }

    $self->_keyword_types($keyword_types);
    return;
}
61+
# Restores the matcher to its initial state: default dialect, no DocString
# indentation to strip, the default DocString separator, and no feature line
# seen yet.
#
# Returns nothing.
sub reset {
    my $self = shift;
    $self->change_dialect( $self->_default_dialect_name );
    $self->_indent_to_remove(0);
    $self->_active_doc_string_separator($DEFAULT_DOC_STRING_SEPARATOR);

    # match_FeatureLine returns early once this flag is set, so it has to be
    # cleared here; otherwise a matcher that is reset and reused for another
    # document could never match a feature line again.
    $self->_matched_FeatureLine(0);
    return;
}
69+
# Matches the feature line of a document.
#
# $token - the token to inspect and, on success, mark as a FeatureLine.
#
# Returns 1 when the token was marked, nothing when a feature line has
# already been matched or the token carries no line.
sub match_FeatureLine {
    my ( $self, $token ) = @_;
    return if $self->_matched_FeatureLine;

    # The fallback below dereferences $token->line, so guard against tokens
    # without a line the same way the other matchers do.
    return unless $token->line;

    # We first try to match "# Feature: blah"
    my $result = $self->_match_title_line( $KEYWORD_PREFIX_HEADER, ':',
        $token, FeatureLine => $self->dialect->Feature );

    # If we didn't match "# Feature: blah", we still match this line
    # as a FeatureLine.
    # The reason for this is that users may not want to be constrained by
    # having this as their first line.
    unless ($result) {
        $self->_set_token_matched( $token,
            FeatureLine => { text => $token->line->_trimmed_line_text } );
    }
    $self->_matched_FeatureLine(1);
    return 1;
}
87+
# Matches a header line of the form "<header prefix><Rule keyword>:".
#
# Returns whatever _match_title_line returns: 1 on a match, nothing otherwise.
sub match_RuleLine {
    my ( $self, $token ) = @_;
    my $rule_keywords = $self->dialect->Rule;
    return $self->_match_title_line( $KEYWORD_PREFIX_HEADER, ':', $token,
        'RuleLine', $rule_keywords );
}
93+
# Matches a header line introducing a scenario. The Scenario keywords are
# tried first and the ScenarioOutline keywords second; either way the token
# type is ScenarioLine.
#
# Returns a true value on a match, nothing otherwise.
sub match_ScenarioLine {
    my ( $self, $token ) = @_;

    my $matched_scenario = $self->_match_title_line( $KEYWORD_PREFIX_HEADER,
        ':', $token, 'ScenarioLine', $self->dialect->Scenario );
    return $matched_scenario if $matched_scenario;

    return $self->_match_title_line( $KEYWORD_PREFIX_HEADER, ':', $token,
        'ScenarioLine', $self->dialect->ScenarioOutline );
}
101+
# Matches a header line of the form "<header prefix><Background keyword>:".
#
# Returns whatever _match_title_line returns: 1 on a match, nothing otherwise.
sub match_BackgroundLine {
    my ( $self, $token ) = @_;
    my $background_keywords = $self->dialect->Background;
    return $self->_match_title_line( $KEYWORD_PREFIX_HEADER, ':', $token,
        'BackgroundLine', $background_keywords );
}
107+
# Matches a header line of the form "<header prefix><Examples keyword>:".
#
# Returns whatever _match_title_line returns: 1 on a match, nothing otherwise.
sub match_ExamplesLine {
    my ( $self, $token ) = @_;
    my $examples_keywords = $self->dialect->Examples;
    return $self->_match_title_line( $KEYWORD_PREFIX_HEADER, ':', $token,
        'ExamplesLine', $examples_keywords );
}
113+
# Never matches.
#
# Language headers such as `# language: [ISO 639-1]` are deliberately not
# supported in Markdown; users should specify a language globally.
#
# Returns nothing, whatever the token holds.
sub match_Language {
    my ( $self, $token ) = @_;    # both unused; kept for the matcher interface
    return;
}
120+
# Matches a line containing one or more tags written as inline code,
# e.g. `@smoke`.
#
# $token - the token to inspect and, on success, mark as a TagLine whose
#          items are hashes with 'column' and 'text' keys.
#
# Returns 1 when at least one tag was found, nothing otherwise.
sub match_TagLine {
    my ( $self, $token ) = @_;
    my $line = $token->line;
    return unless $line;

    # Iterate over a private copy: m//g keeps its position on the scalar it
    # is applied to, so the loop must not depend on the accessor handing
    # back the very same scalar on every call.
    my $line_text = $line->line_text;

    my @tags;
    while ( $line_text =~ m/`(@[^`]+)`/g ) {
        push @tags,
          {
            # $-[0] is the 0-based offset of the opening backtick; the '@'
            # follows it, and columns are 1-based.
            column => $-[0] + 2,
            text   => $1,
          };
    }
    return unless @tags;

    $self->_set_token_matched( $token, TagLine => { items => \@tags } );
    return 1;
}
138+
# Matches a line of the form "<prefix><keyword><keyword_suffix> <text>".
#
# $prefix         - regex source for the line prefix; it must contain exactly
#                   one capture group, whose length is added to the indent.
# $keyword_suffix - regex source expected right after the keyword.
# $token          - the token to inspect and, on success, mark.
# $token_type     - the matched type to record on the token.
# $keywords       - array reference of literal keywords to try.
#
# Returns 1 when the token was marked, nothing otherwise.
sub _match_title_line {
    my ( $self, $prefix, $keyword_suffix, $token, $token_type, $keywords ) = @_;
    return unless $token->line;

    # An empty alternation would match an empty keyword on any prefixed line.
    return unless @{$keywords};

    # Keywords are literal text, so escape them before building the pattern;
    # a regex metacharacter in a keyword must not change what is matched.
    my $alternatives = join '|', map { quotemeta } @{$keywords};
    my $regex =
      $prefix . '(' . $alternatives . ')' . $keyword_suffix . '\s*(.*)';

    if ( $token->line->_trimmed_line_text =~ qr/$regex/ ) {
        my $indent  = $token->line->indent + ( length($1) || 0 );
        my $keyword = $2;
        my $text    = $3;
        $text =~ s/\s+$//;

        my $keyword_type;
        if ( exists $self->_keyword_types->{$keyword} ) {
            # only set the keyword type if this is a step keyword; a keyword
            # registered under several types is reported as unknown
            my $types = $self->_keyword_types->{$keyword};
            $keyword_type =
              ( scalar @{$types} > 1 )
              ? Cucumber::Messages::Step::KEYWORDTYPE_UNKNOWN
              : $types->[0];
        }

        $self->_set_token_matched(
            $token,
            $token_type,
            {
                indent       => $indent,
                keyword      => $keyword,
                text         => $text,
                keyword_type => $keyword_type,
            }
        );
        return 1;
    }
    return;
}
163+
# Records the outcome of a successful match on the token.
#
# $token        - the token to update.
# $matched_type - the token type to record.
# $options      - optional hash reference with any of the keys 'text',
#                 'keyword', 'keyword_type', 'indent' and 'items'. It is
#                 modified in place: 'items' defaults to an empty array
#                 reference and 'text' is chomped.
#
# Also sets the token's location column to the matched indent plus one and
# records the current dialect name. Returns nothing.
sub _set_token_matched {
    my ( $self, $token, $matched_type, $options ) = @_;

    $options->{'items'} ||= [];
    $token->matched_type($matched_type);

    if ( defined $options->{'text'} ) {
        chomp $options->{'text'};
        $token->matched_text( $options->{'text'} );
    }

    if ( defined $options->{'keyword'} ) {
        $token->matched_keyword( $options->{'keyword'} );
    }
    if ( defined $options->{'keyword_type'} ) {
        $token->matched_keyword_type( $options->{'keyword_type'} );
    }

    # Fall back to the line's own indent (0 for tokens without a line) when
    # the caller did not supply one.
    my $indent = $options->{'indent'};
    if ( !defined $indent ) {
        $indent = $token->line ? $token->line->indent : 0;
    }
    $token->matched_indent($indent);

    # 'items' is always defined at this point because of the default above.
    $token->matched_items( $options->{'items'} );

    $token->location->{'column'} = $token->matched_indent + 1;
    $token->matched_gherkin_dialect( $self->dialect_name );
    return;
}
192+
# Matches the end-of-file token.
#
# Returns 1 after marking the token as EOF, nothing when the token is not
# an end-of-file token.
sub match_EOF {
    my ( $self, $token ) = @_;

    # Explicit bare return, like the other matchers, instead of falling off
    # the end of the sub.
    return unless $token->is_eof;

    $self->_set_token_matched( $token, 'EOF' );
    return 1;
}
200+
# Matches a line as Empty when it is empty, or when none of the other
# matchers listed below accepts it.
#
# The matchers are tried in a fixed order and the search stops at the first
# one that succeeds; each of them may update the token or the matcher state
# while being tried.
#
# Returns 1 after marking the token as Empty, nothing otherwise.
sub match_Empty {
    my ( $self, $token ) = @_;
    return unless $token->line;

    if ( !$token->line->is_empty ) {
        my @other_matchers = qw/
          match_TagLine
          match_FeatureLine
          match_ScenarioLine
          match_BackgroundLine
          match_ExamplesLine
          match_RuleLine
          match_TableRow
          match_Comment
          match_Language
          match_DocStringSeparator
          match_EOF
          match_StepLine
          /;
        for my $matcher (@other_matchers) {
            return if $self->$matcher($token);
        }
    }

    $self->_set_token_matched( $token, Empty => { indent => 0 } );
    return 1;
}
227+
# Matches a table separator row (a line starting with '|' whose cells are
# accepted by _is_gfm_table_separator) and marks it as a Comment.
#
# Returns 1 when the token was marked, nothing otherwise.
sub match_Comment {
    my ( $self, $token ) = @_;
    my $line = $token->line;
    return unless $line;

    return unless $line->startswith('|');
    return unless $self->_is_gfm_table_separator( $line->table_cells );

    $self->_set_token_matched( $token,
        Comment => { text => $line->line_text, indent => 0 } );
    return 1;
}
241+
# Counts the cells whose text is a table separator: one or more dashes with
# an optional colon on either side.
#
# $table_cells - array reference of hashes, each with a 'text' key.
#
# Returns the number of separator cells, so the result is true as soon as a
# single cell qualifies.
sub _is_gfm_table_separator {
    my ( $self, $table_cells ) = @_;
    my $separator_count =
      grep { $_->{'text'} =~ m/^:?-+:?$/ } @{$table_cells};
    return $separator_count;
}
247+
# Matches any line as Other.
#
# The text is the entire line as returned by the line's get_line_text for
# the current DocString indent (_indent_to_remove).
#
# Returns 1 when the token was marked, nothing when it carries no line.
sub match_Other {
    my ( $self, $token ) = @_;
    my $line = $token->line;
    return unless $line;

    my $indent_to_remove = $self->_indent_to_remove;
    my $text             = $line->get_line_text($indent_to_remove);

    $self->_set_token_matched( $token,
        Other => { indent => 0, text => $text } );
    return 1;
}
258+
# Matches a bulleted line that starts with one of the step keywords
# collected in the constructor (the '* ' keyword is not among them).
#
# Returns whatever _match_title_line returns: 1 on a match, nothing otherwise.
sub match_StepLine {
    my ( $self, $token ) = @_;
    my $step_keywords = $self->_non_star_step_keywords;

    # No suffix is expected between the keyword and the step text.
    return $self->_match_title_line( $KEYWORD_PREFIX_BULLET, '', $token,
        'StepLine', $step_keywords );
}
264+
# Matches a DocString fence line against the separator currently in force.
#
# While the default separator is active, a match switches the active
# separator to "exactly this fence" and records the line's indent in
# _indent_to_remove. Otherwise a match switches back to the default
# separator.
#
# Returns 1 when the token was marked as DocStringSeparator, nothing
# otherwise.
sub match_DocStringSeparator {
    my ( $self, $token ) = @_;
    my $line = $token->line;
    return unless $line;

    my $active_separator = $self->_active_doc_string_separator;
    if ( $line->line_text =~ qr/$active_separator/ ) {

        # Keep the captured fence before anything else can touch $1.
        my $fence = $1;

        if ( $active_separator eq $DEFAULT_DOC_STRING_SEPARATOR ) {
            $self->_active_doc_string_separator( '^(' . $fence . ')$' );
            $self->_indent_to_remove( $line->indent );
        } else {
            $self->_active_doc_string_separator(
                $DEFAULT_DOC_STRING_SEPARATOR);
        }

        $self->_set_token_matched( $token,
            DocStringSeparator => { text => '', keyword => $fence } );
        return 1;
    }
    return;
}
283+
# Matches a table row and records its cells as the token's items.
#
# Gherkin tables must be indented by 2-5 whitespace characters in order to
# be distinguished from non-Gherkin tables. Rows accepted by
# _is_gfm_table_separator are not matched here.
#
# Returns 1 when the token was marked as TableRow, nothing otherwise.
sub match_TableRow {
    my ( $self, $token ) = @_;
    my $line = $token->line;
    return unless $line;

    return unless $line->line_text =~ m/^\s\s\s?\s?\s?\|/;

    my $cells = $line->table_cells;
    return if $self->_is_gfm_table_separator($cells);

    $self->_set_token_matched( $token, TableRow => { items => $cells } );
    return 1;
}
7298
82991;
9300
0 commit comments