Skip to content

Commit 54372bc

Browse files
committed
[Perl] add MarkdownTokenMatcher skeleton
1 parent 7bb9365 commit 54372bc

File tree

6 files changed

+116
-9
lines changed

6 files changed

+116
-9
lines changed

perl/bin/gherkin-generate-tokens

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,23 @@ use lib 'Gherkin-latest/lib';
66

77
use Gherkin::Parser;
88
use Gherkin::TokenFormatterBuilder;
9+
use Gherkin::TokenMatcher;
10+
use Gherkin::MarkdownTokenMatcher;
911

1012
package App::GherkinGenerateTokens;
1113

1214
sub run {
1315
my ( $class, $fh, @file_list ) = @_;
1416

15-
my $parser
16-
= Gherkin::Parser->new( Gherkin::TokenFormatterBuilder->new() );
17-
18-
print $fh join "\n", @{ $parser->parse($_) } for @file_list;
17+
print $fh join "\n",
18+
@{ Gherkin::Parser->new(
19+
Gherkin::TokenFormatterBuilder->new(),
20+
/\.md$/
21+
? Gherkin::MarkdownTokenMatcher->new()
22+
: Gherkin::TokenMatcher->new()
23+
)->parse($_)
24+
}
25+
for @file_list;
1926
print $fh "\n";
2027

2128
return;

perl/lib/Gherkin.pm

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use Cucumber::Messages;
1010
use Gherkin::AstBuilder;
1111
use Gherkin::Parser;
1212
use Gherkin::Pickles::Compiler;
13+
use Gherkin::TokenMatcher;
14+
use Gherkin::MarkdownTokenMatcher;
1315

1416

1517
use Class::XSAccessor accessors =>
@@ -55,8 +57,10 @@ sub from_paths {
5557
source => Cucumber::Messages::Source->new(
5658
uri => $path,
5759
data => $content,
58-
media_type => Cucumber::Messages::Source::MEDIATYPE_TEXT_X_CUCUMBER_GHERKIN_PLAIN,
59-
)
60+
media_type => $path =~ m/\.md$/
61+
? Cucumber::Messages::Source::MEDIATYPE_TEXT_X_CUCUMBER_GHERKIN_MARKDOWN
62+
: Cucumber::Messages::Source::MEDIATYPE_TEXT_X_CUCUMBER_GHERKIN_PLAIN,
63+
)
6064
),
6165
$id_generator,
6266
$sink);
@@ -115,8 +119,11 @@ sub from_source {
115119
if ($self->include_ast or $self->include_pickles) {
116120
my $source = $envelope->source;
117121
my $parser = Gherkin::Parser->new(
118-
Gherkin::AstBuilder->new($id_generator)
119-
);
122+
Gherkin::AstBuilder->new($id_generator),
123+
$source->media_type eq Cucumber::Messages::Source::MEDIATYPE_TEXT_X_CUCUMBER_GHERKIN_MARKDOWN
124+
? Gherkin::MarkdownTokenMatcher->new()
125+
: Gherkin::TokenMatcher->new()
126+
);
120127
my $data = $source->data;
121128

122129
local $@;

perl/lib/Gherkin/Dialect.pm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ one to be used for keyword translation lookup. Out of the box, Gherkin comes
9797
with actual translations, such as C<Afrikaans> as well as 'slang-like'
9898
translations such as "Pirate English".
9999
100-
This module is used by the L<token matcher|Gherkin::TokenMatcher> to identify
100+
This module is used by the L<token matcher|Gherkin::TokenMatcher> and
101+
the L<Markdown token matcher|Gherkin::MarkdownTokenMatcher> to identify
101102
the type of token (input line) passed to the scanner.
102103
103104
=head1 METHODS
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package Gherkin::MarkdownTokenMatcher;
2+
3+
use strict;
4+
use warnings;
5+
6+
use base 'Gherkin::TokenMatcher';
7+
8+
1;
9+
10+
11+
__END__
12+
13+
14+
=head1 NAME
15+
16+
Gherkin::MarkdownTokenMatcher - Line token matching for Markdown with Gherkin (MDG)
17+
18+
=head1 SYNOPSIS
19+
20+
use Gherkin::MarkdownTokenMatcher;
21+
use Gherkin::Dialect;
22+
23+
# Instantiate a Markdown token matcher with the default language 'Emoji'
24+
my $matcher = Gherkin::MarkdownTokenMatcher->new( {
25+
dialect => Gherkin::Dialect->new( { dialect => 'em'} )
26+
} );
27+
28+
=head1 DESCRIPTION
29+
30+
This is an alternate token matcher for Markdown with Gherkin (MDG).
31+
32+
The Gherkin language has a line-based structure. The parser knows about state,
33+
but defers identifying the type of line tokens to the token matcher. The
34+
matcher knows how to identify line tokens based on the grammar's keywords.
35+
The keyword texts themselves are not part of the matcher;
36+
it depends on L<Gherkin::Dialect> to provide the actual keyword texts.
37+
38+
=head1 METHODS
39+
40+
=head2 new( [$options] )
41+
42+
Constructor.
43+
44+
C<$options> is a hashref with the following keys:
45+
46+
=over
47+
48+
=item C<dialect>
49+
50+
An instance of L<Gherkin::Dialect> to provide the keyword texts used to identify
51+
the type of line-token being matched.
52+
53+
=back
54+
55+
=head2 dialect_name
56+
57+
Returns the name of the current dialect selected from the C<dialect> instance.
58+
59+
=head2 change_dialect
60+
61+
Changes the selected dialect on the C<dialect> instance. Dialects are groups of
62+
keywords belonging together; this is how keyword translations are handled.
63+
64+
=head2 reset
65+
66+
Changes the token scanner's state back to its initial state; used to restart
67+
scanning a document. Multiple documents may be parsed using a single token
68+
scanner with a C<reset> call in-between.
69+
70+
=head1 SEE ALSO
71+
72+
=over 8
73+
74+
=item * L<Gherkin>
75+
76+
=item * L<Gherkin::Dialect>
77+
78+
=item * L<Gherkin::Parser>
79+
80+
=item * L<Gherkin::TokenMatcher>
81+
82+
=back
83+
84+
=head1 LICENSE
85+
86+
See L<Gherkin>.
87+
88+
=cut

perl/lib/Gherkin/Parser.pm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ The C<$uri> parameter is expected to be passed in all but the third case.
7575
7676
=item * L<Gherkin::Dialect>
7777
78+
=item * L<Gherkin::MarkdownTokenMatcher>
79+
7880
=item * L<Gherkin::TokenMatcher>
7981
8082
=item * L<Gherkin::TokenScanner>

perl/lib/Gherkin/TokenMatcher.pm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,8 @@ scanner with a C<reset> call in-between.
349349
350350
=item * L<Gherkin::Dialect>
351351
352+
=item * L<Gherkin::MarkdownTokenMatcher>
353+
352354
=item * L<Gherkin::Parser>
353355
354356
=back

0 commit comments

Comments
 (0)