Skip to content

Commit 0e845dd

Browse files
committed
[Perl] add MarkdownTokenMatcher skeleton
1 parent 6efbdb2 commit 0e845dd

File tree

6 files changed

+115
-9
lines changed

6 files changed

+115
-9
lines changed

perl/bin/gherkin-generate-tokens

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,23 @@ use lib 'Gherkin-latest/lib';
66

77
use Gherkin::Parser;
88
use Gherkin::TokenFormatterBuilder;
9+
use Gherkin::TokenMatcher;
10+
use Gherkin::MarkdownTokenMatcher;
911

1012
package App::GherkinGenerateTokens;
1113

1214
sub run {
1315
my ( $class, $fh, @file_list ) = @_;
1416

15-
my $parser
16-
= Gherkin::Parser->new( Gherkin::TokenFormatterBuilder->new() );
17-
18-
print $fh join "\n", @{ $parser->parse($_) } for @file_list;
17+
print $fh join "\n",
18+
@{ Gherkin::Parser->new(
19+
Gherkin::TokenFormatterBuilder->new(),
20+
/\.md$/
21+
? Gherkin::MarkdownTokenMatcher->new()
22+
: Gherkin::TokenMatcher->new()
23+
)->parse($_)
24+
}
25+
for @file_list;
1926
print $fh "\n";
2027

2128
}

perl/lib/Gherkin.pm

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use Cucumber::Messages;
1010
use Gherkin::AstBuilder;
1111
use Gherkin::Parser;
1212
use Gherkin::Pickles::Compiler;
13+
use Gherkin::TokenMatcher;
14+
use Gherkin::MarkdownTokenMatcher;
1315

1416

1517
use Class::XSAccessor accessors =>
@@ -55,8 +57,10 @@ sub from_paths {
5557
source => Cucumber::Messages::Source->new(
5658
uri => $path,
5759
data => $content,
58-
media_type => Cucumber::Messages::Source::MEDIATYPE_TEXT_X_CUCUMBER_GHERKIN_PLAIN,
59-
)
60+
media_type => $path =~ m/\.md$/
61+
? Cucumber::Messages::Source::MEDIATYPE_TEXT_X_CUCUMBER_GHERKIN_MARKDOWN
62+
: Cucumber::Messages::Source::MEDIATYPE_TEXT_X_CUCUMBER_GHERKIN_PLAIN,
63+
)
6064
),
6165
$id_generator,
6266
$sink);
@@ -113,8 +117,11 @@ sub from_source {
113117
if ($self->include_ast or $self->include_pickles) {
114118
my $source = $envelope->source;
115119
my $parser = Gherkin::Parser->new(
116-
Gherkin::AstBuilder->new($id_generator)
117-
);
120+
Gherkin::AstBuilder->new($id_generator),
121+
$source->media_type eq Cucumber::Messages::Source::MEDIATYPE_TEXT_X_CUCUMBER_GHERKIN_MARKDOWN
122+
? Gherkin::MarkdownTokenMatcher->new()
123+
: Gherkin::TokenMatcher->new()
124+
);
118125
my $data = $source->data;
119126

120127
local $@;

perl/lib/Gherkin/Dialect.pm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ one to be used for keyword translation lookup. Out of the box, Gherkin comes
9696
with actual translations, such as C<Afrikaans> as well as 'slang-like'
9797
translations such as "Pirate English".
9898
99-
This module is used by the L<token matcher|Gherkin::TokenMatcher> to identify
99+
This module is used by the L<token matcher|Gherkin::TokenMatcher> and
100+
the L<Markdown token matcher|Gherkin::MarkdownTokenMatcher> to identify
100101
the type of token (input line) passed to the scanner.
101102
102103
=head1 METHODS
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package Gherkin::MarkdownTokenMatcher;
2+
3+
use strict;
4+
use warnings;
5+
6+
use base 'Gherkin::TokenMatcher';
7+
8+
1;
9+
10+
__END__
11+
12+
13+
=head1 NAME
14+
15+
Gherkin::MarkdownTokenMatcher - Markdown line token matching for the Gherkin parser
16+
17+
=head1 SYNOPSIS
18+
19+
use Gherkin::MarkdownTokenMatcher;
20+
use Gherkin::Dialect;
21+
22+
# Instantiate a token matcher with the default language 'Emoji'
23+
my $matcher = Gherkin::MarkdownTokenMatcher->new( {
24+
dialect => Gherkin::Dialect->new( { dialect => 'em'} )
25+
} );
26+
27+
=head1 DESCRIPTION
28+
29+
This is an alternate token matcher for Markdown with Gherkin (MDG).
30+
31+
The Gherkin language has a line-based structure. The parser knows about state,
32+
but defers identifying the type of line tokens to the token matcher. The
33+
matcher knows how to identify line tokens based on the grammar's keywords.
34+
Although the matcher knows how to identify line tokens based on the keywords,
35+
it depends on L<Gherkin::Dialect> to provide the actual keyword texts.
36+
37+
=head1 METHODS
38+
39+
=head2 new( [$options] )
40+
41+
Constructor.
42+
43+
C<$options> is a hashref with the following keys:
44+
45+
=over
46+
47+
=item C<dialect>
48+
49+
An instance of L<Gherkin::Dialect> to provide the keyword texts used to identify
50+
the type of line-token being matched.
51+
52+
=back
53+
54+
=head2 dialect_name
55+
56+
Returns the name of the current dialect selected from the C<dialect> instance.
57+
58+
=head2 change_dialect
59+
60+
Changes the selected dialect on the C<dialect> instance. Dialects are groups of
61+
keywords belonging together; this is how keyword translations are handled.
62+
63+
=head2 reset
64+
65+
Changes the token matcher's state back to its initial state; used to restart
65+
scanning a document. Multiple documents may be parsed using a single token
66+
matcher with a C<reset> call in between.
68+
69+
=head1 SEE ALSO
70+
71+
=over 8
72+
73+
=item * L<Gherkin>
74+
75+
=item * L<Gherkin::Dialect>
76+
77+
=item * L<Gherkin::Parser>
78+
79+
=item * L<Gherkin::TokenMatcher>
80+
81+
=back
82+
83+
=head1 LICENSE
84+
85+
See L<Gherkin>.
86+
87+
=cut

perl/lib/Gherkin/Parser.pm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ The C<$uri> parameter is expected to be passed in all but the third case.
7575
7676
=item * L<Gherkin::Dialect>
7777
78+
=item * L<Gherkin::MarkdownTokenMatcher>
79+
7880
=item * L<Gherkin::TokenMatcher>
7981
8082
=item * L<Gherkin::TokenScanner>

perl/lib/Gherkin/TokenMatcher.pm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,8 @@ scanner with a C<reset> call in-between.
351351
352352
=item * L<Gherkin::Dialect>
353353
354+
=item * L<Gherkin::MarkdownTokenMatcher>
355+
354356
=item * L<Gherkin::Parser>
355357
356358
=back

0 commit comments

Comments
 (0)