Skip to content

Commit edc86a5

Browse files
committed
bug fixes, new features, refactor
- fixing character '\' in %mik expressions
- `patterns >>> function_name as res` implemented
- this mikmatch extension takes all bound expressions to the left (in order) and passes them to `function_name`
- and the result from this application is named `res`
- `let re = {%mik|some regex|}` syntax implemented
- `match str with | {%mik|some regex|}` implemented
- inside a regular match case, only patterns with %mik inside the string definition are compiled to an RE.
- raw string matching allowed
- guards allowed
- currently they aren't grouped by guard, so the performance is worse
1 parent f6b230c commit edc86a5

File tree

8 files changed

+566
-400
lines changed

8 files changed

+566
-400
lines changed

common/mik_lexer.mll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ let predefined_classes = [
1818
("cntrl", {|[[:cntrl:]]|});
1919
("xdigit", {|[[:xdigit:]]|});
2020
("space", {|[[:space:]]|});
21-
("word", {|[[:word:]]|});
21+
(* ("word", {|[[:word:]]|}); *)
2222
("eos", {|$|});
2323
("eol", {|$|[\n]|});
2424
("bnd", {|\b|});
@@ -82,8 +82,10 @@ rule token = parse
8282
| ':' { COLON }
8383
| '=' { EQUAL }
8484
| "as" { AS }
85+
| ">>>" { PIPE }
8586
| "int" { INT_CONVERTER }
8687
| "float" { FLOAT_CONVERTER }
88+
| "$" { PREDEFINED_CLASS "$" }
8789
| digit+ as n { INT (int_of_string n) }
8890
| module_ident as id { MOD_IDENT id }
8991
| ident as id {
@@ -97,6 +99,10 @@ rule token = parse
9799
| _ as c { raise (Error ("Unexpected character: " ^ String.make 1 c)) }
98100

99101
and char_literal buf = parse
102+
| '\\' '\\' {
103+
Buffer.add_string buf "\\\\";
104+
char_literal buf lexbuf
105+
}
100106
| '\\' (_ as c) {
101107
Buffer.add_char buf (escape_char c);
102108
char_literal buf lexbuf

common/mik_parser.mly

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ let unclosed_error what startpos endpos =
4343
%token <string> CHAR_LITERAL STRING_LITERAL IDENT MOD_IDENT PREDEFINED_CLASS
4444
%token <int> INT
4545
%token SLASH LPAREN RPAREN LBRACKET RBRACKET CARET LBRACE RBRACE
46-
%token DASH BAR STAR PLUS QUESTION UNDERSCORE COLON EQUAL AS
46+
%token DASH BAR STAR PLUS QUESTION UNDERSCORE COLON EQUAL AS PIPE
4747
%token INT_CONVERTER FLOAT_CONVERTER EOF
4848

4949
%start <string t> main_match_case
@@ -69,6 +69,13 @@ main_let_expr:
6969

7070
(* Top-level pattern rule.
   - A bare alternation is returned unchanged.
   - `alt_expr >>> func as name` (PIPE is the ">>>" token) builds a
     Pipe_all node carrying the located result name, the function name
     and the inner pattern.
   - The three truncated pipe forms, and the empty pattern, are reported
     through missing_error with a message naming the missing part. *)
pattern:
7171
| alt_expr { $1 }
72+
| alt_expr PIPE func = func_name AS name = IDENT {
73+
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
74+
wrap_loc $startpos $endpos (Pipe_all (name_loc, func, $1))
75+
}
76+
| alt_expr PIPE { missing_error "function name after '>>>'" $startpos($2) $endpos }
77+
| alt_expr PIPE func_name { missing_error "'as' and result name after function" $startpos($3) $endpos }
78+
| alt_expr PIPE func_name AS { missing_error "result name after 'as'" $startpos($4) $endpos }
7279
| { missing_error "pattern expression" $startpos $endpos }
7380

7481
alt_expr:
@@ -132,6 +139,9 @@ basic_atom:
132139
| UNDERSCORE {
133140
to_pcre_regex "." $startpos $endpos
134141
}
142+
| CARET {
143+
to_pcre_regex "^" $startpos $endpos
144+
}
135145
| PREDEFINED_CLASS {
136146
to_pcre_regex $1 $startpos $endpos
137147
}
@@ -187,8 +197,7 @@ basic_atom:
187197
let pattern_node = to_pcre_regex $2 $startpos($2) $endpos($2) in
188198
wrap_loc $startpos $endpos (Named_subs (ident_loc, Some name_loc, Some Float, pattern_node))
189199
}
190-
| LPAREN IDENT AS name = IDENT COLON EQUAL func = MOD_IDENT RPAREN
191-
| LPAREN IDENT AS name = IDENT COLON EQUAL func = IDENT RPAREN {
200+
| LPAREN IDENT AS name = IDENT COLON EQUAL func = func_name RPAREN {
192201
let ident_loc = wrap_loc $startpos($2) $endpos($2) $2 in
193202
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
194203
let pattern_node = to_pcre_regex $2 $startpos($2) $endpos($2) in
@@ -223,8 +232,7 @@ basic_atom:
223232
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
224233
wrap_loc $startpos $endpos (Capture_as (name_loc, Some Float, $2))
225234
}
226-
| LPAREN pattern AS name = IDENT COLON EQUAL func = MOD_IDENT RPAREN
227-
| LPAREN pattern AS name = IDENT COLON EQUAL func = IDENT RPAREN {
235+
| LPAREN pattern AS name = IDENT COLON EQUAL func = func_name RPAREN {
228236
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
229237
wrap_loc $startpos $endpos (Capture_as (name_loc, Some (Func func), $2))
230238
}
@@ -238,8 +246,13 @@ basic_atom:
238246
unclosed_error "parentheses (missing ')')" $startpos($1) $endpos($6)
239247
}
240248

249+
241250
| LPAREN error { syntax_error "Invalid expression in parentheses" $startpos($2) $endpos }
242251

252+
(* Name of a function to apply: accepts either an IDENT or a MOD_IDENT
   token and yields the token's string payload unchanged.
   NOTE(review): MOD_IDENT is presumably a module-qualified name —
   confirm against the lexer's module_ident definition. *)
func_name:
253+
| IDENT { $1 }
254+
| MOD_IDENT { $1 }
255+
243256
char_set:
244257
| char_set_item { $1 }
245258
| char_set_item char_set { $1 ^ $2 }

common/regexp_types.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ and 'a node =
1616
| Capture_as of string Location.loc * conv_ty option * 'a t
1717
| Named_subs of string Location.loc * string Location.loc option * conv_ty option * 'a t
1818
| Unnamed_subs of string Location.loc * 'a t
19+
| Pipe_all of string Location.loc * string * 'a t
1920
| Call of Longident.t Location.loc
2021
(* TODO: | Case_sense of t | Case_blind of t *)
2122

2223
and conv_ty =
2324
| Int
2425
| Float
2526
| Func of string
27+
| Pipe_all_func of string

common/regexp_types.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ and 'a node =
1616
| Capture_as of string Location.loc * conv_ty option * 'a t
1717
| Named_subs of string Location.loc * string Location.loc option * conv_ty option * 'a t
1818
| Unnamed_subs of string Location.loc * 'a t
19+
| Pipe_all of string Location.loc * string * 'a t
1920
| Call of Longident.t Location.loc
2021
(* TODO: | Case_sense of t | Case_blind of t *)
2122

2223
and conv_ty =
2324
| Int
2425
| Float
2526
| Func of string
27+
| Pipe_all_func of string

ppx_regexp/dune

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
(name ppx_regexp)
33
(public_name ppx_regexp_extended)
44
(kind ppx_rewriter)
5-
(modules regexp_types ppx_regexp regexp mik_parser mik_lexer)
5+
(modules util transformations regexp_types ppx_regexp regexp mik_parser mik_lexer)
66
(preprocess (pps ppxlib.metaquot))
77
(libraries ppxlib re re.perl)
88
(ppx_runtime_libraries re re.perl))

0 commit comments

Comments
 (0)