Skip to content

Commit 0cf9a72

Browse files
authored
better substitution, let destruction fixes (#6)
* better substitution - more akin to ppx_tyre * fixed let destructure * fix let destructure conversions
1 parent dddb942 commit 0cf9a72

File tree

8 files changed

+248
-236
lines changed

8 files changed

+248
-236
lines changed

lib/mik_lexer.mll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ let needs_escape = function
5757
| _ -> false
5858
}
5959

60-
6160
let whitespace = [' ' '\t' '\r']
6261
let lowercase = ['a'-'z']
6362
let uppercase = ['A'-'Z']

lib/mik_parser.mly

Lines changed: 67 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@ let missing_error what startpos endpos =
3939
let unclosed_error what startpos endpos =
4040
syntax_error (Printf.sprintf "Unclosed %s" what) startpos endpos
4141

42+
let string_to_longident s =
43+
let open Ppxlib_ast.Ast in
44+
match String.split_on_char '.' s with
45+
| [] -> invalid_arg "string_to_longident: empty string"
46+
| [ x ] -> Lident x
47+
| x :: xs -> List.fold_left (fun acc s -> Ldot (acc, s)) (Lident x) xs
48+
49+
let last_component s =
50+
match List.rev (String.split_on_char '.' s) with
51+
| [] -> s
52+
| hd :: _ -> hd
53+
4254
let parse_flags s startpos endpos =
4355
let rec loop i flags =
4456
if i >= String.length s then flags
@@ -88,13 +100,13 @@ main_let_expr:
88100

89101
pattern:
90102
| alt_expr { $1 }
91-
| alt_expr PIPE func = func_name AS name = IDENT {
103+
| alt_expr PIPE func = ident AS name = IDENT {
92104
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
93105
wrap_loc $startpos $endpos (Pipe_all (name_loc, func, $1))
94106
}
95107
| alt_expr PIPE { missing_error "function name after '>>>'" $startpos($2) $endpos }
96-
| alt_expr PIPE func_name { missing_error "'as' and result name after function" $startpos($3) $endpos }
97-
| alt_expr PIPE func_name AS { missing_error "result name after 'as'" $startpos($4) $endpos }
108+
| alt_expr PIPE ident { missing_error "'as' and result name after function" $startpos($3) $endpos }
109+
| alt_expr PIPE ident AS { missing_error "result name after 'as'" $startpos($4) $endpos }
98110
| { missing_error "pattern expression" $startpos $endpos }
99111

100112
alt_expr:
@@ -171,10 +183,9 @@ basic_atom:
171183
| PREDEFINED_CLASS {
172184
to_pcre_regex $1 $startpos $endpos
173185
}
174-
| IDENT {
175-
let ident_loc = wrap_loc $startpos $endpos $1 in
176-
let pattern_node = to_pcre_regex $1 $startpos $endpos in
177-
wrap_loc $startpos $endpos (Unnamed_subs (ident_loc, pattern_node))
186+
| IDENT | MOD_IDENT {
187+
let ident_loc = wrap_loc $startpos $endpos (string_to_longident $1) in
188+
wrap_loc $startpos $endpos (Call ident_loc)
178189
}
179190

180191
| LBRACKET char_set RBRACKET {
@@ -190,55 +201,70 @@ basic_atom:
190201
| LBRACKET char_set { unclosed_error "character set (missing ']')" $startpos($1) $endpos }
191202
| LBRACKET error { syntax_error "Invalid character set" $startpos $endpos }
192203

193-
| LPAREN pattern RPAREN {
194-
$2
195-
}
196-
| LPAREN RPAREN { missing_error "pattern inside parentheses" $startpos $endpos }
197-
| LPAREN pattern EOF? { unclosed_error "parentheses (missing ')')" $startpos($1) $endpos($2) }
198-
199-
| LPAREN IDENT RPAREN {
200-
let ident_loc = wrap_loc $startpos($2) $endpos($2) $2 in
201-
let pattern_node = to_pcre_regex $2 $startpos($2) $endpos($2) in
202-
wrap_loc $startpos $endpos (Named_subs (ident_loc, None, None, pattern_node))
203-
}
204-
| LPAREN IDENT AS RPAREN { missing_error "name after 'as'" $startpos($3) $endpos($4) }
205-
| LPAREN IDENT AS name = IDENT RPAREN {
206-
let ident_loc = wrap_loc $startpos($2) $endpos($2) $2 in
204+
| LPAREN id = IDENT RPAREN
205+
| LPAREN id = MOD_IDENT RPAREN {
206+
(* (word) -> captures the result of calling 'word' pattern *)
207+
let call_loc = wrap_loc $startpos(id) $endpos(id) (string_to_longident id) in
208+
let call_node = wrap_loc $startpos(id) $endpos(id) (Call call_loc) in
209+
let name_loc = wrap_loc $startpos(id) $endpos(id) (last_component id) in
210+
wrap_loc $startpos $endpos (Capture_as (name_loc, None, call_node))
211+
}
212+
| LPAREN IDENT AS RPAREN | LPAREN MOD_IDENT AS RPAREN { missing_error "name after 'as'" $startpos($3) $endpos($4) }
213+
| LPAREN id = IDENT AS name = IDENT RPAREN
214+
| LPAREN id = MOD_IDENT AS name = IDENT RPAREN {
215+
(* (word as w) -> captures the result of calling 'word' as 'w' *)
216+
let call_loc = wrap_loc $startpos(id) $endpos(id) (string_to_longident id) in
217+
let call_node = wrap_loc $startpos(id) $endpos(id) (Call call_loc) in
207218
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
208-
let pattern_node = to_pcre_regex $2 $startpos($2) $endpos($2) in
209-
wrap_loc $startpos $endpos (Named_subs (ident_loc, Some name_loc, None, pattern_node))
219+
wrap_loc $startpos $endpos (Capture_as (name_loc, None, call_node))
210220
}
211-
| LPAREN IDENT AS IDENT COLON RPAREN {
221+
| LPAREN IDENT AS IDENT COLON RPAREN
222+
| LPAREN IDENT AS MOD_IDENT COLON RPAREN {
212223
missing_error "type converter after ':'" $startpos($5) $endpos($6)
213224
}
214-
| LPAREN IDENT AS name = IDENT COLON INT_CONVERTER RPAREN {
215-
let ident_loc = wrap_loc $startpos($2) $endpos($2) $2 in
225+
| LPAREN id = IDENT AS name = IDENT COLON INT_CONVERTER RPAREN
226+
| LPAREN id = MOD_IDENT AS name = IDENT COLON INT_CONVERTER RPAREN {
227+
(* (digits as n : int) -> captures 'digits' pattern as 'n' converted to int *)
228+
let call_loc = wrap_loc $startpos(id) $endpos(id) (string_to_longident id) in
229+
let call_node = wrap_loc $startpos(id) $endpos(id) (Call call_loc) in
216230
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
217-
let pattern_node = to_pcre_regex $2 $startpos($2) $endpos($2) in
218-
wrap_loc $startpos $endpos (Named_subs (ident_loc, Some name_loc, Some Int, pattern_node))
231+
wrap_loc $startpos $endpos (Capture_as (name_loc, Some Int, call_node))
219232
}
220-
| LPAREN IDENT AS name = IDENT COLON FLOAT_CONVERTER RPAREN {
221-
let ident_loc = wrap_loc $startpos($2) $endpos($2) $2 in
233+
| LPAREN id = IDENT AS name = IDENT COLON FLOAT_CONVERTER RPAREN
234+
| LPAREN id = MOD_IDENT AS name = IDENT COLON FLOAT_CONVERTER RPAREN {
235+
(* (number as f : float) -> captures 'number' pattern as 'f' converted to float *)
236+
let call_loc = wrap_loc $startpos(id) $endpos(id) (string_to_longident id) in
237+
let call_node = wrap_loc $startpos(id) $endpos(id) (Call call_loc) in
222238
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
223-
let pattern_node = to_pcre_regex $2 $startpos($2) $endpos($2) in
224-
wrap_loc $startpos $endpos (Named_subs (ident_loc, Some name_loc, Some Float, pattern_node))
239+
wrap_loc $startpos $endpos (Capture_as (name_loc, Some Float, call_node))
225240
}
226-
| LPAREN IDENT AS name = IDENT COLON EQUAL func = func_name RPAREN {
227-
let ident_loc = wrap_loc $startpos($2) $endpos($2) $2 in
241+
| LPAREN id = IDENT AS name = IDENT COLON EQUAL func = ident RPAREN
242+
| LPAREN id = MOD_IDENT AS name = IDENT COLON EQUAL func = ident RPAREN {
243+
(* (text as t := process) -> captures 'text' pattern as 't' processed by function *)
244+
let call_loc = wrap_loc $startpos(id) $endpos(id) (string_to_longident id) in
245+
let call_node = wrap_loc $startpos(id) $endpos(id) (Call call_loc) in
228246
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
229-
let pattern_node = to_pcre_regex $2 $startpos($2) $endpos($2) in
230-
wrap_loc $startpos $endpos (Named_subs (ident_loc, Some name_loc, Some (Func func), pattern_node))
247+
wrap_loc $startpos $endpos (Capture_as (name_loc, Some (Func func), call_node))
231248
}
232-
| LPAREN IDENT AS IDENT EOF? {
249+
| LPAREN IDENT AS IDENT EOF?
250+
| LPAREN MOD_IDENT AS IDENT EOF? {
233251
unclosed_error "parentheses (missing ')')" $startpos($1) $endpos($4)
234252
}
235-
| LPAREN IDENT AS IDENT COLON INT_CONVERTER EOF? {
253+
| LPAREN IDENT AS IDENT COLON INT_CONVERTER EOF?
254+
| LPAREN MOD_IDENT AS IDENT COLON INT_CONVERTER EOF? {
236255
unclosed_error "parentheses (missing ')')" $startpos($1) $endpos($6)
237256
}
238-
| LPAREN IDENT AS IDENT COLON FLOAT_CONVERTER EOF? {
257+
| LPAREN IDENT AS IDENT COLON FLOAT_CONVERTER EOF?
258+
| LPAREN MOD_IDENT AS IDENT COLON FLOAT_CONVERTER EOF? {
239259
unclosed_error "parentheses (missing ')')" $startpos($1) $endpos($6)
240260
}
241261

262+
| LPAREN pattern RPAREN {
263+
$2
264+
}
265+
| LPAREN RPAREN { missing_error "pattern inside parentheses" $startpos $endpos }
266+
| LPAREN pattern EOF? { unclosed_error "parentheses (missing ')')" $startpos($1) $endpos($2) }
267+
242268
| LPAREN pattern AS RPAREN { missing_error "capture name after 'as'" $startpos($3) $endpos($4) }
243269
| LPAREN pattern AS name = IDENT RPAREN {
244270
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
@@ -258,7 +284,7 @@ basic_atom:
258284
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
259285
wrap_loc $startpos $endpos (Capture_as (name_loc, Some Float, $2))
260286
}
261-
| LPAREN pattern AS name = IDENT COLON EQUAL func = func_name RPAREN {
287+
| LPAREN pattern AS name = IDENT COLON EQUAL func = ident RPAREN {
262288
let name_loc = wrap_loc $startpos(name) $endpos(name) name in
263289
wrap_loc $startpos $endpos (Capture_as (name_loc, Some (Func func), $2))
264290
}
@@ -272,10 +298,9 @@ basic_atom:
272298
unclosed_error "parentheses (missing ')')" $startpos($1) $endpos($6)
273299
}
274300

275-
276301
| LPAREN error { syntax_error "Invalid expression in parentheses" $startpos($2) $endpos }
277302

278-
func_name:
303+
ident:
279304
| IDENT { $1 }
280305
| MOD_IDENT { $1 }
281306

lib/regexp.ml

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -244,33 +244,6 @@ let parse_exn ~target:_ ?(pos = Lexing.dummy_pos) s =
244244
else (
245245
let k, e = with_loc scan_alt (j + 1) in
246246
k, Capture_as (idr, None, e))
247-
| 'N' ->
248-
let j, idr = scan_ident (i + 3) in
249-
begin
250-
match get j with
251-
| '>' ->
252-
let k, e = with_loc scan_alt (j + 1) in
253-
k, Named_subs (idr, None, None, e)
254-
| ' ' ->
255-
let j, jdr = scan_ident (j + 1) in
256-
if jdr.txt = "as" && get j = ' ' then begin
257-
let j, kdr = scan_ident (j + 1) in
258-
if get j <> '>' then fail (j, j + 1) "Unbalanced '<'."
259-
else begin
260-
let k, e = with_loc scan_alt (j + 1) in
261-
k, Named_subs (idr, Some kdr, None, e)
262-
end
263-
end
264-
else fail (j - 2, j) "Substring name missing."
265-
| _ -> fail (i, i + 1) "Unbalanced '<'."
266-
end
267-
| 'U' ->
268-
let j, idr = scan_ident (i + 3) in
269-
if get j <> '>' then fail (i, i + 1) "Unbalanced '<'."
270-
else begin
271-
let k, e = with_loc scan_alt (j + 1) in
272-
k, Unnamed_subs (idr, e)
273-
end
274247
| 'i' ->
275248
if s.[i + 2] = ':' then (
276249
let j, e = with_loc scan_alt (i + 3) in

lib/regexp_types.ml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ and 'a node =
1515
| Caseless of 'a t
1616
| Capture of 'a t
1717
| Capture_as of string Location.loc * conv_ty option * 'a t
18-
| Named_subs of string Location.loc * string Location.loc option * conv_ty option * 'a t
19-
| Unnamed_subs of string Location.loc * 'a t
2018
| Pipe_all of string Location.loc * string * 'a t
2119
| Call of Longident.t Location.loc
2220
(* TODO: | Case_blind of t *)

lib/regexp_types.mli

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ and 'a node =
1515
| Caseless of 'a t
1616
| Capture of 'a t
1717
| Capture_as of string Location.loc * conv_ty option * 'a t
18-
| Named_subs of string Location.loc * string Location.loc option * conv_ty option * 'a t
19-
| Unnamed_subs of string Location.loc * 'a t
2018
| Pipe_all of string Location.loc * string * 'a t
2119
| Call of Longident.t Location.loc
2220
(* TODO: | Case_blind of t *)

0 commit comments

Comments
 (0)