Skip to content

Commit 6c9adf9

Browse files
giacomocavalieri and lpil
authored and committed
optimise code generation for pattern matching on aliased string prefix
this closes #5039
1 parent f42c180 commit 6c9adf9

13 files changed

+137
-80
lines changed

compiler-core/src/erlang.rs

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,6 +1262,7 @@ fn let_assert<'a>(
12621262
environment,
12631263
variables,
12641264
guards,
1265+
assignments,
12651266
} = pattern_printer;
12661267

12671268
let clause_guard = optional_clause_guard(None, guards, environment);
@@ -1328,6 +1329,13 @@ fn let_assert<'a>(
13281329
]
13291330
.nest(INDENT)
13301331
];
1332+
1333+
let assignments = if assignments.is_empty() {
1334+
nil()
1335+
} else {
1336+
docvec![",", line(), join(assignments, ",".to_doc().append(line()))]
1337+
};
1338+
13311339
docvec![
13321340
subject_assignment,
13331341
assignment,
@@ -1337,6 +1345,7 @@ fn let_assert<'a>(
13371345
docvec![line(), clauses].nest(INDENT),
13381346
line(),
13391347
"end",
1348+
assignments,
13401349
]
13411350
}
13421351

@@ -1577,11 +1586,12 @@ fn clause<'a>(clause: &'a TypedClause, environment: &mut Env<'a>) -> Document<'a
15771586
environment,
15781587
guards,
15791588
variables: _,
1589+
assignments,
15801590
} = pattern_printer;
15811591

15821592
let guard = optional_clause_guard(guard.as_ref(), guards, environment);
15831593
if then_doc.is_none() {
1584-
then_doc = Some(clause_consequence(then, environment));
1594+
then_doc = Some(clause_consequence(then, assignments, environment));
15851595
end_erlang_vars = environment.erl_function_scope_vars.clone();
15861596
}
15871597

@@ -1598,11 +1608,25 @@ fn clause<'a>(clause: &'a TypedClause, environment: &mut Env<'a>) -> Document<'a
15981608
doc
15991609
}
16001610

1601-
fn clause_consequence<'a>(consequence: &'a TypedExpr, env: &mut Env<'a>) -> Document<'a> {
1602-
match consequence {
1611+
fn clause_consequence<'a>(
1612+
consequence: &'a TypedExpr,
1613+
// Further assignments that the pattern might need to introduce at the start
1614+
// of the new block.
1615+
assignments: Vec<Document<'a>>,
1616+
env: &mut Env<'a>,
1617+
) -> Document<'a> {
1618+
let assignment_doc = if assignments.is_empty() {
1619+
nil()
1620+
} else {
1621+
let separator = ",".to_doc().append(line());
1622+
join(assignments, separator.clone()).append(separator)
1623+
};
1624+
1625+
let consequence = match consequence {
16031626
TypedExpr::Block { statements, .. } => statement_sequence(statements, env),
16041627
_ => expr(consequence, env),
1605-
}
1628+
};
1629+
assignment_doc.append(consequence)
16061630
}
16071631

16081632
fn optional_clause_guard<'a>(

compiler-core/src/erlang/pattern.rs

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ pub(super) struct PatternPrinter<'a, 'env> {
88
pub environment: &'env mut Env<'a>,
99
pub variables: Vec<&'a str>,
1010
pub guards: Vec<Document<'a>>,
11+
/// In case we're dealing with string patterns, we might have something like
12+
/// this: `"a" as letter <> rest`. In this case we want to compile it to
13+
/// `<<"a"/utf8, Rest/binary>>` and then bind a variable to `<<"a"/utf8>>`.
14+
/// This way it's easier for the erlang compiler to optimise the pattern
15+
/// matching.
16+
pub assignments: Vec<Document<'a>>,
1117
}
1218

1319
impl<'a, 'env> PatternPrinter<'a, 'env> {
@@ -16,6 +22,7 @@ impl<'a, 'env> PatternPrinter<'a, 'env> {
1622
environment,
1723
variables: vec![],
1824
guards: vec![],
25+
assignments: vec![],
1926
}
2027
}
2128

@@ -89,43 +96,32 @@ impl<'a, 'env> PatternPrinter<'a, 'env> {
8996
AssignName::Discard(_) => "_".to_doc(),
9097
};
9198

92-
match left_side_assignment {
93-
Some((left_name, _)) => {
94-
// "wibble" as prefix <> rest
95-
// ^^^^^^^^^ In case the left prefix of the pattern matching is given an alias
96-
// we bind it to a local variable so that it can be correctly
97-
// referenced inside the case branch.
98-
//
99-
// <<Prefix:3/binary, Rest/binary>> when Prefix =:= <<"wibble">>
100-
// ^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
101-
// since erlang's binary pattern matching doesn't allow direct string assignment
102-
// to variables within the pattern, we first match the expected prefix length in
103-
// bytes, then use a guard clause to verify the content.
104-
//
105-
self.variables.push(left_name);
106-
let name = self.environment.next_local_var_name(left_name);
107-
self.guards
108-
.push(docvec![name.clone(), " =:= ", string(left_side_string)]);
109-
docvec![
110-
"<<",
111-
name.clone(),
112-
":",
113-
string_length_utf8_bytes(left_side_string),
114-
"/binary",
115-
", ",
116-
right,
117-
"/binary>>",
118-
]
119-
}
120-
None => docvec![
121-
"<<\"",
122-
string_inner(left_side_string),
123-
"\"/utf8",
124-
", ",
125-
right,
126-
"/binary>>"
127-
],
99+
if let Some((left_name, _)) = left_side_assignment {
100+
// "wibble" as prefix <> rest
101+
// ^^^^^^^^^ In case the left prefix of the pattern matching is given an alias
102+
// we bind it to a local variable so that it can be correctly
103+
// referenced inside the case branch.
104+
//
105+
// So we will end up with something that looks like this:
106+
//
107+
// <<"wibble"/utf8, Rest/binary>> ->
108+
// Prefix = <<"wibble"/utf8>>,
109+
// ...
110+
//
111+
self.variables.push(left_name);
112+
let name = self.environment.next_local_var_name(left_name);
113+
self.assignments
114+
.push(docvec![name, " = ", string(left_side_string)]);
128115
}
116+
117+
docvec![
118+
"<<\"",
119+
string_inner(left_side_string),
120+
"\"/utf8",
121+
", ",
122+
right,
123+
"/binary>>"
124+
]
129125
}
130126

131127
Pattern::Invalid { .. } => panic!("invalid patterns should not reach code generation"),

compiler-core/src/erlang/tests/snapshots/gleam_core__erlang__tests__let_assert__string_prefix_pattern_with_prefix_binding.snap

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@ pub fn go() {
1818
-spec go() -> {binary(), binary()}.
1919
go() ->
2020
{Name@1, Greeting@1} = case <<"Hello John"/utf8>> of
21-
<<Greeting:6/binary, Name/binary>> when Greeting =:= <<"Hello "/utf8>> -> {
22-
Name,
23-
Greeting};
21+
<<"Hello "/utf8, Name/binary>> -> {Name, Greeting};
2422
_assert_fail ->
2523
erlang:error(#{gleam_error => let_assert,
2624
message => <<"Pattern match failed, no pattern matched the value."/utf8>>,
@@ -34,4 +32,5 @@ go() ->
3432
pattern_start => 27,
3533
pattern_end => 55})
3634
end,
35+
Greeting = <<"Hello "/utf8>>,
3736
{Greeting@1, Name@1}.

compiler-core/src/erlang/tests/snapshots/gleam_core__erlang__tests__patterns__string_prefix_as_pattern_with_assertion.snap

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pub fn a(x) {
1818
-spec a(any()) -> binary().
1919
a(X) ->
2020
{Rest@1, A@1} = case <<"wibble"/utf8>> of
21-
<<A:1/binary, Rest/binary>> when A =:= <<"a"/utf8>> -> {Rest, A};
21+
<<"a"/utf8, Rest/binary>> -> {Rest, A};
2222
_assert_fail ->
2323
erlang:error(#{gleam_error => let_assert,
2424
message => <<"Pattern match failed, no pattern matched the value."/utf8>>,
@@ -32,4 +32,5 @@ a(X) ->
3232
pattern_start => 27,
3333
pattern_end => 43})
3434
end,
35+
A = <<"a"/utf8>>,
3536
A@1.

compiler-core/src/erlang/tests/snapshots/gleam_core__erlang__tests__patterns__string_prefix_as_pattern_with_list.snap

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ pub fn a(x) {
2020
-spec a(list(binary())) -> binary().
2121
a(X) ->
2222
case X of
23-
[<<A:1/binary, _/binary>>, <<B:1/binary, _/binary>>] when (A =:= <<"a"/utf8>>) andalso (B =:= <<"b"/utf8>>) ->
23+
[<<"a"/utf8, _/binary>>, <<"b"/utf8, _/binary>>] ->
24+
A = <<"a"/utf8>>,
25+
B = <<"b"/utf8>>,
2426
<<A/binary, B/binary>>;
2527

2628
_ ->

compiler-core/src/erlang/tests/snapshots/gleam_core__erlang__tests__patterns__string_prefix_as_pattern_with_multiple_subjects.snap

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ pub fn a(x) {
2020
-spec a(binary()) -> binary().
2121
a(X) ->
2222
case {X, X} of
23-
{_, <<A:1/binary, _/binary>>} when A =:= <<"a"/utf8>> ->
23+
{_, <<"a"/utf8, _/binary>>} ->
24+
A = <<"a"/utf8>>,
2425
A;
2526

2627
{_, _} ->

compiler-core/src/erlang/tests/snapshots/gleam_core__erlang__tests__patterns__string_prefix_as_pattern_with_multiple_subjects_and_guard.snap

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ pub fn a(x) {
2020
-spec a(binary()) -> binary().
2121
a(X) ->
2222
case {X, X} of
23-
{_, <<A:1/binary, Rest/binary>>} when (A =:= <<"a"/utf8>>) andalso (Rest =:= <<"a"/utf8>>) ->
23+
{_, <<"a"/utf8, Rest/binary>>} when Rest =:= <<"a"/utf8>> ->
24+
A = <<"a"/utf8>>,
2425
A;
2526

2627
{_, _} ->

compiler-core/src/erlang/tests/snapshots/gleam_core__erlang__tests__strings__string_prefix_assignment.snap

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ pub fn go(x) {
2222
-spec go(binary()) -> binary().
2323
go(X) ->
2424
case X of
25-
<<Greeting:7/binary, Name/binary>> when Greeting =:= <<"Hello, "/utf8>> ->
25+
<<"Hello, "/utf8, Name/binary>> ->
26+
Greeting = <<"Hello, "/utf8>>,
2627
Greeting;
2728

2829
_ ->

compiler-core/src/erlang/tests/snapshots/gleam_core__erlang__tests__strings__string_prefix_assignment_not_unicode_escape_sequence.snap

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,34 +31,44 @@ pub fn go(x) {
3131
-spec go(binary()) -> binary().
3232
go(X) ->
3333
_ = case X of
34-
<<Start:5/binary, Rest/binary>> when Start =:= <<"\\u{9}"/utf8>> ->
34+
<<"\\u{9}"/utf8, Rest/binary>> ->
35+
Start = <<"\\u{9}"/utf8>>,
3536
<<"test"/utf8>>;
3637

37-
<<Start@1:10/binary, Rest@1/binary>> when Start@1 =:= <<"\\u{000009}"/utf8>> ->
38+
<<"\\u{000009}"/utf8, Rest@1/binary>> ->
39+
Start@1 = <<"\\u{000009}"/utf8>>,
3840
<<"test"/utf8>>;
3941

40-
<<Start@2:6/binary, Rest@2/binary>> when Start@2 =:= <<"\\u{21}"/utf8>> ->
42+
<<"\\u{21}"/utf8, Rest@2/binary>> ->
43+
Start@2 = <<"\\u{21}"/utf8>>,
4144
<<"test"/utf8>>;
4245

43-
<<Start@3:7/binary, Rest@3/binary>> when Start@3 =:= <<"\\u{100}"/utf8>> ->
46+
<<"\\u{100}"/utf8, Rest@3/binary>> ->
47+
Start@3 = <<"\\u{100}"/utf8>>,
4448
<<"test"/utf8>>;
4549

46-
<<Start@4:8/binary, Rest@4/binary>> when Start@4 =:= <<"\\u{1000}"/utf8>> ->
50+
<<"\\u{1000}"/utf8, Rest@4/binary>> ->
51+
Start@4 = <<"\\u{1000}"/utf8>>,
4752
<<"test"/utf8>>;
4853

49-
<<Start@5:9/binary, Rest@5/binary>> when Start@5 =:= <<"\\u{1F600}"/utf8>> ->
54+
<<"\\u{1F600}"/utf8, Rest@5/binary>> ->
55+
Start@5 = <<"\\u{1F600}"/utf8>>,
5056
<<"test"/utf8>>;
5157

52-
<<Start@6:9/binary, Rest@6/binary>> when Start@6 =:= <<"\\u{1f600}"/utf8>> ->
58+
<<"\\u{1f600}"/utf8, Rest@6/binary>> ->
59+
Start@6 = <<"\\u{1f600}"/utf8>>,
5360
<<"test"/utf8>>;
5461

55-
<<Start@7:10/binary, Rest@7/binary>> when Start@7 =:= <<"\\u{01F600}"/utf8>> ->
62+
<<"\\u{01F600}"/utf8, Rest@7/binary>> ->
63+
Start@7 = <<"\\u{01F600}"/utf8>>,
5664
<<"test"/utf8>>;
5765

58-
<<Start@8:10/binary, Rest@8/binary>> when Start@8 =:= <<"\\u{01f600}"/utf8>> ->
66+
<<"\\u{01f600}"/utf8, Rest@8/binary>> ->
67+
Start@8 = <<"\\u{01f600}"/utf8>>,
5968
<<"test"/utf8>>;
6069

61-
<<Start@9:61/binary, Rest@9/binary>> when Start@9 =:= <<"\\u{9} \\u{000009} \\u{21} \\u{100} \\u{1000} \\u{1F600} \\u{01F600}"/utf8>> ->
70+
<<"\\u{9} \\u{000009} \\u{21} \\u{100} \\u{1000} \\u{1F600} \\u{01F600}"/utf8, Rest@9/binary>> ->
71+
Start@9 = <<"\\u{9} \\u{000009} \\u{21} \\u{100} \\u{1000} \\u{1F600} \\u{01F600}"/utf8>>,
6272
<<"test"/utf8>>;
6373

6474
_ ->

compiler-core/src/erlang/tests/snapshots/gleam_core__erlang__tests__strings__string_prefix_assignment_with_escape_sequences.snap

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,55 +38,72 @@ pub fn go(x) {
3838
-spec go(binary()) -> binary().
3939
go(X) ->
4040
_ = case X of
41-
<<Start:1/binary, Rest/binary>> when Start =:= <<"\f"/utf8>> ->
41+
<<"\f"/utf8, Rest/binary>> ->
42+
Start = <<"\f"/utf8>>,
4243
<<"test"/utf8>>;
4344

44-
<<Start@1:1/binary, Rest@1/binary>> when Start@1 =:= <<"\n"/utf8>> ->
45+
<<"\n"/utf8, Rest@1/binary>> ->
46+
Start@1 = <<"\n"/utf8>>,
4547
<<"test"/utf8>>;
4648

47-
<<Start@2:1/binary, Rest@2/binary>> when Start@2 =:= <<"\r"/utf8>> ->
49+
<<"\r"/utf8, Rest@2/binary>> ->
50+
Start@2 = <<"\r"/utf8>>,
4851
<<"test"/utf8>>;
4952

50-
<<Start@3:1/binary, Rest@3/binary>> when Start@3 =:= <<"\t"/utf8>> ->
53+
<<"\t"/utf8, Rest@3/binary>> ->
54+
Start@3 = <<"\t"/utf8>>,
5155
<<"test"/utf8>>;
5256

53-
<<Start@4:1/binary, Rest@4/binary>> when Start@4 =:= <<"\""/utf8>> ->
57+
<<"\""/utf8, Rest@4/binary>> ->
58+
Start@4 = <<"\""/utf8>>,
5459
<<"test"/utf8>>;
5560

56-
<<Start@5:1/binary, Rest@5/binary>> when Start@5 =:= <<"\\"/utf8>> ->
61+
<<"\\"/utf8, Rest@5/binary>> ->
62+
Start@5 = <<"\\"/utf8>>,
5763
<<"test"/utf8>>;
5864

59-
<<Start@6:11/binary, Rest@6/binary>> when Start@6 =:= <<"\f \n \r \t \" \\"/utf8>> ->
65+
<<"\f \n \r \t \" \\"/utf8, Rest@6/binary>> ->
66+
Start@6 = <<"\f \n \r \t \" \\"/utf8>>,
6067
<<"control chars with prefix assignment"/utf8>>;
6168

62-
<<Start@7:1/binary, Rest@7/binary>> when Start@7 =:= <<"\x{9}"/utf8>> ->
69+
<<"\x{9}"/utf8, Rest@7/binary>> ->
70+
Start@7 = <<"\x{9}"/utf8>>,
6371
<<"test"/utf8>>;
6472

65-
<<Start@8:1/binary, Rest@8/binary>> when Start@8 =:= <<"\x{000009}"/utf8>> ->
73+
<<"\x{000009}"/utf8, Rest@8/binary>> ->
74+
Start@8 = <<"\x{000009}"/utf8>>,
6675
<<"test"/utf8>>;
6776

68-
<<Start@9:1/binary, Rest@9/binary>> when Start@9 =:= <<"\x{21}"/utf8>> ->
77+
<<"\x{21}"/utf8, Rest@9/binary>> ->
78+
Start@9 = <<"\x{21}"/utf8>>,
6979
<<"test"/utf8>>;
7080

71-
<<Start@10:2/binary, Rest@10/binary>> when Start@10 =:= <<"\x{100}"/utf8>> ->
81+
<<"\x{100}"/utf8, Rest@10/binary>> ->
82+
Start@10 = <<"\x{100}"/utf8>>,
7283
<<"test"/utf8>>;
7384

74-
<<Start@11:3/binary, Rest@11/binary>> when Start@11 =:= <<"\x{1000}"/utf8>> ->
85+
<<"\x{1000}"/utf8, Rest@11/binary>> ->
86+
Start@11 = <<"\x{1000}"/utf8>>,
7587
<<"test"/utf8>>;
7688

77-
<<Start@12:4/binary, Rest@12/binary>> when Start@12 =:= <<"\x{1F600}"/utf8>> ->
89+
<<"\x{1F600}"/utf8, Rest@12/binary>> ->
90+
Start@12 = <<"\x{1F600}"/utf8>>,
7891
<<"test"/utf8>>;
7992

80-
<<Start@13:4/binary, Rest@13/binary>> when Start@13 =:= <<"\x{1f600}"/utf8>> ->
93+
<<"\x{1f600}"/utf8, Rest@13/binary>> ->
94+
Start@13 = <<"\x{1f600}"/utf8>>,
8195
<<"test"/utf8>>;
8296

83-
<<Start@14:4/binary, Rest@14/binary>> when Start@14 =:= <<"\x{01F600}"/utf8>> ->
97+
<<"\x{01F600}"/utf8, Rest@14/binary>> ->
98+
Start@14 = <<"\x{01F600}"/utf8>>,
8499
<<"test"/utf8>>;
85100

86-
<<Start@15:4/binary, Rest@15/binary>> when Start@15 =:= <<"\x{01f600}"/utf8>> ->
101+
<<"\x{01f600}"/utf8, Rest@15/binary>> ->
102+
Start@15 = <<"\x{01f600}"/utf8>>,
87103
<<"test"/utf8>>;
88104

89-
<<Start@16:22/binary, Rest@16/binary>> when Start@16 =:= <<"\x{9} \x{000009} \x{21} \x{100} \x{1000} \x{1F600} \x{01F600}"/utf8>> ->
105+
<<"\x{9} \x{000009} \x{21} \x{100} \x{1000} \x{1F600} \x{01F600}"/utf8, Rest@16/binary>> ->
106+
Start@16 = <<"\x{9} \x{000009} \x{21} \x{100} \x{1000} \x{1F600} \x{01F600}"/utf8>>,
90107
<<"test"/utf8>>;
91108

92109
_ ->

0 commit comments

Comments
 (0)