Skip to content

Commit bbc10d4

Browse files
GearsDatapacks authored and lpil committed
Fix Erlang UTF-16 and UTF-32 pattern matching
1 parent 56c0729 commit bbc10d4

18 files changed

+669
-48
lines changed

compiler-core/src/erlang.rs

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,22 +1232,49 @@ fn let_assert<'a>(
12321232
};
12331233

12341234
let mut guards = vec![];
1235-
let pattern_document = pattern::to_doc(pattern, &mut vars, env, &mut guards);
1235+
let mut assignments = vec![];
1236+
let pattern_document = pattern::to_doc(pattern, &mut vars, env, &mut guards, &mut assignments);
12361237
let clause_guard = optional_clause_guard(None, guards, env);
12371238

12381239
let message = match message {
12391240
Some(message) => expr(message, env),
12401241
None => string("Pattern match failed, no pattern matched the value."),
12411242
};
12421243

1244+
let mut variable_value = |name| {
1245+
for assignment in assignments.iter() {
1246+
// If we are compiling a pattern match such as the following:
1247+
// ```gleam
1248+
// let assert <<"Hello" as m:utf16>> = x
1249+
// ```
1250+
//
1251+
// We could do the same thing as we do in `case` expressions:
1252+
// ```erlang
1253+
// M@2 = case X of
1254+
// <<M:10/binary>> when M =:= <<"Hello"/utf16>> ->
1255+
// M@1 = <<"Hello"/utf8>>,
1256+
// M@1;
1257+
// _ -> erlang:error(...)
1258+
// end
1259+
// ```
1260+
//
1261+
// However, since we are always immediately returning the value
1262+
// assigned to this temporary variable, it is simpler to just return
1263+
// the value itself, rather than assigning and then returning.
1264+
//
1265+
if assignment.variable == name {
1266+
return assignment.value.clone();
1267+
}
1268+
}
1269+
env.local_var_name(name)
1270+
};
1271+
12431272
let value = match vars.as_slice() {
12441273
_ if is_tail => subject.clone(),
12451274
[] => "nil".to_doc(),
1246-
[variable] => env.local_var_name(variable),
1275+
[variable] => variable_value(variable),
12471276
variables => {
1248-
let variables = variables
1249-
.iter()
1250-
.map(|variable| env.local_var_name(variable));
1277+
let variables = variables.iter().map(|name| variable_value(name));
12511278
docvec![
12521279
break_("{", "{"),
12531280
join(variables, break_(",", ", ")).nest(INDENT),
@@ -1311,7 +1338,10 @@ fn let_assert<'a>(
13111338
fn let_<'a>(value: &'a TypedExpr, pat: &'a TypedPattern, env: &mut Env<'a>) -> Document<'a> {
13121339
let body = maybe_block_expr(value, env).group();
13131340
let mut guards = vec![];
1314-
pattern(pat, env, &mut guards).append(" = ").append(body)
1341+
let mut assignments = vec![];
1342+
pattern(pat, env, &mut guards, &mut assignments)
1343+
.append(" = ")
1344+
.append(body)
13151345
}
13161346

13171347
fn float<'a>(value: &str) -> Document<'a> {
@@ -1518,30 +1548,46 @@ fn clause<'a>(clause: &'a TypedClause, env: &mut Env<'a>) -> Document<'a> {
15181548
.chain(alternative_patterns)
15191549
.map(|patterns| {
15201550
let mut additional_guards = vec![];
1551+
let mut assignments = vec![];
15211552
env.erl_function_scope_vars = initial_erlang_vars.clone();
15221553

15231554
let patterns_doc = if patterns.len() == 1 {
15241555
let p = patterns.first().expect("Single pattern clause printing");
1525-
pattern(p, env, &mut additional_guards)
1556+
pattern(p, env, &mut additional_guards, &mut assignments)
15261557
} else {
15271558
tuple(
15281559
patterns
15291560
.iter()
1530-
.map(|p| pattern(p, env, &mut additional_guards)),
1561+
.map(|p| pattern(p, env, &mut additional_guards, &mut assignments)),
15311562
)
15321563
};
15331564

15341565
let guard = optional_clause_guard(guard.as_ref(), additional_guards, env);
1566+
1567+
let assignments = assignments
1568+
.into_iter()
1569+
.map(|assignment| {
1570+
docvec![
1571+
line(),
1572+
env.next_local_var_name(assignment.variable),
1573+
" = ",
1574+
assignment.value,
1575+
","
1576+
]
1577+
.nest(INDENT)
1578+
})
1579+
.collect_vec();
1580+
15351581
if then_doc.is_none() {
15361582
then_doc = Some(clause_consequence(then, env));
15371583
end_erlang_vars = env.erl_function_scope_vars.clone();
15381584
}
15391585

1540-
patterns_doc.append(
1541-
guard
1542-
.append(" ->")
1543-
.append(line().append(then_doc.clone()).nest(INDENT).group()),
1544-
)
1586+
patterns_doc
1587+
.append(guard)
1588+
.append(" ->")
1589+
.append(assignments)
1590+
.append(line().append(then_doc.clone()).nest(INDENT).group())
15451591
}),
15461592
";".to_doc().append(lines(2)),
15471593
);

compiler-core/src/erlang/pattern.rs

Lines changed: 71 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,33 +9,40 @@ use crate::{
99

1010
use super::*;
1111

12+
pub(super) struct PatternAssignment<'a> {
13+
pub variable: &'a EcoString,
14+
pub value: Document<'a>,
15+
}
16+
1217
pub(super) fn pattern<'a>(
1318
p: &'a TypedPattern,
1419
env: &mut Env<'a>,
1520
guards: &mut Vec<Document<'a>>,
21+
assignments: &mut Vec<PatternAssignment<'a>>,
1622
) -> Document<'a> {
1723
let mut vars = vec![];
18-
to_doc(p, &mut vars, env, guards)
24+
to_doc(p, &mut vars, env, guards, assignments)
1925
}
2026

2127
fn print<'a>(
2228
p: &'a TypedPattern,
2329
vars: &mut Vec<&'a str>,
2430
env: &mut Env<'a>,
2531
guards: &mut Vec<Document<'a>>,
32+
assignments: &mut Vec<PatternAssignment<'a>>,
2633
) -> Document<'a> {
2734
match p {
2835
Pattern::Assign {
2936
name, pattern: p, ..
3037
} => {
3138
vars.push(name);
32-
print(p, vars, env, guards)
39+
print(p, vars, env, guards, assignments)
3340
.append(" = ")
3441
.append(env.next_local_var_name(name))
3542
}
3643

3744
Pattern::List { elements, tail, .. } => {
38-
pattern_list(elements, tail.as_deref(), vars, env, guards)
45+
pattern_list(elements, tail.as_deref(), vars, env, guards, assignments)
3946
}
4047

4148
Pattern::Discard { .. } => "_".to_doc(),
@@ -70,7 +77,7 @@ fn print<'a>(
7077
arguments: args,
7178
constructor: Inferred::Known(PatternConstructor { name, .. }),
7279
..
73-
} => tag_tuple_pattern(name, args, vars, env, guards),
80+
} => tag_tuple_pattern(name, args, vars, env, guards, assignments),
7481

7582
Pattern::Constructor {
7683
constructor: Inferred::Unknown,
@@ -79,14 +86,16 @@ fn print<'a>(
7986
panic!("Erlang generation performed with uninferred pattern constructor")
8087
}
8188

82-
Pattern::Tuple { elements, .. } => {
83-
tuple(elements.iter().map(|p| print(p, vars, env, guards)))
84-
}
89+
Pattern::Tuple { elements, .. } => tuple(
90+
elements
91+
.iter()
92+
.map(|p| print(p, vars, env, guards, assignments)),
93+
),
8594

8695
Pattern::BitArray { segments, .. } => bit_array(
8796
segments
8897
.iter()
89-
.map(|s| pattern_segment(&s.value, &s.options, vars, env, guards)),
98+
.map(|s| pattern_segment(s, vars, env, guards, assignments)),
9099
),
91100

92101
Pattern::StringPrefix {
@@ -150,8 +159,9 @@ pub(super) fn to_doc<'a>(
150159
vars: &mut Vec<&'a str>,
151160
env: &mut Env<'a>,
152161
guards: &mut Vec<Document<'a>>,
162+
assignments: &mut Vec<PatternAssignment<'a>>,
153163
) -> Document<'a> {
154-
print(p, vars, env, guards)
164+
print(p, vars, env, guards, assignments)
155165
}
156166

157167
fn tag_tuple_pattern<'a>(
@@ -160,39 +170,48 @@ fn tag_tuple_pattern<'a>(
160170
vars: &mut Vec<&'a str>,
161171
env: &mut Env<'a>,
162172
guards: &mut Vec<Document<'a>>,
173+
assignments: &mut Vec<PatternAssignment<'a>>,
163174
) -> Document<'a> {
164175
if args.is_empty() {
165176
atom_string(to_snake_case(name))
166177
} else {
167178
tuple(
168-
[atom_string(to_snake_case(name))]
169-
.into_iter()
170-
.chain(args.iter().map(|p| print(&p.value, vars, env, guards))),
179+
[atom_string(to_snake_case(name))].into_iter().chain(
180+
args.iter()
181+
.map(|p| print(&p.value, vars, env, guards, assignments)),
182+
),
171183
)
172184
}
173185
}
174186

175187
fn pattern_segment<'a>(
176-
value: &'a TypedPattern,
177-
options: &'a [BitArrayOption<TypedPattern>],
188+
segment: &'a TypedPatternBitArraySegment,
178189
vars: &mut Vec<&'a str>,
179190
env: &mut Env<'a>,
180191
guards: &mut Vec<Document<'a>>,
192+
assignments: &mut Vec<PatternAssignment<'a>>,
181193
) -> Document<'a> {
194+
let value = segment.value.as_ref();
195+
182196
let pattern_is_a_string_literal = matches!(value, Pattern::String { .. });
183197
let pattern_is_a_discard = matches!(value, Pattern::Discard { .. });
184198

185199
let vars = RefCell::new(vars);
186200
let guards = RefCell::new(guards);
201+
let assignments = RefCell::new(assignments);
187202

188203
let create_document = |env: &mut Env<'a>| match value {
189204
Pattern::String { value, .. } => value.to_doc().surround("\"", "\""),
190205
Pattern::Discard { .. }
191206
| Pattern::Variable { .. }
192207
| Pattern::Int { .. }
193-
| Pattern::Float { .. } => {
194-
print(value, &mut vars.borrow_mut(), env, &mut guards.borrow_mut())
195-
}
208+
| Pattern::Float { .. } => print(
209+
value,
210+
&mut vars.borrow_mut(),
211+
env,
212+
&mut guards.borrow_mut(),
213+
&mut assignments.borrow_mut(),
214+
),
196215

197216
Pattern::Assign { name, pattern, .. } => {
198217
vars.borrow_mut().push(name);
@@ -219,18 +238,38 @@ fn pattern_segment<'a>(
219238
// it afterwards.
220239
Pattern::String { value, .. } => {
221240
let escaped = convert_string_escape_chars(value);
222-
let (utf_option, string_length) = if options
223-
.iter()
224-
.any(|option| matches!(option, BitArrayOption::Utf16 { .. }))
225-
{
241+
let (utf_option, string_length) = if segment.has_utf16_option() {
242+
assignments.borrow_mut().push(PatternAssignment {
243+
variable: name,
244+
value: string(value),
245+
});
246+
247+
let option = if segment.has_native_option() {
248+
"utf16-native"
249+
} else if segment.endianness().is_big() {
250+
"utf16"
251+
} else {
252+
"utf16-little"
253+
};
254+
226255
// Each UTF-16 codepoint is 2 bytes
227-
("utf16", length_utf16(&escaped) * 2)
228-
} else if options
229-
.iter()
230-
.any(|option| matches!(option, BitArrayOption::Utf32 { .. }))
231-
{
256+
(option, length_utf16(&escaped) * 2)
257+
} else if segment.has_utf32_option() {
258+
assignments.borrow_mut().push(PatternAssignment {
259+
variable: name,
260+
value: string(value),
261+
});
262+
263+
let option = if segment.has_native_option() {
264+
"utf32-native"
265+
} else if segment.endianness().is_big() {
266+
"utf32"
267+
} else {
268+
"utf32-little"
269+
};
270+
232271
// Each UTF-32 codepoint is 4 bytes
233-
("utf32", length_utf32(&escaped) * 4)
272+
(option, length_utf32(&escaped) * 4)
234273
} else {
235274
("utf8", escaped.len())
236275
};
@@ -266,14 +305,15 @@ fn pattern_segment<'a>(
266305
&mut vars.borrow_mut(),
267306
env,
268307
&mut guards.borrow_mut(),
308+
&mut assignments.borrow_mut(),
269309
)))
270310
};
271311

272312
let unit = |value: &'a u8| Some(eco_format!("unit:{value}").to_doc());
273313

274314
bit_array_segment(
275315
create_document,
276-
options,
316+
&segment.options,
277317
size,
278318
unit,
279319
pattern_is_a_string_literal,
@@ -288,13 +328,14 @@ fn pattern_list<'a>(
288328
vars: &mut Vec<&'a str>,
289329
env: &mut Env<'a>,
290330
guards: &mut Vec<Document<'a>>,
331+
assignments: &mut Vec<PatternAssignment<'a>>,
291332
) -> Document<'a> {
292333
let elements = join(
293334
elements
294335
.iter()
295-
.map(|element| print(element, vars, env, guards)),
336+
.map(|element| print(element, vars, env, guards, assignments)),
296337
break_(",", ", "),
297338
);
298-
let tail = tail.map(|tail| print(tail, vars, env, guards));
339+
let tail = tail.map(|tail| print(tail, vars, env, guards, assignments));
299340
list(elements, tail)
300341
}

0 commit comments

Comments
 (0)