Skip to content

Commit 5a639fa

Browse files
committed
Tweak Stroken construction to keep track of swallowed indentations
1 parent 63d1c8d commit 5a639fa

File tree

2 files changed

+119
-24
lines changed

2 files changed

+119
-24
lines changed

hkmc2/shared/src/main/scala/hkmc2/syntax/Lexer.scala

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ class Lexer(origin: Origin, dbg: Bool)(using raise: Raise):
372372
toks: Ls[Token -> Loc],
373373
canStartAngles: Bool,
374374
stack: Ls[BracketKind -> Loc -> Ls[Stroken -> Loc]],
375+
swallowedInd: Int, // * Number of previous indentations that were not closed by deindents but by closing other brackets
375376
acc: Ls[Stroken -> Loc],
376377
): Ls[Stroken -> Loc] =
377378
toks match
@@ -380,28 +381,32 @@ class Lexer(origin: Origin, dbg: Bool)(using raise: Raise):
380381
// * where there is no actual body after the `...`.
381382
// * It can't be handled in the parser because this is only valid at the top-level,
382383
// * not within brackets, as in `(arg0, ...) => blah`.
383-
go(OPEN_BRACKET(Indent) -> l0 :: LITVAL(Tree.UnitLit(false)) -> l0 :: Nil, false, stack, acc)
384+
go(OPEN_BRACKET(Indent) -> l0 :: LITVAL(Tree.UnitLit(false)) -> l0 :: Nil, false, stack, swallowedInd, acc)
384385
case (QUOTE, l0) :: (IDENT("<", true), l1) :: rest =>
385-
go(rest, false, stack, (IDENT("<", true), l1) :: (QUOTE, l0) :: acc)
386+
go(rest, false, stack, swallowedInd, (IDENT("<", true), l1) :: (QUOTE, l0) :: acc)
386387
case (QUOTE, l0) :: (IDENT(">", true), l1) :: rest =>
387-
go(rest, false, stack, (IDENT(">", true), l1) :: (QUOTE, l0) :: acc)
388+
go(rest, false, stack, swallowedInd, (IDENT(">", true), l1) :: (QUOTE, l0) :: acc)
388389
case (OPEN_BRACKET(k), l0) :: rest =>
389-
go(rest, false, k -> l0 -> acc :: stack, Nil)
390+
go(rest, false, k -> l0 -> acc :: stack, swallowedInd, Nil)
391+
case (NEWLINE, l1) :: rest if swallowedInd > 0 =>
392+
go((OPEN_BRACKET(Indent), l1) :: rest, false, stack, swallowedInd - 1, acc)
393+
case (CLOSE_BRACKET(Indent), l1) :: rest if swallowedInd > 0 => go(rest, false, stack, swallowedInd - 1, acc)
390394
case (CLOSE_BRACKET(k1), l1) :: rest =>
391395
stack match
392-
case ((Indent, loc), oldAcc) :: _ if k1 =/= Indent =>
396+
case ((k0 @ Indent, l0), oldAcc) :: oldStack if k1 =/= Indent =>
393397
// * Sometimes, open/close parentheses are interleaved with indent/deindent; eg in
394398
// * module P with
395399
// * (
396400
// * 2)
397401
// * 1
398402
// * which results in token stream `|module| |P| |with|→|(|→|2|)|←|↵|1|`.
399-
// * So this code commutes the indent/deindent with the open/close parentheses.
400-
go(CLOSE_BRACKET(Indent) -> l1.left :: (CLOSE_BRACKET(k1), l1) :: OPEN_BRACKET(Indent) -> l1.right :: rest, false, stack, acc)
403+
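// * So we close the pending indented block right away and count it as "swallowed"; the next newline then re-opens an indentation, or the next deindent is dropped, each decrementing that count.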
404+
go(toks, false, oldStack, swallowedInd + 1,
405+
BRACKETS(k0, acc.reverse)(l0.right ++ l1.left) -> (l0 ++ l1) :: oldAcc)
401406
case ((Indent, loc), oldAcc) :: stack
402407
if k1 === Indent && acc.forall { case (SPACE | NEWLINE, _) => true; case _ => false } =>
403408
// * Ignore empty indented blocks:
404-
go(rest, false, stack, oldAcc)
409+
go(rest, false, stack, swallowedInd, oldAcc)
405410
case ((k0, l0), oldAcc) :: stack =>
406411
if k0 =/= k1 && !(k0 === Unquote && k1 === Curly) then
407412
raise(ErrorReport(msg"Mistmatched closing ${k1.name}" -> S(l1) ::
@@ -421,43 +426,43 @@ class Lexer(origin: Origin, dbg: Bool)(using raise: Raise):
421426
case (NEWLINE, _) :: (BRACKETS(Indent, acc), _) :: Nil if k0 is Curly => acc
422427
case _ => acc.reverse
423428
val accr2 = accr.dropWhile(_._1 === SPACE)
424-
go(rest, true, stack, BRACKETS(k0, accr2)(l0.right ++ l1.left) -> (l0 ++ l1) :: oldAcc)
429+
go(rest, true, stack, swallowedInd, BRACKETS(k0, accr2)(l0.right ++ l1.left) -> (l0 ++ l1) :: oldAcc)
425430
case Nil =>
426431
raise(ErrorReport(msg"Unexpected closing ${k1.name}" -> S(l1) :: Nil,
427432
source = Parsing))
428-
go(rest, false, stack, acc)
433+
go(rest, false, stack, swallowedInd, acc)
429434
case (INDENT, loc) :: rest =>
430-
go(OPEN_BRACKET(Indent) -> loc :: rest, false, stack, acc)
435+
go(OPEN_BRACKET(Indent) -> loc :: rest, false, stack, swallowedInd, acc)
431436
case (DEINDENT, loc) :: rest =>
432-
go(CLOSE_BRACKET(Indent) -> loc :: rest, false, stack, acc)
437+
go(CLOSE_BRACKET(Indent) -> loc :: rest, false, stack, swallowedInd, acc)
433438
case (IDENT("<", true), loc) :: rest if canStartAngles =>
434-
go(OPEN_BRACKET(Angle) -> loc :: rest, false, stack, acc)
439+
go(OPEN_BRACKET(Angle) -> loc :: rest, false, stack, swallowedInd, acc)
435440
case (IDENT(">", true), loc) :: rest if canStartAngles && (stack match {
436441
case ((Angle, _), _) :: _ => true
437442
case _ => false
438443
}) =>
439-
go(CLOSE_BRACKET(Angle) -> loc :: rest, false, stack, acc)
444+
go(CLOSE_BRACKET(Angle) -> loc :: rest, false, stack, swallowedInd, acc)
440445
case (IDENT(id, true), loc) :: rest
441446
if (canStartAngles && id.forall(_ == '>') && id.length > 1 && (stack match {
442447
case ((Angle, _), _) :: _ => true
443448
case _ => false
444449
})) => // split `>>` to `>` and `>` so that code like `A<B<C>>` can be parsed correctly
445-
go((CLOSE_BRACKET(Angle) -> loc.left) :: (IDENT(id.drop(1), true) -> loc) :: rest, false, stack, acc)
450+
go((CLOSE_BRACKET(Angle) -> loc.left) :: (IDENT(id.drop(1), true) -> loc) :: rest, false, stack, swallowedInd, acc)
446451
case ((tk @ IDENT(">", true), loc)) :: rest if canStartAngles =>
447452
raise(WarningReport(
448453
msg"This looks like an angle bracket, but it does not close any angle bracket section" -> S(loc) ::
449454
msg"Add spaces around it if you intended to use `<` as an operator" -> N :: Nil,
450455
source = Parsing))
451-
go(rest, false, stack, tk -> loc :: acc)
456+
go(rest, false, stack,swallowedInd, tk -> loc :: acc)
452457
case (tk: Stroken, loc) :: rest =>
453458
go(rest, tk match {
454459
case SPACE | NEWLINE => false
455460
case _ => true
456-
}, stack, tk -> loc :: acc)
461+
}, stack, swallowedInd, tk -> loc :: acc)
457462
case Nil =>
458463
stack match
459464
case ((Indent, loc), oldAcc) :: _ =>
460-
go(CLOSE_BRACKET(Indent) -> loc/*FIXME not proper loc...*/ :: Nil, false, stack, acc)
465+
go(CLOSE_BRACKET(Indent) -> loc/*FIXME not proper loc...*/ :: Nil, false, stack, swallowedInd, acc)
461466
case ((k, l0), oldAcc) :: stack =>
462467
raise(ErrorReport(msg"Unmatched opening ${k.name}" -> S(l0) :: (
463468
if k === Angle then
@@ -467,7 +472,7 @@ class Lexer(origin: Origin, dbg: Bool)(using raise: Raise):
467472
(oldAcc ::: acc).reverse
468473
case Nil => acc.reverse
469474

470-
go(tokens, false, Nil, Nil)
475+
go(tokens, false, Nil, 0, Nil)
471476

472477

473478

hkmc2/shared/src/test/mlscript/basics/Indentation.mls

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,77 @@
11
:js
22

33

4+
tuple(
5+
)
6+
//│ = []
7+
8+
tuple(
9+
)
10+
tuple()
11+
//│ = [[]]
12+
13+
tuple(
14+
) tuple()
15+
//│ = [[]]
16+
17+
tuple(
18+
id(2
19+
+2))
20+
tuple()
21+
//│ = [[4]]
22+
23+
tuple(
24+
id(2
25+
+2))
26+
tuple()
27+
//│ = [[4]]
28+
29+
tuple(
30+
id(2
31+
+2))
32+
tuple()
33+
//│ = []
34+
35+
:fixme
36+
tuple(
37+
)
38+
//│ ╔══[PARSE ERROR] Expected an expression; found new line instead
39+
//│ ║ l.36: tuple(
40+
//│ ║ ^
41+
//│ ║ l.37: )
42+
//│ ╙──
43+
44+
tuple(
45+
)
46+
//│ = []
47+
448
print(
549
2)
650
1
751
//│ > 2
852
//│ = 1
953

54+
id(
55+
1)
56+
//│ = 1
57+
58+
id(
59+
id)(1)
60+
//│ = 1
61+
1062
:p
1163
id(
1264
2)
1365
print()
14-
//│ |id|(|→|2|←|)|→|↵|print|(||)|←|
66+
//│ |id|(|→|2|←|)|→|print|(||)|←|
1567
//│ Parsed:
1668
//│ Jux(App(Ident(id),Tup(List(IntLit(2)))),Block(List(App(Ident(print),Tup(List())))))
1769
//│ > 2
1870

71+
id(0
72+
passTo(id))(2)
73+
//│ = 0
74+
1975
:p
2076
id(
2177
2
@@ -26,6 +82,40 @@ id(
2682
//│ Jux(App(Ident(id),Tup(List(IntLit(2)))),Block(List(App(Ident(print),Tup(List())))))
2783
//│ > 2
2884

85+
:p
86+
id(
87+
id
88+
)(1)
89+
//│ |id|(|→|id|←|↵|)|(|1|)|
90+
//│ Parsed:
91+
//│ App(App(Ident(id),Tup(List(Ident(id)))),Tup(List(IntLit(1))))
92+
//│ = 1
93+
94+
:e
95+
:p
96+
id
97+
1
98+
//│ |id|→|1|←|
99+
//│ Parsed:
100+
//│ Jux(Ident(id),Block(List(IntLit(1))))
101+
//│ ╔══[ERROR] Illegal juxtaposition right-hand side.
102+
//│ ║ l.97: 1
103+
//│ ╙── ^
104+
//│ = [function id]
105+
106+
:e
107+
:re
108+
:p
109+
id(0
110+
id)(1)
111+
//│ |id|(|0|→|id|←|)|(|1|)|
112+
//│ Parsed:
113+
//│ App(App(Ident(id),Tup(List(Jux(IntLit(0),Block(List(Ident(id))))))),Tup(List(IntLit(1))))
114+
//│ ╔══[ERROR] Illegal juxtaposition right-hand side.
115+
//│ ║ l.110: id)(1)
116+
//│ ╙── ^^
117+
//│ ═══[RUNTIME ERROR] TypeError: tmp18 is not a function
118+
29119

30120
fun test2() =
31121
fun funny = (case
@@ -74,10 +164,10 @@ module P with
74164
)
75165
module TraceLogger
76166
//│ ╔══[PARSE ERROR] Expected an expression; found new line instead
77-
//│ ║ l.73: (
78-
//│ ║ ^
79-
//│ ║ l.74: )
80-
//│ ╙── ^^
167+
//│ ║ l.163: (
168+
//│ ║ ^
169+
//│ ║ l.164: )
170+
//│ ╙── ^^
81171

82172

83173
module P with ...

0 commit comments

Comments
 (0)