Skip to content

Commit 6a659ea

Browse files
committed
Fix parsing of indented modules
Indented modules weren't parsing due to an error in how the recovery routine derived the current indent column. Recovery derived the indent from the layout stack in the token stream. This layout stack, however, represents the stack after the current token. This is usually OK, but has an edge case when the current token is TokLayoutEnd. For the purposes of recovery, we want TokLayoutEnd's column to represent the current indent. In the specific case of top-level module/declaration parsing, we invoke recovery for all declarations, which are also wrapped in a `many`. Indentation recovery eats any token indented more than the current indentation. Once we get to a TokLayoutEnd for the module, which closes the top-level `where` block, the current layout stack is only LytRoot, so the indent defaults to 0. Because the module is also indented to 0, this will fail the recovery predicate as we expect, resulting in a correct module parse. When the indent is more than 0, it will satisfy the recovery predicate (since TokLayoutEnd col is greater than 0), eating the TokLayoutEnd token. Module parsing would then fail due to an unexpected EOF (where TokLayoutEnd was expected). When we take the TokLayoutEnd column into account when deriving the current indent, the recovery predicate will fail as expected, resulting in a correct parse. Fixes #56
1 parent 663bac9 commit 6a659ea

File tree

6 files changed

+39
-12
lines changed

6 files changed

+39
-12
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
run: npm run install
4646

4747
- name: Build project
48-
run: npm run build -- --pedantic-packages
48+
run: npm run build -- --pedantic-packages --strict
4949

5050
- name: Run tests
5151
run: npm run test -- --offline --quiet

bench/src/ParseFile.purs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import Node.FS.Aff (readFile)
1717
import Node.Process as Process
1818
import PureScript.CST (RecoveredParserResult(..), parseModule)
1919
import PureScript.CST.Errors (ParseError, printParseError)
20-
import PureScript.CST.Lexer (lex)
20+
import PureScript.CST.Lexer (lexModule)
2121
import PureScript.CST.Parser.Monad (PositionedError)
2222
import PureScript.CST.Print (TokenOption(..), printSourceTokenWithOption)
2323
import PureScript.CST.TokenStream (TokenStep(..), TokenStream, step)
@@ -36,7 +36,7 @@ main = launchAff_ do
3636
tokens =
3737
map (foldMap (printSourceTokenWithOption ShowLayout))
3838
$ tokenStreamToArray
39-
$ lex contents
39+
$ lexModule contents
4040
for_ tokens Console.log
4141
else
4242
mempty

spago.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package:
22
name: language-cst-parser
33
description: A parser for the PureScript concrete syntax tree.
4-
build:
5-
strict: true
64
publish:
75
version: 0.14.1
86
license: MIT

src/PureScript/CST/Parser.purs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,14 @@ import Data.Array as Array
1919
import Data.Array.NonEmpty (NonEmptyArray)
2020
import Data.Array.NonEmpty as NonEmptyArray
2121
import Data.Either (Either(..))
22-
import Data.Maybe (Maybe(..), maybe)
22+
import Data.Maybe (Maybe(..))
2323
import Data.Set (Set)
2424
import Data.Set as Set
2525
import Data.Tuple (Tuple(..), uncurry)
2626
import Prim as P
2727
import PureScript.CST.Errors (ParseError(..), RecoveredError(..))
28-
import PureScript.CST.Layout (currentIndent)
2928
import PureScript.CST.Parser.Monad (Parser, eof, lookAhead, many, optional, recover, take, try)
30-
import PureScript.CST.TokenStream (TokenStep(..), TokenStream, layoutStack)
29+
import PureScript.CST.TokenStream (TokenStep(..), TokenStream, currentIndentColumn)
3130
import PureScript.CST.TokenStream as TokenStream
3231
import PureScript.CST.Types (AppSpine(..), Binder(..), ClassFundep(..), DataCtor(..), DataMembers(..), Declaration(..), Delimited, DoStatement(..), Export(..), Expr(..), Fixity(..), FixityOp(..), Foreign(..), Guarded(..), GuardedExpr(..), Ident(..), Import(..), ImportDecl(..), Instance(..), InstanceBinding(..), IntValue(..), Label(..), Labeled(..), LetBinding(..), Module(..), ModuleBody(..), ModuleHeader(..), ModuleName(..), Name(..), OneOrDelimited(..), Operator(..), PatternGuard(..), Prefixed(..), Proper(..), QualifiedName(..), RecordLabeled(..), RecordUpdate(..), Role(..), Row(..), Separated(..), SourceToken, Token(..), Type(..), TypeVarBinding(..), Where(..), Wrapped(..))
3332

@@ -1188,7 +1187,7 @@ recoverTokensWhile :: (SourceToken -> Int -> Boolean) -> TokenStream -> Tuple (A
11881187
recoverTokensWhile p initStream = go [] initStream
11891188
where
11901189
indent :: Int
1191-
indent = maybe 0 _.column $ currentIndent $ layoutStack initStream
1190+
indent = currentIndentColumn initStream
11921191

11931192
go :: Array SourceToken -> TokenStream -> Tuple (Array SourceToken) TokenStream
11941193
go acc stream = case TokenStream.step stream of

src/PureScript/CST/TokenStream.purs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ module PureScript.CST.TokenStream
55
, consTokens
66
, layoutStack
77
, unwindLayout
8+
, currentIndentColumn
89
) where
910

1011
import Prelude
@@ -13,11 +14,11 @@ import Data.Foldable (class Foldable, foldr)
1314
import Data.Lazy (Lazy)
1415
import Data.Lazy as Lazy
1516
import Data.List (List(..), (:))
16-
import Data.Maybe (Maybe)
17+
import Data.Maybe (Maybe, maybe)
1718
import Data.Newtype (class Newtype, unwrap)
1819
import Data.Tuple (Tuple(..))
1920
import PureScript.CST.Errors (ParseError)
20-
import PureScript.CST.Layout (LayoutDelim(..), LayoutStack, isIndented, lytToken)
21+
import PureScript.CST.Layout (LayoutDelim(..), LayoutStack, currentIndent, isIndented, lytToken)
2122
import PureScript.CST.Types (Comment, LineFeed, SourcePos, SourceToken, Token(..))
2223

2324
newtype TokenStream = TokenStream (Lazy TokenStep)
@@ -64,3 +65,21 @@ unwindLayout pos eof = go
6465
TokenCons (lytToken pos (TokLayoutEnd pos'.column)) pos (go tl) tl
6566
| otherwise ->
6667
step (go tl)
68+
69+
-- In the token stream, the layout stack represents the state after the token.
70+
-- When determining the current indent level, this creates an edge case relating
71+
-- to TokLayoutEnd. The layout stack will return the next indent, but for the
72+
-- purposes of recovery, we want TokLayoutEnd column to be included as the current
73+
-- indent, necessitating special handling.
74+
currentIndentColumn :: TokenStream -> Int
75+
currentIndentColumn stream = case step stream of
76+
TokenError _ _ _ stk ->
77+
stkColumn stk
78+
TokenEOF _ _ ->
79+
0
80+
TokenCons { value: TokLayoutEnd col } _ _ _ ->
81+
col
82+
TokenCons _ _ _ stk ->
83+
stkColumn stk
84+
where
85+
stkColumn = maybe 0 _.column <<< currentIndent

test/Main.purs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
module Test.Main where
1+
module Test.Main (main) where
22

33
import Prelude
44
import Prim hiding (Type)
@@ -370,3 +370,14 @@ main = do
370370
true
371371
_ ->
372372
false
373+
374+
assertParse "Indented module"
375+
"""
376+
module Test where
377+
test = 42
378+
"""
379+
case _ of
380+
ParseSucceeded (Module _) ->
381+
true
382+
_ ->
383+
false

0 commit comments

Comments
 (0)