Skip to content

Commit a112706

Browse files
authored
Merge pull request #43 from idris-hackers/impparser
ImpParser
2 parents eaa63d1 + 6211f10 commit a112706

File tree

4 files changed

+370
-37
lines changed

4 files changed

+370
-37
lines changed

software_foundations.ipkg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ modules = Basics
1111
, ProofObjects
1212
, Rel
1313
, Imp
14+
, ImpParser
1415

1516
brief = "Software Foundations in Idris"
1617
version = 0.0.1.0

src/ImpParser.lidr

Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
= ImpParser: Lexing and Parsing in Idris
2+
3+
> module ImpParser
4+
>
5+
6+
The development of the Imp language in `Imp.lidr` completely ignores issues of
7+
concrete syntax -- how an ASCII string that a programmer might write gets
8+
translated into abstract syntax trees defined by the datatypes \idr{AExp},
9+
\idr{BExp}, and \idr{Com}. In this chapter, we illustrate how the rest of the
10+
story can be filled in by building a simple lexical analyzer and parser using
11+
Idris's functional programming facilities.
12+
13+
It is not important to understand all the details here (and accordingly, the
14+
explanations are fairly terse and there are no exercises). The main point is
15+
simply to demonstrate that it can be done. You are invited to look through the
16+
code -- most of it is not very complicated, though the parser relies on some
17+
"monadic" programming idioms that may require a little work to make out -- but
18+
most readers will probably want to just skim down to the `Examples` section at
19+
the very end to get the punchline.
20+
21+
> import Maps
22+
> import Imp
23+
>
24+
25+
== Internals
26+
27+
=== Lexical Analysis
28+
29+
> data Chartype = White | Alpha | Digit | Other
30+
>
31+
> classifyChar : (c : Char) -> Chartype
32+
> classifyChar c =
33+
> if isSpace c then
34+
> White
35+
> else if isAlpha c then
36+
> Alpha
37+
> else if isDigit c then
38+
> Digit
39+
> else
40+
> Other
41+
>
42+
> Token : Type
43+
> Token = String
44+
>
45+
> tokenizeHelper : (cls : Chartype) -> (acc, xs : List Char) -> List (List Char)
46+
> tokenizeHelper cls acc xs =
47+
> case xs of
48+
> [] => tk
49+
> (x::xs') =>
50+
> case (cls, classifyChar x, x) of
51+
> (_, _, '(') =>
52+
> tk ++ ['('] :: (tokenizeHelper Other [] xs')
53+
> (_, _, ')') =>
54+
> tk ++ [')'] :: (tokenizeHelper Other [] xs')
55+
> (_, White, _) =>
56+
> tk ++ (tokenizeHelper White [] xs')
57+
> (Alpha, Alpha, x) =>
58+
> tokenizeHelper Alpha (x::acc) xs'
59+
> (Digit, Digit, x) =>
60+
> tokenizeHelper Digit (x::acc) xs'
61+
> (Other, Other, x) =>
62+
> tokenizeHelper Other (x::acc) xs'
63+
> (_, tp, x) =>
64+
> tk ++ (tokenizeHelper tp [x] xs')
65+
> where
66+
> tk : List (List Char)
67+
> tk = case acc of
68+
> [] => []
69+
> (_::_) => [reverse acc]
70+
>
71+
> tokenize : (s : String) -> List String
72+
> tokenize s = map pack (tokenizeHelper White [] (unpack s))
73+
>
74+
> tokenizeEx1 : tokenize "abc12==3 223*(3+(a+c))" = ["abc","12","==","3","223","*","(","3","+","(","a","+","c",")",")"]
75+
> tokenizeEx1 = Refl
76+
>
77+
78+
=== Parsing
79+
80+
==== Options With Errors
81+
82+
An \idr{Option} type with error messages:
83+
84+
> data OptionE : (x : Type) -> Type where
85+
> SomeE : x -> OptionE x
86+
> NoneE : String -> OptionE x
87+
>
88+
89+
Some interface instances to make writing nested match-expressions on
90+
\idr{OptionE} more convenient.
91+
92+
\todo[inline]{Explain these/link to Haskell etc?}
93+
94+
> Functor OptionE where
95+
> map f (SomeE x) = SomeE (f x)
96+
> map _ (NoneE err) = NoneE err
97+
>
98+
> Applicative OptionE where
99+
> pure = SomeE
100+
> (SomeE f) <*> (SomeE x) = SomeE (f x)
101+
> (SomeE _) <*> (NoneE e) = NoneE e
102+
> (NoneE e) <*> _ = NoneE e
103+
>
104+
> Alternative OptionE where
105+
> empty = NoneE ""
106+
> (SomeE x) <|> _ = SomeE x
107+
> (NoneE _) <|> v = v
108+
>
109+
> Monad OptionE where
110+
> (NoneE e) >>= _ = NoneE e
111+
> (SomeE x) >>= k = k x
112+
>
113+
114+
==== Generic Combinators for Building Parsers
115+
116+
> Parser : (t : Type) -> Type
117+
> Parser t = List Token -> OptionE (t, List Token)
118+
>
119+
> manyHelper : (p : Parser t) -> (acc : List t) -> (steps : Nat) -> Parser (List t)
120+
> manyHelper p acc Z _ = NoneE "Too many recursive calls"
121+
> manyHelper p acc (S steps') xs with (p xs)
122+
> | NoneE _ = SomeE (reverse acc, xs)
123+
> | SomeE (t', xs') = manyHelper p (t'::acc) steps' xs'
124+
>
125+
126+
A (step-indexed) parser that expects zero or more \idr{p}s:
127+
128+
> many : (p : Parser t) -> (steps : Nat) -> Parser (List t)
129+
> many p steps = manyHelper p [] steps
130+
>
131+
132+
A parser that expects a given token, followed by \idr{p}:
133+
134+
> firstExpect : (a : Token) -> (p : Parser t) -> Parser t
135+
> firstExpect a p (x::xs) = if x == a then p xs else NoneE ("Expected '" ++ a ++ "'")
136+
> firstExpect a _ [] = NoneE ("Expected '" ++ a ++ "'")
137+
>
138+
139+
A parser that expects a particular token:
140+
141+
> expect : (t : Token) -> Parser ()
142+
> expect t = firstExpect t (\xs => SomeE ((), xs))
143+
>
144+
145+
==== A Recursive-Descent Parser for Imp
146+
147+
Identifiers:
148+
149+
> parseIdentifier : Parser Id
150+
> parseIdentifier [] = NoneE "Expected identifier"
151+
> parseIdentifier (x::xs) =
152+
> if all isLower (unpack x)
153+
> then SomeE (MkId x, xs)
154+
> else NoneE ("Illegal identifier:'" ++ x ++ "'")
155+
>
156+
157+
Numbers:
158+
159+
> parseNumber : Parser Nat
160+
> parseNumber [] = NoneE "Expected number"
161+
> parseNumber (x::xs) =
162+
> if all isDigit (unpack x)
163+
> then SomeE (foldl (\n, d => 10 * n + (cast (ord d - ord '0'))) 0 (unpack x), xs)
164+
> else NoneE "Expected number"
165+
>
166+
167+
Parse arithmetic expressions
168+
169+
> mutual
170+
> parsePrimaryExp : (steps : Nat) -> Parser AExp
171+
> parsePrimaryExp Z _ = NoneE "Too many recursive calls"
172+
> parsePrimaryExp (S steps') xs =
173+
> (do (i, rest) <- parseIdentifier xs
174+
> pure (AId i, rest))
175+
> <|>
176+
> (do (n, rest) <- parseNumber xs
177+
> pure (ANum n, rest))
178+
> <|>
179+
> (do (e, rest) <- firstExpect "(" (parseSumExp steps') xs
180+
> (u, rest') <- expect ")" rest
181+
> pure (e, rest'))
182+
>
183+
> parseProductExp : (steps : Nat) -> Parser AExp
184+
> parseProductExp Z _ = NoneE "Too many recursive calls"
185+
> parseProductExp (S steps') xs =
186+
> do (e, rest) <- parsePrimaryExp steps' xs
187+
> (es, rest') <- many (firstExpect "*" (parsePrimaryExp steps')) steps' rest
188+
> pure (foldl AMult e es, rest')
189+
>
190+
> parseSumExp : (steps : Nat) -> Parser AExp
191+
> parseSumExp Z _ = NoneE "Too many recursive calls"
192+
> parseSumExp (S steps') xs =
193+
> do (e, rest) <- parseProductExp steps' xs
194+
> (es, rest') <- many psum steps' rest
195+
> pure (foldl (\e0, term =>
196+
> case term of
197+
> (True, e) => APlus e0 e
198+
> (False, e) => AMinus e0 e
199+
> ) e es, rest')
200+
> where
201+
> psum : Parser (Bool, AExp)
202+
> psum xs =
203+
> let p = parseProductExp steps' in
204+
> (do (e, r) <- firstExpect "+" p xs
205+
> pure ((True, e), r))
206+
> <|>
207+
> (do (e, r) <- firstExpect "-" p xs
208+
> pure ((False, e), r))
209+
>
210+
> parseAExp : (steps : Nat) -> Parser AExp
211+
> parseAExp = parseSumExp
212+
>
213+
214+
Parsing boolean expressions:
215+
216+
> mutual
217+
> parseAtomicExp : (steps : Nat) -> Parser BExp
218+
> parseAtomicExp Z _ = NoneE "Too many recursive calls"
219+
> parseAtomicExp (S steps') xs =
220+
> (do (_, rest) <- expect "true" xs
221+
> pure (BTrue, rest))
222+
> <|>
223+
> (do (_, rest) <- expect "false" xs
224+
> pure (BFalse, rest))
225+
> <|>
226+
> (do (e, rest) <- firstExpect "not" (parseAtomicExp steps') xs
227+
> pure (BNot e, rest))
228+
> <|>
229+
> (do (e, rest) <- firstExpect "(" (parseConjunctionExp steps') xs
230+
> (_, rest') <- expect ")" rest
231+
> pure (e, rest'))
232+
> <|>
233+
> (do (e, rest) <- parseProductExp steps' xs
234+
> ((do (e', rest') <- firstExpect "==" (parseAExp steps') rest
235+
> pure (BEq e e', rest'))
236+
> <|>
237+
> (do (e', rest') <- firstExpect "<=" (parseAExp steps') rest
238+
> pure (BLe e e', rest'))
239+
> <|>
240+
> (NoneE "Expected '==' or '<=' after arithmetic expression")))
241+
>
242+
> parseConjunctionExp : (steps : Nat) -> Parser BExp
243+
> parseConjunctionExp Z _ = NoneE "Too many recursive calls"
244+
> parseConjunctionExp (S steps') xs =
245+
> do (e, rest) <- parseAtomicExp steps' xs
246+
> (es, rest') <- many (firstExpect "&&" (parseAtomicExp steps')) steps' rest
247+
> pure (foldl BAnd e es, rest')
248+
>
249+
> parseBExp : (steps : Nat) -> Parser BExp
250+
> parseBExp = parseConjunctionExp
251+
>
252+
> testParsing : (p : Nat -> Parser t) -> (s : String) -> OptionE (t, List Token)
253+
> testParsing p s = p 100 (tokenize s)
254+
>
255+
256+
\todo[inline]{The second one seems designed to fail}
257+
258+
```idris
259+
λΠ> testParsing parseProductExp "x*y*(x*x)*x"
260+
261+
λΠ> testParsing parseConjunctionExp "not((x==x||x*x<=(x*x)*x)&&x==x"
262+
```
263+
264+
Parsing commands:
265+
266+
> mutual
267+
> parseSimpleCommand : (steps : Nat) -> Parser Com
268+
> parseSimpleCommand Z _ = NoneE "Too many recursive calls"
269+
> parseSimpleCommand (S steps') xs =
270+
> (do (_, rest) <- expect "SKIP" xs
271+
> pure (SKIP, rest))
272+
> <|>
273+
> (do (e, rest) <- firstExpect "IF" (parseBExp steps') xs
274+
> (c, rest') <- firstExpect "THEN" (parseSequencedCommand steps') rest
275+
> (c', rest'') <- firstExpect "ELSE" (parseSequencedCommand steps') rest'
276+
> (_, rest''') <- expect "END" rest''
277+
> pure (IFB e THEN c ELSE c' FI, rest'''))
278+
> <|>
279+
> (do (e, rest) <- firstExpect "WHILE" (parseBExp steps') xs
280+
> (c, rest') <- firstExpect "DO" (parseSequencedCommand steps') rest
281+
> (_, rest'') <- expect "END" rest'
282+
> pure (WHILE e DO c END, rest''))
283+
> <|>
284+
> (do (i, rest) <- parseIdentifier xs;
285+
> (e, rest') <- firstExpect ":=" (parseAExp steps') rest
286+
> pure (i ::= e, rest'))
287+
>
288+
> parseSequencedCommand : (steps : Nat) -> Parser Com
289+
> parseSequencedCommand Z _ = NoneE "Too many recursive calls"
290+
> parseSequencedCommand (S steps') xs =
291+
> do (c, rest) <- parseSimpleCommand steps' xs
292+
> ((do (c', rest') <- firstExpect ";;" (parseSequencedCommand steps') rest
293+
> pure (c ;; c', rest'))
294+
> <|>
295+
> (pure (c, rest)))
296+
>
297+
> bignumber : Nat
298+
> bignumber = 1000
299+
>
300+
> parse : (str : String) -> OptionE (Com, List Token)
301+
> parse str = parseSequencedCommand bignumber (tokenize str)
302+
>
303+
304+
== Examples
305+
306+
```idris
307+
λΠ> parse "IF x == y + 1 + 2 - y * 6 + 3 THEN x := x * 1;; y := 0 ELSE SKIP END"
308+
SomeE (CIf (BEq (AId (MkId "x")) (APlus (AMinus (APlus (APlus (AId (MkId "y")) (ANum 1)) (ANum 2)) (AMult (AId (MkId "y")) (ANum 6))) (ANum 3)))
309+
(CSeq (CAss (MkId "x") (AMult (AId (MkId "x")) (ANum 1))) (CAss (MkId "y") (ANum 0)))
310+
CSkip,
311+
[]) : OptionE (Com, List String)
312+
313+
λΠ> parse "SKIP;; z:=x*y*(x*x);; WHILE x==x DO IF z <= z*z && not x == 2 THEN x := z;; y := z ELSE SKIP END;; SKIP END;; x:=z"
314+
```
315+
316+
\todo[inline]{This one is repeated twice in the book for some reason}
317+
318+
```idris
319+
λΠ> parse "SKIP;; z:=x*y*(x*x);; WHILE x==x DO IF z <= z*z && not x == 2 THEN x := z;; y := z ELSE SKIP END;; SKIP END;; x:=z"
320+
SomeE (CSeq CSkip
321+
(CSeq (CAss (MkId "z") (AMult (AMult (AId (MkId "x")) (AId (MkId "y"))) (AMult (AId (MkId "x")) (AId (MkId "x")))))
322+
(CSeq (CWhile (BEq (AId (MkId "x")) (AId (MkId "x")))
323+
(CSeq (CIf (BAnd (BLe (AId (MkId "z")) (AMult (AId (MkId "z")) (AId (MkId "z")))) (BNot (BEq (AId (MkId "x")) (ANum 2))))
324+
(CSeq (CAss (MkId "x") (AId (MkId "z"))) (CAss (MkId "y") (AId (MkId "z"))))
325+
CSkip)
326+
CSkip))
327+
(CAss (MkId "x") (AId (MkId "z"))))),
328+
[]) : OptionE (Com, List String)
329+
```

0 commit comments

Comments
 (0)