1
+ = ImpParser : Lexing and Parsing in Idris
2
+
3
+ > module ImpParser
4
+ >
5
+
6
+ The development of the Imp language in `Imp . lidr` completely ignores issues of
7
+ concrete syntax -- how an ASCII string that a programmer might write gets
8
+ translated into abstract syntax trees defined by the datatypes \ idr{AExp },
9
+ \ idr{BExp }, and \ idr{Com }. In this chapter, we illustrate how the rest of the
10
+ story can be filled in by building a simple lexical analyzer and parser using
11
+ Idris's functional programming facilities.
12
+
13
+ It is not important to understand all the details here (and accordingly, the
14
+ explanations are fairly terse and there are no exercises). The main point is
15
+ simply to demonstrate that it can be done. You are invited to look through the
16
+ code -- most of it is not very complicated, though the parser relies on some
17
+ " monadic" programming idioms that may require a little work to make out -- but
18
+ most readers will probably want to just skim down to the `Examples` section at
19
+ the very end to get the punchline.
20
+
21
+ > import Maps
22
+ > import Imp
23
+ >
24
+
25
+ == Internals
26
+
27
+ === Lexical Analysis
28
+
29
+ > data Chartype = White | Alpha | Digit | Other
30
+ >
31
+ > classifyChar : (c : Char) -> Chartype
32
+ > classifyChar c =
33
+ > if isSpace c then
34
+ > White
35
+ > else if isAlpha c then
36
+ > Alpha
37
+ > else if isDigit c then
38
+ > Digit
39
+ > else
40
+ > Other
41
+ >
42
+ > Token : Type
43
+ > Token = String
44
+ >
45
+ > tokenizeHelper : (cls : Chartype) -> (acc, xs : List Char) -> List (List Char)
46
+ > tokenizeHelper cls acc xs =
47
+ > case xs of
48
+ > [] => tk
49
+ > (x:: xs') =>
50
+ > case (cls, classifyChar x, x) of
51
+ > (_ , _ , ' (' ) =>
52
+ > tk ++ [' (' ] :: (tokenizeHelper Other [] xs')
53
+ > (_ , _ , ' )' ) =>
54
+ > tk ++ [' )' ] :: (tokenizeHelper Other [] xs')
55
+ > (_ , White , _ ) =>
56
+ > tk ++ (tokenizeHelper White [] xs')
57
+ > (Alpha , Alpha , x) =>
58
+ > tokenizeHelper Alpha (x:: acc) xs'
59
+ > (Digit , Digit , x) =>
60
+ > tokenizeHelper Digit (x:: acc) xs'
61
+ > (Other , Other , x) =>
62
+ > tokenizeHelper Other (x:: acc) xs'
63
+ > (_ , tp, x) =>
64
+ > tk ++ (tokenizeHelper tp [x] xs')
65
+ > where
66
+ > tk : List (List Char)
67
+ > tk = case acc of
68
+ > [] => []
69
+ > (_ :: _ ) => [reverse acc]
70
+ >
71
+ > tokenize : (s : String) -> List String
72
+ > tokenize s = map pack (tokenizeHelper White [] (unpack s))
73
+ >
74
+ > tokenizeEx1 : tokenize "abc12==3 223*(3+(a+c))" = ["abc","12","==","3","223","*","(","3","+","(","a","+","c",")",")"]
75
+ > tokenizeEx1 = Refl
76
+ >
77
+
78
+ === Parsing
79
+
80
+ ==== Options With Errors
81
+
82
+ An \ idr{Option } type with error messages :
83
+
84
+ > data OptionE : (x : Type ) -> Type where
85
+ > SomeE : x -> OptionE x
86
+ > NoneE : String -> OptionE x
87
+ >
88
+
89
+ Some interface instances to make writing nested match- expressions on
90
+ \ idr{OptionE } more convenient.
91
+
92
+ \ todo[inline]{Explain these/ link to Haskell etc? }
93
+
94
+ > Functor OptionE where
95
+ > map f (SomeE x) = SomeE (f x)
96
+ > map _ (NoneE err) = NoneE err
97
+ >
98
+ > Applicative OptionE where
99
+ > pure = SomeE
100
+ > (SomeE f) <*> (SomeE x) = SomeE (f x)
101
+ > (SomeE _ ) <*> (NoneE e) = NoneE e
102
+ > (NoneE e) <*> _ = NoneE e
103
+ >
104
+ > Alternative OptionE where
105
+ > empty = NoneE " "
106
+ > (SomeE x) <|> _ = SomeE x
107
+ > (NoneE _ ) <|> v = v
108
+ >
109
+ > Monad OptionE where
110
+ > (NoneE e) >>= _ = NoneE e
111
+ > (SomeE x) >>= k = k x
112
+ >
113
+
114
+ ==== Generic Combinators for Building Parsers
115
+
116
+ > Parser : (t : Type ) -> Type
117
+ > Parser t = List Token -> OptionE (t, List Token )
118
+ >
119
+ > manyHelper : (p : Parser t) -> (acc : List t) -> (steps : Nat ) -> Parser (List t)
120
+ > manyHelper p acc Z _ = NoneE " Too many recursive calls"
121
+ > manyHelper p acc (S steps') xs with (p xs)
122
+ > | NoneE _ = SomeE (reverse acc, xs)
123
+ > | SomeE (t', xs') = manyHelper p (t':: acc) steps' xs'
124
+ >
125
+
126
+ A (step- indexed) parser that expects zero or more \ idr{p}s :
127
+
128
+ > many : (p : Parser t) -> (steps : Nat ) -> Parser (List t)
129
+ > many p steps = manyHelper p [] steps
130
+ >
131
+
132
+ A parser that expects a given token, followed by \ idr{p}:
133
+
134
+ > firstExpect : (a : Token) -> (p : Parser t) -> Parser t
135
+ > firstExpect a p (x:: xs) = if x == a then p xs else NoneE (" Expected '" ++ a ++ " '" )
136
+ > firstExpect a _ [] = NoneE (" Expected '" ++ a ++ " '" )
137
+ >
138
+
139
+ A parser that expects a particular token :
140
+
141
+ > expect : (t : Token) -> Parser ()
142
+ > expect t = firstExpect t (\ xs => SomeE (() , xs))
143
+ >
144
+
145
+ ==== A Recursive - Descent Parser for Imp
146
+
147
+ Identifiers :
148
+
149
+ > parseIdentifier : Parser Id
150
+ > parseIdentifier [] = NoneE " Expected identifier"
151
+ > parseIdentifier (x:: xs) =
152
+ > if all isLower (unpack x)
153
+ > then SomeE (MkId x, xs)
154
+ > else NoneE (" Illegal identifier:'" ++ x ++ " '" )
155
+ >
156
+
157
+ Numbers :
158
+
159
+ > parseNumber : Parser Nat
160
+ > parseNumber [] = NoneE " Expected number"
161
+ > parseNumber (x:: xs) =
162
+ > if all isDigit (unpack x)
163
+ > then SomeE (foldl (\ n, d => 10 * n + (cast (ord d - ord ' 0' ))) 0 (unpack x), xs)
164
+ > else NoneE " Expected number"
165
+ >
166
+
167
+ Parse arithmetic expressions
168
+
169
+ > mutual
170
+ > parsePrimaryExp : (steps : Nat ) -> Parser AExp
171
+ > parsePrimaryExp Z _ = NoneE " Too many recursive calls"
172
+ > parsePrimaryExp (S steps') xs =
173
+ > (do (i, rest) <- parseIdentifier xs
174
+ > pure (AId i, rest))
175
+ > <|>
176
+ > (do (n, rest) <- parseNumber xs
177
+ > pure (ANum n, rest))
178
+ > <|>
179
+ > (do (e, rest) <- firstExpect " (" (parseSumExp steps') xs
180
+ > (u, rest') <- expect " )" rest
181
+ > pure (e, rest'))
182
+ >
183
+ > parseProductExp : (steps : Nat ) -> Parser AExp
184
+ > parseProductExp Z _ = NoneE " Too many recursive calls"
185
+ > parseProductExp (S steps') xs =
186
+ > do (e, rest) <- parsePrimaryExp steps' xs
187
+ > (es, rest') <- many (firstExpect " *" (parsePrimaryExp steps')) steps' rest
188
+ > pure (foldl AMult e es, rest')
189
+ >
190
+ > parseSumExp : (steps : Nat ) -> Parser AExp
191
+ > parseSumExp Z _ = NoneE " Too many recursive calls"
192
+ > parseSumExp (S steps') xs =
193
+ > do (e, rest) <- parseProductExp steps' xs
194
+ > (es, rest') <- many psum steps' rest
195
+ > pure (foldl (\ e0, term =>
196
+ > case term of
197
+ > (True , e) => APlus e0 e
198
+ > (False , e) => AMinus e0 e
199
+ > ) e es, rest')
200
+ > where
201
+ > psum : Parser (Bool , AExp)
202
+ > psum xs =
203
+ > let p = parseProductExp steps' in
204
+ > (do (e, r) <- firstExpect " +" p xs
205
+ > pure ((True , e), r))
206
+ > <|>
207
+ > (do (e, r) <- firstExpect " -" p xs
208
+ > pure ((False , e), r))
209
+ >
210
+ > parseAExp : (steps : Nat ) -> Parser AExp
211
+ > parseAExp = parseSumExp
212
+ >
213
+
214
+ Parsing boolean expressions :
215
+
216
+ > mutual
217
+ > parseAtomicExp : (steps : Nat ) -> Parser BExp
218
+ > parseAtomicExp Z _ = NoneE " Too many recursive calls"
219
+ > parseAtomicExp (S steps') xs =
220
+ > (do (_ , rest) <- expect " true" xs
221
+ > pure (BTrue , rest))
222
+ > <|>
223
+ > (do (_ , rest) <- expect " false" xs
224
+ > pure (BFalse , rest))
225
+ > <|>
226
+ > (do (e, rest) <- firstExpect " not" (parseAtomicExp steps') xs
227
+ > pure (BNot e, rest))
228
+ > <|>
229
+ > (do (e, rest) <- firstExpect " (" (parseConjunctionExp steps') xs
230
+ > (_ , rest') <- expect " )" rest
231
+ > pure (e, rest'))
232
+ > <|>
233
+ > (do (e, rest) <- parseProductExp steps' xs
234
+ > ((do (e', rest') <- firstExpect " ==" (parseAExp steps') rest
235
+ > pure (BEq e e', rest'))
236
+ > <|>
237
+ > (do (e', rest') <- firstExpect " <=" (parseAExp steps') rest
238
+ > pure (BLe e e', rest'))
239
+ > <|>
240
+ > (NoneE " Expected '==' or '<=' after arithmetic expression" )))
241
+ >
242
+ > parseConjunctionExp : (steps : Nat ) -> Parser BExp
243
+ > parseConjunctionExp Z _ = NoneE " Too many recursive calls"
244
+ > parseConjunctionExp (S steps') xs =
245
+ > do (e, rest) <- parseAtomicExp steps' xs
246
+ > (es, rest') <- many (firstExpect " &&" (parseAtomicExp steps')) steps' rest
247
+ > pure (foldl BAnd e es, rest')
248
+ >
249
+ > parseBExp : (steps : Nat ) -> Parser BExp
250
+ > parseBExp = parseConjunctionExp
251
+ >
252
+ > testParsing : (p : Nat -> Parser t) -> (s : String) -> OptionE (t, List Token)
253
+ > testParsing p s = p 100 (tokenize s)
254
+ >
255
+
256
+ \ todo[inline]{The second one seems designed to fail }
257
+
258
+ `` `idris
259
+ λΠ> testParsing parseProductExp " x*y*(x*x)*x"
260
+
261
+ λΠ> testParsing parseConjunctionExp " not((x==x||x*x<=(x*x)*x)&&x==x"
262
+ `` `
263
+
264
+ Parsing commands :
265
+
266
+ > mutual
267
+ > parseSimpleCommand : (steps : Nat ) -> Parser Com
268
+ > parseSimpleCommand Z _ = NoneE " Too many recursive calls"
269
+ > parseSimpleCommand (S steps') xs =
270
+ > (do (_ , rest) <- expect " SKIP" xs
271
+ > pure (SKIP , rest))
272
+ > <|>
273
+ > (do (e, rest) <- firstExpect " IF" (parseBExp steps') xs
274
+ > (c, rest') <- firstExpect " THEN" (parseSequencedCommand steps') rest
275
+ > (c', rest'' ) <- firstExpect "ELSE" (parseSequencedCommand steps' ) rest'
276
+ > (_ , rest'' ' ) <- expect " END" rest''
277
+ > pure (IFB e THEN c ELSE c' FI , rest'' ' ))
278
+ > <|>
279
+ > (do (e, rest) <- firstExpect " WHILE" (parseBExp steps') xs
280
+ > (c, rest') <- firstExpect " DO" (parseSequencedCommand steps') rest
281
+ > (_ , rest'' ) <- expect "END" rest'
282
+ > pure (WHILE e DO c END , rest'' ))
283
+ > <|>
284
+ > (do (i, rest) <- parseIdentifier xs;
285
+ > (e, rest' ) <- firstExpect " :=" (parseAExp steps') rest
286
+ > pure (i ::= e, rest'))
287
+ >
288
+ > parseSequencedCommand : (steps : Nat ) -> Parser Com
289
+ > parseSequencedCommand Z _ = NoneE " Too many recursive calls"
290
+ > parseSequencedCommand (S steps') xs =
291
+ > do (c, rest) <- parseSimpleCommand steps' xs
292
+ > ((do (c', rest') <- firstExpect " ;;" (parseSequencedCommand steps') rest
293
+ > pure (c ;; c', rest'))
294
+ > <|>
295
+ > (pure (c, rest)))
296
+ >
297
+ > bignumber : Nat
298
+ > bignumber = 1000
299
+ >
300
+ > parse : (str : String) -> OptionE (Com, List Token)
301
+ > parse str = parseSequencedCommand bignumber (tokenize str)
302
+ >
303
+
304
+ == Examples
305
+
306
+ `` `idris
307
+ λΠ> parse " IF x == y + 1 + 2 - y * 6 + 3 THEN x := x * 1;; y := 0 ELSE SKIP END"
308
+ SomeE (CIf (BEq (AId (MkId " x" )) (APlus (AMinus (APlus (APlus (AId (MkId " y" )) (ANum 1 )) (ANum 2 )) (AMult (AId (MkId " y" )) (ANum 6 ))) (ANum 3 )))
309
+ (CSeq (CAss (MkId " x" ) (AMult (AId (MkId " x" )) (ANum 1 ))) (CAss (MkId " y" ) (ANum 0 )))
310
+ CSkip ,
311
+ []) : OptionE (Com , List String )
312
+
313
+ λΠ> parse " SKIP;; z:=x*y*(x*x);; WHILE x==x DO IF z <= z*z && not x == 2 THEN x := z;; y := z ELSE SKIP END;; SKIP END;; x:=z"
314
+ `` `
315
+
316
+ \ todo[inline]{This one is repeated twice in the book for some reason}
317
+
318
+ `` `idris
319
+ λΠ> parse " SKIP;; z:=x*y*(x*x);; WHILE x==x DO IF z <= z*z && not x == 2 THEN x := z;; y := z ELSE SKIP END;; SKIP END;; x:=z"
320
+ SomeE (CSeq CSkip
321
+ (CSeq (CAss (MkId " z" ) (AMult (AMult (AId (MkId " x" )) (AId (MkId " y" ))) (AMult (AId (MkId " x" )) (AId (MkId " x" )))))
322
+ (CSeq (CWhile (BEq (AId (MkId " x" )) (AId (MkId " x" )))
323
+ (CSeq (CIf (BAnd (BLe (AId (MkId " z" )) (AMult (AId (MkId " z" )) (AId (MkId " z" )))) (BNot (BEq (AId (MkId " x" )) (ANum 2 ))))
324
+ (CSeq (CAss (MkId " x" ) (AId (MkId " z" ))) (CAss (MkId " y" ) (AId (MkId " z" ))))
325
+ CSkip )
326
+ CSkip ))
327
+ (CAss (MkId " x" ) (AId (MkId " z" ))))),
328
+ []) : OptionE (Com , List String )
329
+ `` `
0 commit comments