Skip to content

Commit c46f1b9

Browse files
author
Anton Vl. Kalinin
committed
[ haskell, #423 ] structured errors in the Haskell backend
A new option "--errors" is introduced, which can change the parser failure type from 'String' to a record type.
1 parent 4fb6fd4 commit c46f1b9

File tree

8 files changed

+166
-39
lines changed

8 files changed

+166
-39
lines changed

source/src/BNFC/Backend/Haskell.hs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import qualified BNFC.Backend.Common.Makefile as Makefile
3333
import BNFC.CF
3434
import BNFC.Options
3535
( SharedOptions(..), TokenText(..), AlexVersion(..), HappyMode(..)
36-
, isDefault, printOptions
36+
, isDefault, printOptions, ErrorType (..)
3737
)
3838
import BNFC.Utils (when, table, getZonedTimeTruncatedToSeconds)
3939

@@ -68,12 +68,12 @@ makeHaskell opts cf = do
6868
-- Generate Happy parser and matching test program.
6969
do
7070
mkfile (happyFile opts) commentWithEmacsModeHint $
71-
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) cf
71+
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) (errorType opts) cf
7272
-- liftIO $ printf "%s Tested with Happy 1.15\n" (happyFile opts)
7373
mkfile (tFile opts) comment $ testfile opts cf
7474

7575
-- Both Happy parser and skeleton (template) rely on Err.
76-
mkfile (errFile opts) comment $ mkErrM errMod
76+
mapM_ (mkfile (errFile opts) comment) $ mkErrM errMod (errorType opts)
7777
mkfile (templateFile opts) comment $ cf2Template (templateFileM opts) absMod (functor opts) cf
7878

7979
-- Generate txt2tags documentation.
@@ -335,7 +335,7 @@ testfile opts cf = unlines $ concat $
335335
[ [ [ "import " , absFileM opts , " (" ++ if_glr impTopCat ++ ")" ] ]
336336
, [ [ "import " , layoutFileM opts , " ( resolveLayout )" ] | lay ]
337337
, [ [ "import " , alexFileM opts , " ( Token, mkPosToken )" ]
338-
, [ "import " , happyFileM opts , " ( " ++ impParser ++ ", myLexer" ++ impParGLR ++ " )" ]
338+
, [ "import " , happyFileM opts , " ( " ++ impParser ++ ", myLexer" ++ impParGLR ++ ", Err )" ]
339339
, [ "import " , printerFileM opts , " ( Print, printTree )" ]
340340
, [ "import " , templateFileM opts , " ()" ]
341341
]
@@ -344,7 +344,6 @@ testfile opts cf = unlines $ concat $
344344
, [ "import qualified Data.Map ( Map, lookup, toList )" | use_glr ]
345345
, [ "import Data.Maybe ( fromJust )" | use_glr ]
346346
, [ ""
347-
, "type Err = Either String"
348347
, if use_glr
349348
then "type ParseFun a = [[Token]] -> (GLRResult, GLR_Output (Err a))"
350349
else "type ParseFun a = [Token] -> Err a"
@@ -357,7 +356,7 @@ testfile opts cf = unlines $ concat $
357356
, "runFile v p f = putStrLn f >> readFile f >>= run v p"
358357
, ""
359358
, "run :: (" ++ xpr ++ if_glr "TreeDecode a, " ++ "Print a, Show a) => Verbosity -> ParseFun a -> " ++ tokenTextType (tokenText opts) ++ " -> IO ()"
360-
, (if use_glr then runGlr else runStd use_xml) myLLexer
359+
, if use_glr then runGlr myLLexer else runStd use_xml myLLexer (errorType opts)
361360
, "showTree :: (Show a, Print a) => Int -> a -> IO ()"
362361
, "showTree v tree = do"
363362
, " putStrV v $ \"\\n[Abstract Syntax]\\n\\n\" ++ show tree"
@@ -408,17 +407,20 @@ testfile opts cf = unlines $ concat $
408407
(hasTopLevelLayout, layoutKeywords, _) = layoutPragmas cf
409408
useTopLevelLayout = isJust hasTopLevelLayout
410409

411-
runStd :: Bool -> (String -> String) -> String
412-
runStd xml myLLexer = unlines $ concat
410+
runStd :: Bool -> (String -> String) -> ErrorType -> String
411+
runStd xml myLLexer errorType = unlines $ concat
413412
[ [ "run v p s ="
414413
, " case p ts of"
415414
, " Left err -> do"
416415
, " putStrLn \"\\nParse Failed...\\n\""
417416
, " putStrV v \"Tokens:\""
418417
, " mapM_ (putStrV v . showPosToken . mkPosToken) ts"
419418
-- , " putStrV v $ show ts"
420-
, " putStrLn err"
421-
, " exitFailure"
419+
]
420+
, case errorType of
421+
ErrorTypeString -> [ " putStrLn err" ]
422+
ErrorTypeStructured -> [ " putStrLn $ \"Error: \" ++ show err" ]
423+
, [ " exitFailure"
422424
, " Right tree -> do"
423425
, " putStrLn \"\\nParse Successful!\""
424426
, " showTree v tree"

source/src/BNFC/Backend/Haskell/CFtoAlex3.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,11 @@ restOfAlex tokenText cf = concat
268268
, "-- A modified \"posn\" wrapper."
269269
, "-------------------------------------------------------------------"
270270
, ""
271-
, "data Posn = Pn !Int !Int !Int"
272-
, " deriving (Eq, Show, Ord)"
271+
, "data Posn = Pn"
272+
, " { pnAbsolute :: !Int"
273+
, " , pnLine :: !Int"
274+
, " , pnColumn :: !Int"
275+
, " } deriving (Eq, Show, Ord)"
273276
, ""
274277
, "alexStartPos :: Posn"
275278
, "alexStartPos = Pn 0 1 1"

source/src/BNFC/Backend/Haskell/CFtoHappy.hs

Lines changed: 100 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import Data.List (intersperse)
1717
import BNFC.CF
1818
import BNFC.Backend.Common.StrUtils (escapeChars)
1919
import BNFC.Backend.Haskell.Utils
20-
import BNFC.Options (HappyMode(..), TokenText(..))
20+
import BNFC.Options (HappyMode(..), TokenText(..), ErrorType(..))
2121
import BNFC.PrettyPrint
2222
import BNFC.Utils
2323

@@ -42,17 +42,18 @@ cf2Happy
4242
-> HappyMode -- ^ Happy mode.
4343
-> TokenText -- ^ Use @ByteString@ or @Text@?
4444
-> Bool -- ^ AST is a functor?
45+
-> ErrorType -- ^ The error type in the parser result type.
4546
-> CF -- ^ Grammar.
4647
-> String -- ^ Generated code.
47-
cf2Happy name absName lexName mode tokenText functor cf = unlines
48+
cf2Happy name absName lexName mode tokenText functor errorType cf = unlines
4849
[ header name absName lexName tokenText eps
4950
, render $ declarations mode functor eps
5051
, render $ tokens cf functor
5152
, delimiter
5253
, specialRules absName functor tokenText cf
5354
, render $ prRules absName functor (rulesForHappy absName functor cf)
5455
, ""
55-
, footer absName tokenText functor eps cf
56+
, footer absName tokenText functor eps errorType cf
5657
]
5758
where
5859
eps = toList $ allEntryPoints cf
@@ -66,7 +67,14 @@ header modName absName lexName tokenText eps = unlines $ concat
6667
, "{-# LANGUAGE PatternSynonyms #-}"
6768
, ""
6869
, "module " ++ modName
69-
, " ( happyError"
70+
, " ( Err"
71+
, " , Failure(..)"
72+
, " , InvalidTokenFailure(..)"
73+
, " , UnexpectedTokenFailure(..)"
74+
, " , UnexpectedEofFailure(..)"
75+
-- TODO: maybe we should stop exporting happyError, since there is no reason
76+
-- to use it outside and its type can vary?
77+
, " , happyError"
7078
, " , myLexer"
7179
]
7280
, map ((" , " ++) . render . parserName) eps
@@ -91,6 +99,8 @@ header modName absName lexName tokenText eps = unlines $ concat
9199
-- -- no lexer declaration
92100
-- %monad { Err } { (>>=) } { return }
93101
-- %tokentype {Token}
102+
-- %errorhandlertype explist
103+
-- %error { happyError }
94104
--
95105
-- >>> declarations Standard True [Cat "A", Cat "B", ListCat (Cat "B")]
96106
-- %name pA_internal A
@@ -99,14 +109,18 @@ header modName absName lexName tokenText eps = unlines $ concat
99109
-- -- no lexer declaration
100110
-- %monad { Err } { (>>=) } { return }
101111
-- %tokentype {Token}
112+
-- %errorhandlertype explist
113+
-- %error { happyError }
102114
declarations :: HappyMode -> Bool -> [Cat] -> Doc
103115
declarations mode functor ns = vcat
104116
[ vcat $ map generateP ns
105117
, case mode of
106118
Standard -> "-- no lexer declaration"
107-
GLR -> "%lexer { myLexer } { Err _ }",
108-
"%monad { Err } { (>>=) } { return }",
109-
"%tokentype" <+> braces (text tokenName)
119+
GLR -> "%lexer { myLexer } { Err _ }"
120+
, "%monad { Err } { (>>=) } { return }"
121+
, "%tokentype" <+> braces (text tokenName)
122+
, "%errorhandlertype explist"
123+
, "%error { happyError }"
110124
]
111125
where
112126
generateP n = "%name" <+> parserName n <> (if functor then "_internal" else "") <+> text (identCat n)
@@ -255,24 +269,88 @@ prRules absM functor = vsep . map prOne
255269

256270
-- Finally, some haskell code.
257271

258-
footer :: ModuleName -> TokenText -> Bool -> [Cat] -> CF -> String
259-
footer absName tokenText functor eps _cf = unlines $ concat
272+
footer :: ModuleName -> TokenText -> Bool -> [Cat] -> ErrorType -> CF -> String
273+
footer absName tokenText functor eps errorType _cf = unlines $ concat
260274
[ [ "{"
261275
, ""
262-
, "type Err = Either String"
276+
, "-- | The parser failure type."
277+
, "--"
278+
, "-- It can contain fields of more specific failure record types, so that they"
279+
, "-- could easily be extended with new fields."
280+
, "data Failure"
281+
, " = FailureInvalidToken !InvalidTokenFailure"
282+
, " | FailureUnexpectedToken !UnexpectedTokenFailure"
283+
, " | FailureUnexpectedEof !UnexpectedEofFailure"
284+
, " deriving (Show, Eq)"
263285
, ""
264-
, "happyError :: [" ++ tokenName ++ "] -> Err a"
265-
, "happyError ts = Left $"
266-
, " \"syntax error at \" ++ tokenPos ts ++ "
267-
, " case ts of"
268-
, " [] -> []"
269-
, " [Err _] -> \" due to lexer error\""
270-
, unwords
271-
[ " t:_ -> \" before `\" ++"
272-
, "(prToken t)"
273-
-- , tokenTextUnpack tokenText "(prToken t)"
274-
, "++ \"'\""
275-
]
286+
, "-- | The lexer error type."
287+
, "newtype InvalidTokenFailure = InvalidTokenFailure"
288+
, " { itfPosn :: Posn -- ^ The position of the beginning of an invalid token."
289+
, " } deriving (Show, Eq)"
290+
, ""
291+
, "-- | The parser error: no production is found to match a token."
292+
, "data UnexpectedTokenFailure = UnexpectedTokenFailure"
293+
, " { utfPosn :: !Posn -- ^ The position of the beginning of the unexpected token."
294+
, " , utfTokenText :: !(" ++ tokenTextType tokenText ++ ")"
295+
, " , utfExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar."
296+
, " } deriving (Show, Eq)"
297+
, ""
298+
, "-- | The parser error: the end of file is encountered but a token is expected."
299+
, "newtype UnexpectedEofFailure = UnexpectedEofFailure"
300+
, " { ueofExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar."
301+
, " } deriving (Show, Eq)"
302+
, ""
303+
]
304+
, case errorType of
305+
ErrorTypeStructured ->
306+
[ "type Err = Either Failure"
307+
, ""
308+
, "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a"
309+
, "happyError = Left . uncurry mkFailure"
310+
]
311+
ErrorTypeString ->
312+
[ "type Err = Either String"
313+
, ""
314+
, "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a"
315+
, "happyError = Left . failureToString . uncurry mkFailure"
316+
, ""
317+
, "failureToString :: Failure -> String"
318+
, "failureToString f ="
319+
, " \"syntax error at \" ++ pos ++ "
320+
, " case f of"
321+
, " FailureUnexpectedEof _ -> []"
322+
, " FailureInvalidToken _ -> \" due to lexer error\""
323+
, unwords
324+
[ " FailureUnexpectedToken ut -> \" before `\" ++"
325+
, tokenTextUnpack tokenText "(utfTokenText ut)"
326+
, "++ \"'\""
327+
]
328+
, " where"
329+
, " pos = case f of"
330+
, " FailureInvalidToken it -> printPosn (itfPosn it)"
331+
, " FailureUnexpectedToken ut -> printPosn (utfPosn ut)"
332+
, " FailureUnexpectedEof _ -> \"end of file\""
333+
]
334+
, [ ""
335+
, "mkFailure :: [" ++ tokenName ++ "] -> [String] -> Failure"
336+
, "mkFailure ts expectedTokens = case ts of"
337+
, " [] ->"
338+
, " FailureUnexpectedEof"
339+
, " UnexpectedEofFailure"
340+
, " { ueofExpectedTokens = expectedTokens"
341+
, " }"
342+
, " [Err pos] ->"
343+
, " FailureInvalidToken"
344+
, " InvalidTokenFailure"
345+
, " { itfPosn = pos"
346+
, " }"
347+
, " t : _ ->"
348+
, " FailureUnexpectedToken"
349+
, " UnexpectedTokenFailure"
350+
, " { utfPosn = tokenPosn t"
351+
, " , utfTokenText = tokenText t"
352+
, " , utfExpectedTokens = expectedTokens"
353+
, " }"
276354
, ""
277355
, "myLexer :: " ++ tokenTextType tokenText ++ " -> [" ++ tokenName ++ "]"
278356
, "myLexer = tokens"

source/src/BNFC/Backend/Haskell/MkErrM.hs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,16 @@
1111
module BNFC.Backend.Haskell.MkErrM where
1212

1313
import BNFC.PrettyPrint
14+
import BNFC.Options (ErrorType(..))
1415

15-
mkErrM :: String -> Doc
16-
mkErrM errMod = vcat
16+
-- | Creates @ErrM.hs@ file if needed.
17+
--
18+
-- It returns 'Nothing' if there is no need to create it.
19+
mkErrM :: String -> ErrorType -> Maybe Doc
20+
mkErrM _ ErrorTypeStructured = Nothing
21+
-- ErrM.hs is only for backward compatibility with old code using string
22+
-- errors, so that we don't create it in case of structured errors.
23+
mkErrM errMod ErrorTypeString = Just $ vcat
1724
[ "{-# LANGUAGE CPP #-}"
1825
, ""
1926
, "#if __GLASGOW_HASKELL__ >= 708"

source/src/BNFC/Backend/HaskellGADT.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,14 @@ makeHaskellGadt opts cf = do
4343
mkHsFileHint (alexFile opts) $ cf2alex3 lexMod (tokenText opts) cf
4444
liftIO $ putStrLn " (Use Alex 3 to compile.)"
4545
mkHsFileHint (happyFile opts) $
46-
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False cf
46+
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False (errorType opts) cf
4747
liftIO $ putStrLn " (Tested with Happy 1.15 - 1.20)"
4848
mkHsFile (templateFile opts) $ cf2Template (templateFileM opts) absMod cf
4949
mkHsFile (printerFile opts) $ cf2Printer StringToken False True prMod absMod cf
5050
when (hasLayout cf) $ mkHsFile (layoutFile opts) $
5151
cf2Layout layMod lexMod cf
5252
mkHsFile (tFile opts) $ Haskell.testfile opts cf
53-
mkHsFile (errFile opts) $ mkErrM errMod
53+
mapM_ (mkHsFile (errFile opts)) $ mkErrM errMod (errorType opts)
5454
Makefile.mkMakefile opts $ Haskell.makefile opts cf
5555
case xml opts of
5656
2 -> makeXML opts True cf

source/src/BNFC/Options.hs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ module BNFC.Options
1313
, SharedOptions(..)
1414
, defaultOptions, isDefault, printOptions
1515
, AlexVersion(..), HappyMode(..), OCamlParser(..), JavaLexerParser(..)
16-
, RecordPositions(..), TokenText(..)
16+
, RecordPositions(..), TokenText(..), ErrorType(..)
1717
, Ansi(..)
1818
, InPackage
1919
, removedIn290
@@ -83,6 +83,12 @@ instance Show Target where
8383
show TargetPygments = "Pygments"
8484
show TargetCheck = "Check LBNF file"
8585

86+
-- | Which error type to use in the generated parser result?
87+
data ErrorType
88+
= ErrorTypeString -- ^ Errors are plain strings.
89+
| ErrorTypeStructured -- ^ Errors are values of a record/structure type.
90+
deriving (Show,Eq,Ord)
91+
8692
-- | Which version of Alex is targeted?
8793
data AlexVersion = Alex3
8894
deriving (Show,Eq,Ord,Bounded,Enum)
@@ -139,6 +145,7 @@ data SharedOptions = Options
139145
, glr :: HappyMode -- ^ Happy option @--glr@.
140146
, xml :: Int -- ^ Options @--xml@, generate DTD and XML printers.
141147
, agda :: Bool -- ^ Option @--agda@. Create bindings for Agda?
148+
, errorType :: ErrorType -- ^ An error type to use in the parser result.
142149
--- OCaml specific
143150
, ocamlParser :: OCamlParser -- ^ Option @--menhir@ to switch to @Menhir@.
144151
--- Java specific
@@ -173,6 +180,7 @@ defaultOptions = Options
173180
, glr = Standard
174181
, xml = 0
175182
, agda = False
183+
, errorType = ErrorTypeString
176184
-- OCaml specific
177185
, ocamlParser = OCamlYacc
178186
-- Java specific
@@ -233,6 +241,9 @@ printOptions opts = unwords . concat $
233241
, [ "--xml" | xml opts == 1 ]
234242
, [ "--xmlt" | xml opts == 2 ]
235243
, [ "--agda" | agda opts ]
244+
, case errorType opts of
245+
ErrorTypeString -> []
246+
ErrorTypeStructured -> [ "--errors=structured" ]
236247
-- C# options:
237248
, [ "--vs" | visualStudio opts ]
238249
, [ "--wfc" | wcf opts ]
@@ -376,6 +387,9 @@ specificOptions =
376387
, ( Option [] ["generic"] (NoArg (\o -> pure o {generic = True}))
377388
"Derive Data, Generic, and Typeable instances for AST types"
378389
, haskellTargets )
390+
, ( Option [] ["errors"] (ReqArg parseAndSetErrorType "TYPE")
391+
"Set the parser error type. Valid values are `string' (default) and `structured'"
392+
, [TargetHaskell] )
379393
, ( Option [] ["xml"] (NoArg (\o -> pure o {xml = 1}))
380394
"Also generate a DTD and an XML printer"
381395
, haskellTargets )
@@ -387,6 +401,14 @@ specificOptions =
387401
"Also generate Agda bindings for the abstract syntax"
388402
, [TargetHaskell] )
389403
]
404+
where
405+
parseAndSetErrorType arg o = (\t -> o {errorType = t}) <$> parseErrorType arg
406+
407+
parseErrorType s = case s of
408+
"string" -> pure ErrorTypeString
409+
"structured" -> pure ErrorTypeStructured
410+
_ -> Left $ "Wrong error type: " ++ show s
411+
390412

391413
-- | The list of specific options for a target.
392414
specificOptions' :: Target -> [OptDescr (SharedOptions -> Either String SharedOptions)]

source/test/BNFC/Backend/HaskellSpec.hs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ spec = do
5050
calc <- getCalc
5151
makeHaskell calcOptions calc `shouldGenerate` "ErrM.hs"
5252

53+
it "does not generate a error module file for structured errors" $ do
54+
let options = calcOptions { errorType = ErrorTypeStructured }
55+
calc <- getCalc
56+
makeHaskell options calc `shouldNotGenerate` "ErrM.hs"
57+
5358
context "with option -mMyMakefile and the Calc grammar" $ do
5459
it "generates a Makefile" $ do
5560
calc <- getCalc

0 commit comments

Comments
 (0)