Skip to content

[ #71 ] warn about nullable regexs in the absence of start codes #155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions src/AbsSyn.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ module AbsSyn (
wrapperName,
Scanner(..),
RECtx(..),
RExp(..),
RExp(..), nullable,
DFA(..), State(..), SNum, StartCode, Accept(..),
RightContext(..), showRCtx, strtype,
encodeStartCodes, extractActions,
Expand Down Expand Up @@ -188,21 +188,21 @@ usesPreds dfa
-- Regular expressions

-- `RExp' provides an abstract syntax for regular expressions. `Eps' will
-- match empty strings; `Ch p' matches strings containinng a single character
-- match empty strings; `Ch p' matches strings containing a single character
-- `c' if `p c' is true; `re1 :%% re2' matches a string if `re1' matches one of
-- its prefixes and `re2' matches the rest; `re1 :| re2' matches a string if
-- `re1' or `re2' matches it; `Star re', `Plus re' and `Ques re' can be
-- expressed in terms of the other operators. See the definitions of `ARexp'
-- for a formal definition of the semantics of these operators.

data RExp
= Eps
| Ch CharSet
| RExp :%% RExp
| RExp :| RExp
| Star RExp
| Plus RExp
| Ques RExp
= Eps -- ^ Empty.
| Ch CharSet -- ^ Singleton.
| RExp :%% RExp -- ^ Sequence.
| RExp :| RExp -- ^ Alternative.
| Star RExp -- ^ Zero or more repetitions.
| Plus RExp -- ^ One or more repetitions.
| Ques RExp -- ^ Zero or one repetitions.

instance Show RExp where
showsPrec _ Eps = showString "()"
Expand All @@ -213,6 +213,17 @@ instance Show RExp where
showsPrec _ (Plus r) = shows r . ('+':)
showsPrec _ (Ques r) = shows r . ('?':)

-- | A regular expression is nullable if it matches the empty string.
nullable :: RExp -> Bool
nullable Eps = True
nullable Ch{} = False
nullable (l :%% r) = nullable l && nullable r
nullable (l :| r) = nullable l || nullable r
nullable Star{} = True
nullable (Plus r) = nullable r
nullable Ques{} = True


{------------------------------------------------------------------------------
Abstract Regular Expression
------------------------------------------------------------------------------}
Expand Down
13 changes: 11 additions & 2 deletions src/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import Info
import Map ( Map )
import qualified Map hiding ( Map )
import Output
import ParseMonad ( runP )
import ParseMonad ( runP, Warning(..) )
import Parser
import Scan
import Util ( hline )
Expand Down Expand Up @@ -114,11 +114,20 @@ parseScript file prg =
Left (Nothing, err) ->
die (file ++ ": " ++ err ++ "\n")

Right script@(_, _, scanner, _) -> do
Right (warnings, script@(_, _, scanner, _)) -> do
-- issue 46: give proper error when lexer definition is empty
when (null $ scannerTokens scanner) $
dieAlex $ file ++ " contains no lexer rules\n"
-- issue 71: warn about nullable regular expressions
mapM_ printWarning warnings
return script
where
printWarning (WarnNullableRExp (AlexPn _ line col) msg) =
hPutStrLn stderr $ concat
[ "Warning: "
, file , ":", show line , ":" , show col , ": "
, msg
]

alex :: [CLIFlags]
-> FilePath
Expand Down
83 changes: 60 additions & 23 deletions src/ParseMonad.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- -----------------------------------------------------------------------------
--
--
-- ParseMonad.hs, part of Alex
--
-- (c) Simon Marlow 2003
Expand All @@ -9,7 +9,7 @@
module ParseMonad (
AlexInput, alexInputPrevChar, alexGetChar, alexGetByte,
AlexPosn(..), alexStartPos,

Warning(..), warnIfNullable,
P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac,
setStartCode, getStartCode, getInput, setInput,
) where
Expand All @@ -23,7 +23,7 @@ import UTF8
#if __GLASGOW_HASKELL__ < 710
import Control.Applicative ( Applicative(..) )
#endif
import Control.Monad ( liftM, ap )
import Control.Monad ( liftM, ap, when )
import Data.Word (Word8)
-- -----------------------------------------------------------------------------
-- The input type
Expand All @@ -49,15 +49,15 @@ alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
alexGetByte (_,_,[],[]) = Nothing
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
(b:bs) = UTF8.encode c
in p' `seq` Just (b, (p', c, bs, s))

-- -----------------------------------------------------------------------------
-- Token positions

-- `Posn' records the location of a token in the input text. It has three
-- fields: the address (number of chacaters preceding the token), line number
-- fields: the address (number of charaters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
Expand All @@ -77,15 +77,22 @@ alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
-- -----------------------------------------------------------------------------
-- Alex lexing/parsing monad

data Warning
= WarnNullableRExp
{ _warnPos :: AlexPosn -- ^ The position of the code following the regex.
, _warnText :: String -- ^ Warning text.
}

type ParseError = (Maybe AlexPosn, String)
type StartCode = Int

data PState = PState {
smac_env :: Map String CharSet,
rmac_env :: Map String RExp,
startcode :: Int,
input :: AlexInput
}
data PState = PState
{ warnings :: [Warning] -- ^ Stack of warnings, top = last warning.
, smac_env :: Map String CharSet
, rmac_env :: Map String RExp
, startcode :: Int
, input :: AlexInput
}

newtype P a = P { unP :: PState -> Either ParseError (PState,a) }

Expand All @@ -102,15 +109,27 @@ instance Monad P where
Right (env',ok) -> unP (k ok) env'
return = pure

runP :: String -> (Map String CharSet, Map String RExp)
-> P a -> Either ParseError a
runP str (senv,renv) (P p)
-- | Run the parser on given input.
runP :: String
-- ^ Input string.
-> (Map String CharSet, Map String RExp)
-- ^ Character set and regex definitions.
-> P a
-- ^ Parsing computation.
-> Either ParseError ([Warning], a)
-- ^ List of warnings in first-to-last order, result.
runP str (senv,renv) (P p)
= case p initial_state of
Left err -> Left err
Right (_,a) -> Right a
where initial_state =
PState{ smac_env=senv, rmac_env=renv,
startcode = 0, input=(alexStartPos,'\n',[],str) }
Right (s, a) -> Right (reverse (warnings s), a)
where
initial_state = PState
{ warnings = []
, smac_env = senv
, rmac_env = renv
, startcode = 0
, input = (alexStartPos, '\n', [], str)
}

failP :: String -> P a
failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)
Expand All @@ -121,24 +140,24 @@ failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)

lookupSMac :: (AlexPosn,String) -> P CharSet
lookupSMac (posn,smac)
= P $ \s@PState{ smac_env = senv } ->
= P $ \s@PState{ smac_env = senv } ->
case Map.lookup smac senv of
Just ok -> Right (s,ok)
Nothing -> Left (Just posn, "unknown set macro: $" ++ smac)

lookupRMac :: String -> P RExp
lookupRMac rmac
= P $ \s@PState{ rmac_env = renv } ->
lookupRMac rmac
= P $ \s@PState{ rmac_env = renv } ->
case Map.lookup rmac renv of
Just ok -> Right (s,ok)
Nothing -> Left (Nothing, "unknown regex macro: %" ++ rmac)

newSMac :: String -> CharSet -> P ()
newSMac smac set
newSMac smac set
= P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ())

newRMac :: String -> RExp -> P ()
newRMac rmac rexp
newRMac rmac rexp
= P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ())

setStartCode :: StartCode -> P ()
Expand All @@ -152,3 +171,21 @@ getInput = P $ \s -> Right (s, input s)

setInput :: AlexInput -> P ()
setInput inp = P $ \s -> Right (s{ input = inp }, ())

-- | Add a warning if given regular expression is nullable
-- unless the user wrote the regex 'Eps'.
warnIfNullable
:: RExp -- ^ Regular expression.
-> AlexPosn -- ^ Position associated to regular expression.
-> P ()
-- If the user wrote @()@, they wanted to match the empty sequence!
-- Thus, skip the warning then.
warnIfNullable Eps _ = return ()
warnIfNullable r pos = when (nullable r) $ P $ \ s ->
Right (s{ warnings = WarnNullableRExp pos w : warnings s}, ())
where
w = unwords
[ "Regular expression"
, show r
, "matches the empty string."
]
27 changes: 17 additions & 10 deletions src/Parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,23 @@ tokendefs :: { [RECtx] }
| {- empty -} { [] }

tokendef :: { [RECtx] }
: startcodes rule { [ replaceCodes $1 $2 ] }
: startcodes rule { [ replaceCodes $1 (snd $2) ] }
| startcodes '{' rules '}' { map (replaceCodes $1) $3 }
| rule { [ $1 ] }

rule :: { RECtx }
: context rhs { let (l,e,r) = $1 in
RECtx [] l e r $2 }
| rule {% do
let (pos, res@(RECtx _ _ e _ _)) = $1
warnIfNullable e pos
return [ res ]
}

rule :: { (AlexPosn, RECtx) }
: context rhs { let
(l, e, r) = $1
(pos, code) = $2
in (pos, RECtx [] l e r code)
}

rules :: { [RECtx] }
: rule rules { $1 : $2 }
: rule rules { snd $1 : $2 }
| {- empty -} { [] }

startcodes :: { [(String,StartCode)] }
Expand All @@ -132,9 +139,9 @@ startcode :: { String }
: ZERO { "0" }
| ID { $1 }

rhs :: { Maybe Code }
: CODE { case $1 of T _ (CodeT code) -> Just code }
| ';' { Nothing }
rhs :: { (AlexPosn, Maybe Code) }
: CODE { case $1 of T pos (CodeT code) -> (pos, Just code) }
| ';' { (tokPosn $1, Nothing) }

context :: { Maybe CharSet, RExp, RightContext RExp }
: left_ctx rexp right_ctx { (Just $1,$2,$3) }
Expand Down