diff --git a/plutus-core/plutus-core.cabal b/plutus-core/plutus-core.cabal index 15ae0eeb18e..60204df72df 100644 --- a/plutus-core/plutus-core.cabal +++ b/plutus-core/plutus-core.cabal @@ -489,6 +489,7 @@ library untyped-plutus-core-testlib , filepath , hedgehog , lens + , megaparsec , mtl , plutus-core ^>=1.56 , plutus-core:flat diff --git a/plutus-core/plutus-core/src/PlutusCore/Parser/Type.hs b/plutus-core/plutus-core/src/PlutusCore/Parser/Type.hs index 89b228e7af7..f7c448ac22b 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Parser/Type.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Parser/Type.hs @@ -135,7 +135,7 @@ i.e. parse into @Tree Text@ and do the kind checking afterwards, but given that to do the kind checking of builtins regardless (even for UPLC), we don't win much by deferring doing it. -} defaultUni :: Parser (SomeTypeIn (Kinded DefaultUni)) -defaultUni = +defaultUni = do choice $ map try @@ -153,6 +153,10 @@ defaultUni = , someType @_ @BLS12_381.G2.Element <$ symbol "bls12_381_G2_element" , someType @_ @BLS12_381.Pairing.MlResult <$ symbol "bls12_381_mlresult" , someType @_ @Value <$ symbol "value" + , -- We include an explicit failure case here to produce clearer error messages. + -- Without this, using `choice` with `symbol` results in error messages that cover the longest possible SrcSpan, + -- which in this context would be 20 characters spanning the entire "bls12_381_G2_element" token. + fail "Unknown type, expected one of: bool, integer, bytestring, string, unit, list, array, pair, data, value, bls12_381_G1_element, bls12_381_G2_element, bls12_381_mlresult, or a type application in parens" ] tyName :: Parser TyName diff --git a/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser.hs b/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser.hs index 042f4c53f5e..4c8ede2c18f 100644 --- a/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser.hs +++ b/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser.hs @@ -23,6 +23,7 @@ import PlutusCore.Annotation import PlutusCore.Error qualified as PLC import PlutusPrelude (through) import Text.Megaparsec hiding (ParseError, State, parse) +import Text.Megaparsec.Char (char) import Text.Megaparsec.Char.Lexer qualified as Lex import UntypedPlutusCore.Check.Uniques (checkProgram) import UntypedPlutusCore.Core.Type qualified as UPLC @@ -40,75 +41,90 @@ import PlutusCore.Version -- | A parsable UPLC term. type PTerm = UPLC.Term PLC.Name PLC.DefaultUni PLC.DefaultFun SrcSpan -conTerm :: Parser PTerm -conTerm = withSpan $ \sp -> - inParens $ UPLC.Constant sp <$> (symbol "con" *> constant) +conTerm :: SrcSpan -> Parser PTerm +conTerm sp = + UPLC.Constant sp <$> constant -builtinTerm :: Parser PTerm -builtinTerm = withSpan $ \sp -> - inParens $ UPLC.Builtin sp <$> (symbol "builtin" *> builtinFunction) +builtinTerm :: SrcSpan -> Parser PTerm +builtinTerm sp = + UPLC.Builtin sp <$> builtinFunction varTerm :: Parser PTerm -varTerm = withSpan $ \sp -> - UPLC.Var sp <$> name +varTerm = + withSpan $ \sp -> UPLC.Var sp <$> name -lamTerm :: Parser PTerm -lamTerm = withSpan $ \sp -> - inParens $ UPLC.LamAbs sp <$> (symbol "lam" *> trailingWhitespace name) <*> term +lamTerm :: SrcSpan -> Parser PTerm +lamTerm sp = + UPLC.LamAbs sp <$> (trailingWhitespace name) <*> term -appTerm :: Parser PTerm -appTerm = withSpan $ \sp -> +appTerm :: SrcSpan -> Parser PTerm +appTerm sp = -- TODO: should not use the same `sp` for all arguments. - inBrackets $ mkIterApp <$> term <*> (fmap (sp,) <$> some term) - -delayTerm :: Parser PTerm -delayTerm = withSpan $ \sp -> - inParens $ UPLC.Delay sp <$> (symbol "delay" *> term) - -forceTerm :: Parser PTerm -forceTerm = withSpan $ \sp -> - inParens $ UPLC.Force sp <$> (symbol "force" *> term) - -errorTerm :: Parser PTerm -errorTerm = withSpan $ \sp -> - inParens $ UPLC.Error sp <$ symbol "error" - -constrTerm :: Parser PTerm -constrTerm = withSpan $ \sp -> - inParens $ do - let maxTag = fromIntegral (maxBound :: Word64) - tag :: Integer <- symbol "constr" *> lexeme Lex.decimal - args <- many term - whenVersion (\v -> v < plcVersion110) $ fail "'constr' is not allowed before version 1.1.0" - when (tag > maxTag) $ fail "constr tag too large: must be a legal Word64 value" - pure $ UPLC.Constr sp (fromIntegral tag) args - -caseTerm :: Parser PTerm -caseTerm = withSpan $ \sp -> - inParens $ do - res <- UPLC.Case sp <$> (symbol "case" *> term) <*> (V.fromList <$> many term) - whenVersion (\v -> v < plcVersion110) $ fail "'case' is not allowed before version 1.1.0" - pure res + mkIterApp <$> term <*> (fmap (sp,) <$> some term) + +delayTerm :: SrcSpan -> Parser PTerm +delayTerm sp = + UPLC.Delay sp <$> term + +forceTerm :: SrcSpan -> Parser PTerm +forceTerm sp = + UPLC.Force sp <$> term + +errorTerm :: SrcSpan -> Parser PTerm +errorTerm sp = + return (UPLC.Error sp) + +constrTerm :: SrcSpan -> Parser PTerm +constrTerm sp = do + let maxTag = fromIntegral (maxBound :: Word64) + tag :: Integer <- lexeme Lex.decimal + args <- many term + whenVersion (\v -> v < plcVersion110) $ fail "'constr' is not allowed before version 1.1.0" + when (tag > maxTag) $ fail "constr tag too large: must be a legal Word64 value" + pure $ UPLC.Constr sp (fromIntegral tag) args + +caseTerm :: SrcSpan -> Parser PTerm +caseTerm sp = do + res <- UPLC.Case sp <$> term <*> (V.fromList <$> many term) + whenVersion (\v -> v < plcVersion110) $ fail "'case' is not allowed before version 1.1.0" + pure res -- | Parser for all UPLC terms. term :: Parser PTerm -term = leadingWhitespace go +term = + leadingWhitespace $ do + choice + [ tryAppTerm + , tryTermInParens + , varTerm + ] where - go = - choice $ - map - try - [ conTerm - , builtinTerm - , varTerm - , lamTerm - , appTerm - , delayTerm - , forceTerm - , errorTerm - , constrTerm - , caseTerm - ] + tryAppTerm :: Parser PTerm + tryAppTerm = do + withSpan $ \sp -> do + _ <- try (symbol "[") + t <- appTerm sp + _ <- char ']' + return t + + tryTermInParens :: Parser PTerm + tryTermInParens = + withSpan $ \sp -> do + _ <- try (symbol "(") + t <- + choice + [ try (symbol "builtin") *> builtinTerm sp + , try (symbol "lam") *> lamTerm sp + , try (symbol "constr") *> constrTerm sp -- "constr" must come before "con" + , try (symbol "con") *> conTerm sp + , try (symbol "delay") *> delayTerm sp + , try (symbol "force") *> forceTerm sp + , try (symbol "error") *> errorTerm sp + , try (symbol "constr") *> constrTerm sp + , try (symbol "case") *> caseTerm sp + ] + _ <- char ')' + return t -- | Parser for UPLC programs. program :: Parser (UPLC.Program PLC.Name PLC.DefaultUni PLC.DefaultFun SrcSpan) diff --git a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs index ea3a24e5cf5..31b7bd118b3 100644 --- a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs +++ b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs @@ -8,6 +8,7 @@ module Generators.Spec where import PlutusPrelude (display, fold, void, (&&&)) import Control.Lens (view) +import Control.Monad (unless) import Data.Text (Text) import Data.Text qualified as T import Hedgehog (annotate, annotateShow, failure, property, tripping, (===)) @@ -16,7 +17,7 @@ import Hedgehog.Range qualified as Range import PlutusCore (Name) import PlutusCore.Annotation (SrcSpan (..)) import PlutusCore.Default (DefaultFun, DefaultUni) -import PlutusCore.Error (ParserErrorBundle) +import PlutusCore.Error (ParserErrorBundle (ParseErrorB)) import PlutusCore.Flat (flat, unflat) import PlutusCore.Generators.Hedgehog (forAllPretty) import PlutusCore.Generators.Hedgehog.AST (runAstGen) @@ -27,6 +28,7 @@ import PlutusCore.Test (isSerialisable) import Test.Tasty (TestTree, testGroup) import Test.Tasty.HUnit (testCase, (@?=)) import Test.Tasty.Hedgehog (testPropertyNamed) +import Text.Megaparsec (errorBundlePretty) import UntypedPlutusCore (Program) import UntypedPlutusCore qualified as UPLC import UntypedPlutusCore.Core.Type (progTerm, termAnn) @@ -56,20 +58,37 @@ propTermSrcSpan = testPropertyNamed "propTermSrcSpan" . property $ do - code <- + code <- genRandomCode + annotateShow code + let (endingLine, endingCol) = getCodeEndingLineAndCol code + result <- parseTermWithTrailingSpace code + case result of + Right term -> do + let (endingLine', endingCol') = getTermEndingLineAndCol term + (endingLine', endingCol') === (endingLine, endingCol + 1) + Left err -> + handleParseError err + where + genRandomCode = display <$> forAllPretty ( view progTerm <$> runAstGen (regenConstantsUntil isSerialisable =<< genProgram @DefaultFun) ) - annotateShow code - let (endingLine, endingCol) = length &&& T.length . last $ T.lines code - trailingSpaces <- forAllPretty $ Gen.text (Range.linear 0 10) (Gen.element [' ', '\n']) - case runQuoteT . parseTerm $ code <> trailingSpaces of - Right parsed -> - let sp = termAnn parsed - in (srcSpanELine sp, srcSpanECol sp) === (endingLine, endingCol + 1) - Left err -> annotate (display err) >> failure + + getCodeEndingLineAndCol code = (length &&& T.length . last) (T.lines code) + + parseTermWithTrailingSpace code = do + trailingSpaces <- genTrailingSpaces + return $ runQuoteT $ parseTerm (code <> trailingSpaces) + + genTrailingSpaces = forAllPretty $ Gen.text (Range.linear 0 10) (Gen.element [' ', '\n']) + + getTermEndingLineAndCol term = do + let sp = termAnn term + (srcSpanELine sp, srcSpanECol sp) + + handleParseError err = annotate (display err) >> failure propUnit :: TestTree propUnit = @@ -114,6 +133,57 @@ propDefaultUni = . parseGen defaultUni . T.pack +{-| Test that parser errors for list element type mismatches point to the correct location. +This uses the exact example from the issue report. -} +propListElementErrorLocation :: TestTree +propListElementErrorLocation = + testCase "List element error location" $ do + let code = + T.unlines + [ "(program 1.1.0 " + , "[" + , " (force (builtin mkCons)) (con integer 4) (con (list integer) [true]) ]" + , ")" + ] + expectedErrorParts = ["unexpected 't'", "expecting '+', '-', ']', or integer"] + case runQuoteT (parseProgram code) of + Right _ -> error "Expected parse error, but parsing succeeded" + Left (ParseErrorB errBundle) -> do + let errMsg = T.pack $ errorBundlePretty errBundle + let hasAllParts = all (`T.isInfixOf` errMsg) expectedErrorParts + unless hasAllParts $ + error $ + "Error message does not match expected format.\n" + <> "Expected to contain: " + <> show expectedErrorParts + <> "\nGot error message:\n" + <> T.unpack errMsg + +{-| Test that parser errors for typos in type names point to the correct location. +This tests the case where "boot" is used instead of "bool". -} +propTypeNameTypoErrorLocation :: TestTree +propTypeNameTypoErrorLocation = + testCase "Type name typo error location" $ do + let code = + T.unlines + [ "(program 1.1.0" + , "[ (builtin integerToByteString) (con boot True) (con integer 0) (con integer 712372356934756347862573452345342345) ]" + , ")" + ] + expectedErrorParts = ["Unknown type", "expected", "bool"] + case runQuoteT (parseProgram code) of + Right _ -> error "Expected parse error, but parsing succeeded" + Left (ParseErrorB errBundle) -> do + let errMsg = T.pack $ errorBundlePretty errBundle + let hasAllParts = all (`T.isInfixOf` errMsg) expectedErrorParts + unless hasAllParts $ + error $ + "Error message does not match expected format.\n" + <> "Expected to contain: " + <> show expectedErrorParts + <> "\nGot error message:\n" + <> T.unpack errMsg + test_parsing :: TestTree test_parsing = testGroup @@ -123,4 +193,6 @@ test_parsing = , propTermSrcSpan , propUnit , propDefaultUni + , propListElementErrorLocation + , propTypeNameTypoErrorLocation ]