diff --git a/.gitignore b/.gitignore
index e7ad679..6a06f28 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,13 +8,19 @@
 # sbt specific
 .cache
 .history
-.lib/
-dist/*
-target/
-lib_managed/
-src_managed/
-project/boot/
-project/plugins/project/
-
+.lib
+dist
+target
+lib_managed
+src_managed
+project
+.scala-build
+# Tooling
+.project
+.metals
+.bsp
+.bloop
+# Testing
+test.png
diff --git a/.scalafmt.conf b/.scalafmt.conf
new file mode 100644
index 0000000..54f7147
--- /dev/null
+++ b/.scalafmt.conf
@@ -0,0 +1,8 @@
+version = "3.7.15"
+runner.dialect = scala3
+
+assumeStandardLibraryStripMargin = true
+align.stripMargin = true
+
+binPack.unsafeCallSite = true
+binPack.literalArgumentLists = true
\ No newline at end of file
diff --git a/Vagrantfile b/Vagrantfile
index 293a4da..b4805af 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -15,22 +15,20 @@ Vagrant.configure(2) do |config|
   config.vm.provision "shell", inline: <<-SHELL
-    # Refresh sources
+    # Refresh system
     sudo apt-get update -y
+    sudo apt-get upgrade -y
 
     # Graphviz for printing parsers to graphs
     sudo apt-get install -y graphviz
 
-    # Java
-    sudo apt-get install -y openjdk-7-jdk
-
     # Sbt
-    sudo mkdir -p /home/vagrant/bin
-    pushd /home/vagrant/bin/
-    sudo wget https://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/0.13.8/sbt-launch.jar
-    sudo cp /home/vagrant/configs/sbt.sh /home/vagrant/bin/sbt
-    sudo chmod u+x /home/vagrant/bin/sbt
-    sudo chmod +x /home/vagrant/bin/sbt
+    sudo apt-get install -y apt-transport-https curl gnupg
+    echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
+    echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list
+    curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo -H gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import
+    sudo chmod 644 /etc/apt/trusted.gpg.d/scalasbt-release.gpg
+    sudo apt-get update -y
+    sudo apt-get install -y sbt
-    popd
   SHELL
diff --git a/artifact/build.sbt b/artifact/build.sbt
index 157aac1..f22061b 100644
--- a/artifact/build.sbt
+++ b/artifact/build.sbt
@@ -2,20 +2,19 @@
 name := "first-class-derivatives"
 
 version := "1.0"
 
-scalaVersion := "2.11.7"
+scalaVersion := "3.7.3"
 
 scalacOptions ++= Seq("-feature", "-deprecation")
 
-libraryDependencies += "org.scalatest" %% "scalatest" % "2.2.4" % "test"
+libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.19" % "test"
 
-resolvers += "Sonatype OSS Snapshots" at
-  "https://oss.sonatype.org/content/repositories/releases"
+resolvers += "Sonatype OSS Releases" at "https://oss.sonatype.org/content/repositories/releases"
 
-parallelExecution in Test := true
+Test / parallelExecution := true
 
-libraryDependencies += "org.scala-lang.modules" % "scala-xml_2.11" % "1.0.5"
+libraryDependencies += "org.scala-lang.modules" %% "scala-xml" % "2.4.0"
 
-initialCommands in console := """import fcd._; import fcd.DerivativeParsers._"""
+console / initialCommands := """import fcd._; import fcd.DerivativeParsers._"""
 
 // For VM users on windows systems, please uncomment the following line:
 // target := file("/home/vagrant/target/")
diff --git a/artifact/project/build.properties b/artifact/project/build.properties
deleted file mode 100644
index 19623ba..0000000
--- a/artifact/project/build.properties
+++ /dev/null
@@ -1 +0,0 @@
-sbt.version = 0.13.8
diff --git a/artifact/src/main/scala/examples/PythonAst.scala b/artifact/src/main/scala/examples/PythonAst.scala
index c8779d9..7bcb6c9 100644
--- a/artifact/src/main/scala/examples/PythonAst.scala
+++ b/artifact/src/main/scala/examples/PythonAst.scala
@@ -9,25 +9,24 @@ trait PythonAst {
   case class Decorated(decorators: Seq[Decorator], el: Any) extends Tree
 
   trait Def extends Tree
-  case class FuncDef(name: Any, params: Any, retAnnot: Option[Any], body: Any) extends Def
-
-  trait Stmt extends Tree
-  case class Simple(small: Seq[Any]) extends Stmt
-
-  case class Del(exprs: Seq[Any]) extends Stmt
-  case object Pass extends Stmt
-  case object Break extends Stmt
-  case object Continue extends Stmt
-  case class Return(expr: Option[Any]) extends Stmt
-  case class Raise(expr: Option[Any]) extends Stmt
-  case class ExprStmt(expr: Any) extends Stmt
-  case class Import(names: Any, from: Option[Any] = None) extends Stmt
-
-  case class Global(ids: Seq[Any]) extends Stmt
-  case class Nonlocal(ids: Seq[Any]) extends Stmt
-  case class Assert(tests: Seq[Any]) extends Stmt
-
-  case class For(exprs: Seq[Any], in: Any, body: Any, default: Any) extends Stmt
+  case class FuncDef(name: Any, params: Any, retAnnot: Option[Any], body: Any)
+      extends Def
+
+  enum Stmt extends Tree {
+    case Simple(small: Seq[Any])
+    case Del(exprs: Seq[Any])
+    case Pass
+    case Break
+    case Continue
+    case Return(expr: Option[Any])
+    case Raise(expr: Option[Any])
+    case ExprStmt(expr: Any)
+    case Import(names: Any, from: Option[Any] = None)
+    case Global(ids: Seq[Any])
+    case Nonlocal(ids: Seq[Any])
+    case Assert(tests: Seq[Any])
+    case For(exprs: Seq[Any], in: Any, body: Any, default: Any)
+  }
 
   trait Expr extends Tree
   case class BinOp(l: Any, op: Any, r: Any) extends Expr
diff --git a/artifact/src/main/scala/examples/PythonParsers.scala b/artifact/src/main/scala/examples/PythonParsers.scala
index 46762b2..3978968 100644
--- a/artifact/src/main/scala/examples/PythonParsers.scala
+++ b/artifact/src/main/scala/examples/PythonParsers.scala
@@ -2,128 +2,133 @@
 package fcd
 
 import scala.language.implicitConversions
 
-/**
- * Additional Case Study: Python Parser
- * ====================================
- * This file contains an additional python parser implementation to support
- * the claims in our paper:
- *
- *   Brachthäuser, Rendel, Ostermann.
- *   Parsing with First-Class Derivatives
- *   Submitted to OOPSLA 2016.
- *
- * The parser is implemented on top of a very simple lexer. The lexer is
- * completely indentation unaware and for instance should lex:
- *
- *   while␣(True):\n
- *   ␣␣a␣*=␣a\n
- *
- * as
- *
- *   KW("while"), WS, Punct("("), KW("True"), Punct(")"), Punct(":"), NL,
- *   WS, WS, Id("a"), Punct("*="), WS, Id("a"), NL
- *
- * Multiline strings should be lexed as instance of Str, with `value` including
- * all of the spaces and newlines that appear in the multiline string.
- *
- * Python programs are then parsed with the parser `preprocess(file_input)`,
- * where `preprocess` in turn is a parser combinator composed of the following
- * three separately defined "stream preprocessing" parser combinators:
- *
- *   1. stripComments   Removes all comment lexemes from the stream
- *   2. explicitJoin    Implements explicit line joining by dropping all
- *                      NL tokens that are preceded by a Punct("\\")
- *   3. implicitJoin    Implements implicit line joining by dropping all
- *                      NL tokens that occur inside pairs of parenthesis.
- *
- * Interestingly, `implicitJoin` itself is defined from components in the
- * following way:
- *
- *   1. The Dyck language of balanced parenthesis is defined (`dyck`)
- *   2. The input to `dyck` is transformed to filter out all non-parenthesis
- *      tokens (`extDyck`)
- *   3. implicitJoin now delegates *all* tokens while it awaits an opening
- *      parenthesis. After seeing such opening parenthesis it filters out
- *      NL when delegating until `extDyck` is successful and thus all pairs of
- *      parens are closed.
- *
- * Indentation senstivity itself is handled in nonterminal `suite` the way it is
- * described in the paper.
- *
- * The python grammar itself is a straightforward translation of:
- *   https://docs.python.org/3.5/reference/grammar.html
- */
-trait PythonLexemes { self: Parsers with DerivedOps with Syntax =>
-
-  trait Lexeme
-  case class Str(value: String) extends Lexeme
-  case class Num(value: String) extends Lexeme
-  case class KW(name: String) extends Lexeme
-  case class Id(name: String) extends Lexeme
-  // Punctuation
-  case class Punct(sym: String) extends Lexeme
-  case object NL extends Lexeme
-  case object WS extends Lexeme // whitespace
-  case class Comment(content: String) extends Lexeme
-  case object EOS extends Lexeme
+/** Additional Case Study: Python Parser
+ *
+ * This file contains an additional python parser implementation to support
+ * the claims in our paper:
+ *
+ *   Brachthäuser, Rendel, Ostermann.
+ *   Parsing with First-Class Derivatives
+ *   To appear in OOPSLA 2016.
+ *
+ * The parser is implemented on top of a very simple lexer. The lexer is
+ * completely indentation unaware and for instance should lex:
+ *
+ *   while␣(True):\n
+ *   ␣␣a␣*=␣a\n
+ *
+ * as
+ *
+ *   KW("while"), WS, Punct("("), KW("True"), Punct(")"), Punct(":"), NL,
+ *   WS, WS, Id("a"), Punct("*="), WS, Id("a"), NL
+ *
+ * Multiline strings should be lexed as instances of Str, with `value`
+ * including all of the spaces and newlines that appear in the multiline
+ * string.
+ *
+ * Python programs are then parsed with the parser `preprocess(file_input)`,
+ * where `preprocess` in turn is a parser combinator composed of the following
+ * three separately defined "stream preprocessing" parser combinators:
+ *
+ *   1. stripComments   Removes all comment lexemes from the stream
+ *   2. explicitJoin    Implements explicit line joining by dropping all NL
+ *                      tokens that are preceded by a Punct("\\")
+ *   3. implicitJoin    Implements implicit line joining by dropping all NL
+ *                      tokens that occur inside pairs of parentheses.
+ *
+ * Interestingly, `implicitJoin` itself is defined from components in the
+ * following way:
+ *
+ *   1. The Dyck language of balanced parentheses is defined (`dyck`)
+ *   2. The input to `dyck` is transformed to filter out all non-parenthesis
+ *      tokens (`extDyck`)
+ *   3. implicitJoin now delegates *all* tokens while it awaits an opening
+ *      parenthesis. After seeing such an opening parenthesis it filters out
+ *      NL when delegating until `extDyck` is successful and thus all pairs
+ *      of parens are closed.
+ *
+ * Indentation sensitivity itself is handled in nonterminal `suite` the way
+ * it is described in the paper.
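+ *
+ * For example (a sketch; `parse` and its result type come from the parser
+ * backend in use), a whole file is parsed with the composed pipeline:
+ *
+ *   val pythonFile = preprocess(file_input)
+ *   pythonFile parse lexemes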
+ *
+ * The python grammar itself is a straightforward translation of:
+ *   https://docs.python.org/3.5/reference/grammar.html
+ */
+trait PythonLexemes { self: Parsers & DerivedOps & Syntax =>
+
+  enum Lexeme {
+    case Str(value: String)
+    case Num(value: String)
+    case KW(name: String)
+    case Id(name: String)
+    case Punct(sym: String)
+    case Comment(content: String)
+    case NL
+    case WS
+    case EOS
+  }
+
+  import Lexeme._
 
   type Elem = Lexeme
 
-  implicit def lex(lex: Elem): Parser[Elem] = accept(lex)
-  implicit def kw(kw: Symbol): Parser[Elem] = accept(KW(kw.name))
-  implicit def punct(p: String): Parser[Elem] = accept(Punct(p))
+  given lex: Conversion[Elem, Parser[Elem]] = accept(_)
+  // Symbol literals are gone in Scala 3, so keywords are now written as plain
+  // strings. Since the lexer emits KW for alphabetic tokens and Punct for
+  // symbols, the conversion dispatches on the shape of the string.
+  given token: Conversion[String, Parser[Elem]] =
+    s => if (s.forall(_.isLetter)) accept(KW(s)) else accept(Punct(s))
 
-  lazy val string: Parser[Str] = any flatMap {
+  lazy val string = any >> {
     case s: Str => succeed(s)
-    case _ => fail
+    case _      => fail
   }
-  lazy val number: Parser[Num] = any flatMap {
+  lazy val number = any >> {
     case n: Num => succeed(n)
-    case _ => fail
+    case _      => fail
   }
-  lazy val id: Parser[Id] = any flatMap {
+  lazy val id = any >> {
     case id: Id => succeed(id)
-    case _ => fail
+    case _      => fail
   }
-  lazy val comment: Parser[Comment] = any flatMap {
+  lazy val comment = any >> {
     case c: Comment => succeed(c)
-    case _ => fail
+    case _          => fail
   }
 
   def isComment: Lexeme => Boolean = _.isInstanceOf[Comment]
   def isNL: Lexeme => Boolean = _ == NL
 }
 
-trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Syntax with DerivedOps =>
+trait PythonParsers extends PythonLexemes, PythonAst {
+  self: Parsers & Syntax & DerivedOps =>
+
+  import Stmt._
 
   // general toolbox
   def no(els: Elem*): Parser[Elem] = acceptIf(el => !(els contains el))
-  def no(els: Iterable[Elem]): Parser[Elem] = no(els.toSeq : _*)
-  def switch[T](p: Elem => Boolean, thn: Elem => Parser[T], els: Elem => Parser[T]): Parser[T] =
-    eat { c => if (p(c)) thn(c) else els(c) }
+  def no(els: Iterable[Elem]): Parser[Elem] = no(els.toSeq*)
+  def switch[T](
+      p: Elem => Boolean,
+      thn: Elem => Parser[T],
+      els: Elem => Parser[T]
+  ) = eat { c => if (p(c)) thn(c) else els(c) }
 
   // Simply preprocesses the input stream and strips out comments
-  def stripComments[T]: Parser[T] => Parser[T] = { p =>
+  def stripComments[T](p: Parser[T]): Parser[T] = {
     lazy val stripped: Parser[T] =
       done(p) | switch(isComment, _ => stripped, c => stripComments(p << c))
     stripped
   }
 
+  import Lexeme._
+
   val pairs = Map[Elem, Elem](
     Punct("(") -> Punct(")"),
     Punct("[") -> Punct("]"),
-    Punct("{") -> Punct("}"))
+    Punct("{") -> Punct("}")
+  )
   val (opening, closing) = (pairs.keys, pairs.values)
 
-  def enclosed[T]: (=> Parser[T]) => Parser[T] =
-    p => oneOf(opening) >> { o => p <~ pairs(o) }
+  def enclosed[T](p: => Parser[T]) = oneOf(opening) >> { o => p <~ pairs(o) }
 
   // non empty Dyck language on these pairs
   lazy val dyck: Parser[Any] = enclosed(many(dyck))
 
   // the repetition of enclosed is unfortunate
-  lazy val extDyck: Parser[Any] = enclosed(always) &>
+  lazy val extDyck = enclosed(always) &>
     filter((opening ++ closing).toSeq contains _)(dyck)
 
   // From the python reference manual:
@@ -132,9 +137,8 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy
   // over more than one physical line without using backslashes.
   // [...] Implicitly continued lines can carry comments.
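+  // For illustration: under `implicitJoin`, a lexeme stream such as
+  //   Punct("("), Id("x"), NL, Id("y"), Punct(")")
+  // reaches the underlying parser without the NL, since the NL occurs inside
+  // an open pair of parentheses; NLs outside of parentheses pass through.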
   def implicitJoin[T]: Parser[T] => Parser[T] = repeat[T] { p =>
-    ( extDyck &> filter(_ != NL)(delegate(p))
-    | noneOf(opening ++ closing) &> delegate(p)
-    )
+    (extDyck &> filter(_ != NL)(delegate(p))
+      | noneOf(opening ++ closing) &> delegate(p))
   }
 
   // Strips out newlines if they are preceeded by a backslash punctuation
@@ -147,21 +151,21 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy
   // backslash that is not part of a string literal or comment, it is joined
   // with the following forming a single logical line, deleting the backslash
   // and the following end-of-line character.
-  def explicitJoin[T]: Parser[T] => Parser[T] = p => {
+  def explicitJoin[T](p: Parser[T]): Parser[T] = {
     lazy val join: NT[T] =
-      done(p) | switch(_ == Punct("\\"),
-        bs => switch(_ == NL,
-          _ => join,
-          c => explicitJoin(p << bs << c)),
-        c => explicitJoin(p << c))
+      done(p) | switch(
+        _ == Punct("\\"),
+        bs => switch(_ == NL, _ => join, c => explicitJoin(p << bs << c)),
+        c => explicitJoin(p << c)
+      )
     join
   }
 
-  val line      = many(no(NL)) ~ NL
+  val line = many(no(NL)) ~ NL
   val emptyLine = many(WS) ~ NL
 
-  def indentBy[T](indentation: Parser[Any]): Parser[T] => Parser[T] = repeat[T] { p =>
+  def indentBy[T](indentation: Parser[Any]) = repeat[T] { p =>
     // here we use (locally) biased choice to prevent ambiguities
-    biasedAlt (
+    biasedAlt(
       // pass empty lines as NL to p
      emptyLine ^^ { _ => p << NL },
       // first consume `n` spaces, then delegate to p
@@ -169,33 +173,35 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy
     )
   }
 
-  def indented[T](p: Parser[T]): Parser[T] =
-    consumed(some(WS)) >> { i => not(prefix(WS)) &> indentBy(acceptSeq(i))(p) <<< i }
-
-  def preprocess[T] = stripComments[T] compose explicitJoin[T] compose implicitJoin[T]
+  def indented[T](p: Parser[T]) =
+    consumed(some(WS)) >> { i =>
+      not(prefix(WS)) &> indentBy(acceptSeq(i))(p) <<< i
+    }
 
+  def preprocess[T] =
+    stripComments[T] compose explicitJoin[T] compose implicitJoin[T]
 
-  def binOp[T, S](p: Parser[T], op: Parser[S], f: (T, S, T) => T): Parser[T] = {
-    lazy val ps: Parser[T] = nonterminal((p ␣ op ␣ ps) ^^ { case l ~ op ~ r => f(l, op, r) } | p)
+  def binOp[T, S](p: Parser[T], op: Parser[S], f: (T, S, T) => T) = {
+    lazy val ps: Parser[T] = nonterminal((p ␣ op ␣ ps) ^^ { case ((l, op), r) =>
+      f(l, op, r)
+    } | p)
     ps
   }
 
   // --- Space Helpers ---
-  lazy val whitespace = WS
-  lazy val linebreak  = NL
-  lazy val space      = whitespace | linebreak
+  val whitespace = WS
+  val linebreak = NL
+  val space = alt(whitespace, linebreak)
   lazy val spaces = many(whitespace)
 
-  implicit class SpaceHelpers[T, P <% Parser[T]](p: P) {
-    def ␣[U](q: => Parser[U]): Parser[T ~ U] =
-      p ~ (spaces ~> q)
-    def <␣[U](q: => Parser[U]): Parser[T] =
-      p <~ (spaces ~ q)
-    def ␣>[U](q: => Parser[U]): Parser[U] =
-      p ~> (spaces ~> q)
+  extension [T](p: Parser[T]) {
+    def ␣[U](q: => Parser[U]) = p ~ (spaces ~> q)
+    def <␣[U](q: => Parser[U]) = p <~ (spaces ~ q)
+    def ␣>[U](q: => Parser[U]) = p ~> (spaces ~> q)
   }
 
-  def listOf[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] =
+  def listOf[T](p: Parser[T], sep: Parser[Any]) =
     someSep(p, spaces ~ sep ~ spaces) <~ opt(spaces ~ sep)
 
   def optList[T](p: Parser[List[T]]) = p | succeed(Nil)
@@ -205,201 +211,209 @@ trait PythonParsers extends PythonLexemes with PythonAst { self: Parsers with Sy
 
   // --- Python Grammar ---
   // see: https://docs.python.org/3.5/reference/grammar.html
-  lazy val file_input: NT[Program] = emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ Program
+  lazy val file_input: NT[Program] =
+    emptyLine.* ~> many(stmt <~ emptyLine.*) <~ EOS ^^ Program.apply
 
-  lazy val decorator: Parser[Decorator] =
-    "@" ~> dotted_name ~ ("(" ~> optArgs <~ ")" | succeed(Nil)) <~ NL ^^ Decorator
+  lazy val decorator: Parser[Decorator] =
+    "@" ~> dotted_name ~ ("(" ~> optArgs <~ ")" | succeed(Nil)) <~ NL ^^
+      Decorator.apply
 
   lazy val decorators: Parser[List[Decorator]] = some(decorator)
 
-  lazy val decorated: Parser[Decorated] =
-    decorators ~ (classdef | funcdef | async_funcdef) ^^ Decorated
-
+  lazy val decorated: Parser[Decorated] =
+    decorators ~ (classdef | funcdef | async_funcdef) ^^ Decorated.apply
 
   // --- Functions ---
-  lazy val async_funcdef: Parser[FuncDef] = 'async ␣> funcdef
+  lazy val async_funcdef: Parser[FuncDef] = "async" ␣> funcdef
   lazy val funcdef: Parser[FuncDef] =
-    'def ␣> (id ␣ parameters ~ spacedOpt("->" ␣> test)) ␣ (":" ␣> suite) ^^ FuncDef
-
-  lazy val parameters: Parser[Any] = "(" ~> spacedOpt(typedargslist) <␣ ")"
+    "def" ␣> (id ␣ parameters ~ spacedOpt("->" ␣> test)) ␣ (":" ␣> suite) ^^
+      FuncDef.apply
+  lazy val parameters = "(" ~> spacedOpt(typedargslist) <␣ ")"
 
   // ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]
   def fpdef(p: Parser[Any]): Parser[Any] =
-    ( "*" ~ spacedOpt(p)
-        ~ spacedOpt("," ␣> testdefs(p))
-        ~ spacedOpt("," ␣> ("**" ␣ p))
-    | "**" ␣ p
-    )
-  def testdefs(p: Parser[Any]): Parser[Any] = someSep(p ~ spacedOpt("=" ␣> test), ",")
+    ("*" ~ spacedOpt(p)
+      ~ spacedOpt("," ␣> testdefs(p))
+      ~ spacedOpt("," ␣> ("**" ␣ p))
+      | "**" ␣ p)
+  def testdefs(p: Parser[Any]) = someSep(p ~ spacedOpt("=" ␣> test), ",")
 
-  lazy val typedargslist: Parser[Any] =
+  lazy val typedargslist =
     testdefs(tfpdef) ~ spacedOpt("," ␣> fpdef(tfpdef)) | fpdef(tfpdef)
 
-  lazy val varargslist: Parser[Any] =
+  lazy val varargslist =
     testdefs(vfpdef) ~ spacedOpt("," ␣> fpdef(vfpdef)) | fpdef(vfpdef)
 
-  lazy val tfpdef: Parser[Any] = id ~ spacedOpt(":" ␣> test)
-  lazy val vfpdef: Parser[Any] = id
+  lazy val tfpdef = id ~ spacedOpt(":" ␣> test)
+  lazy val vfpdef = id
 
   // --- Statements ---
-  lazy val stmt: NT[Any] = simple_stmt | compound_stmt
-  lazy val simple_stmt: Parser[Any] = listOf(small_stmt, ";") <␣ NL ^^ Simple
-  lazy val small_stmt: Parser[Any] =
-    ( expr_stmt | del_stmt
-    | pass_stmt | flow_stmt | import_stmt
-    | global_stmt | nonlocal_stmt | assert_stmt
-    )
-
-  lazy val expr_stmt: Parser[Any] =
-    ( testlist_star_expr
-    | testlist_star_expr ␣ augassign ␣ ( yield_expr | testlist )
-    | testlist_star_expr ~ some(spaces ~> "=" ␣> ( yield_expr | testlist_star_expr ))
-    ) ^^ ExprStmt
-
-  lazy val testlist_star_expr: Parser[Any] = listOf(test | star_expr, ",")
-
-  lazy val augassign: Parser[Any] = ( "+=" | "-=" | "*=" | "@=" | "/=" | "%="
-                                    | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**="
-                                    | "//="
-                                    )
-  lazy val del_stmt: Parser[Stmt] = 'del ␣> exprlist ^^ Del
-  lazy val pass_stmt: Parser[Stmt] = 'pass ^^^ Pass
-  lazy val flow_stmt: Parser[Stmt] = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
-  lazy val break_stmt: Parser[Stmt] = 'break ^^^ Break
-  lazy val continue_stmt: Parser[Stmt] = 'continue ^^^ Continue
-  lazy val return_stmt: Parser[Stmt] = 'return ~> spacedOpt(testlist) ^^ Return
-  lazy val yield_stmt: Parser[Stmt] = yield_expr ^^ ExprStmt
-  lazy val raise_stmt: Parser[Stmt] = 'raise ~> spacedOpt(test ~ spacedOpt('from ␣ test)) ^^ Raise
-  lazy val import_stmt: Parser[Any] = import_name | import_from
-  lazy val import_name: Parser[Any] = 'import ␣> dotted_as_names ^^ { n => Import(n) }
+  lazy val stmt: NT[Any] = simple_stmt | compound_stmt
+  lazy val simple_stmt =
+    listOf(small_stmt, ";") <␣ NL ^^ Simple.apply
+  lazy val small_stmt =
+    (expr_stmt | del_stmt
+      | pass_stmt | flow_stmt | import_stmt
+      | global_stmt | nonlocal_stmt | assert_stmt)
+
+  lazy val expr_stmt =
+    (testlist_star_expr
+      | testlist_star_expr ␣ augassign ␣ (yield_expr | testlist)
+      | testlist_star_expr ~
+        some(spaces ~> "=" ␣> (yield_expr | testlist_star_expr))) ^^
+      ExprStmt.apply
+
+  lazy val testlist_star_expr = listOf(test | star_expr, ",")
+
+  lazy val augassign = ("+=" | "-=" | "*=" | "@=" | "/=" | "%="
+    | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**="
+    | "//=")
+  lazy val del_stmt = "del" ␣> exprlist ^^ Del.apply
+  lazy val pass_stmt = "pass" ^^^ Pass
+  lazy val flow_stmt =
+    break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+  lazy val break_stmt = "break" ^^^ Break
+  lazy val continue_stmt = "continue" ^^^ Continue
+  lazy val return_stmt = "return" ~> spacedOpt(testlist) ^^ Return.apply
+  lazy val yield_stmt = yield_expr ^^ ExprStmt.apply
+  lazy val raise_stmt =
+    "raise" ~> spacedOpt(test ~ spacedOpt("from" ␣ test)) ^^ Raise.apply
+  lazy val import_stmt = import_name | import_from
+  lazy val import_name = "import" ␣> dotted_as_names ^^ { Import(_) }
 
   // # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
-  lazy val import_from: Parser[Any] =
-    ('from ~> (spacedMany("." | "...") ~ dotted_name | some("." | "...")) ␣
-      'import ␣> ("*" | "(" ␣> import_as_names <␣ ")" | import_as_names)) ^^ {
-        case (from, names) => Import(names, Some(from))
+  lazy val import_from =
+    ("from" ~> (spacedMany("." | "...") ~ dotted_name | some("." | "...")) ␣
+      "import" ␣> ("*" | "(" ␣> import_as_names <␣ ")" | import_as_names)) ^^ {
+      case (from, names) => Import(names, Some(from))
     }
 
-  lazy val import_as_name: Parser[Any] = id ~ spacedOpt('as ␣ id)
-  lazy val dotted_as_name: Parser[Any] = dotted_name ~ spacedOpt('as ␣ id)
-  lazy val import_as_names: Parser[Any] = listOf(test | import_as_name, ",")
-  lazy val dotted_as_names: Parser[Any] = someSep(dotted_as_name, ",")
-  lazy val dotted_name: Parser[Any] = someSep(id, ".")
-
-  lazy val global_stmt: Parser[Any] = 'global ␣> someSep(id, ",") ^^ Global
-  lazy val nonlocal_stmt: Parser[Any] = 'nonlocal ␣> someSep(id, ",") ^^ Nonlocal
-  lazy val assert_stmt: Parser[Any] = 'assert ␣> someSep(test, ",") ^^ Assert
+  lazy val import_as_name = id ~ spacedOpt("as" ␣ id)
+  lazy val dotted_as_name = dotted_name ~ spacedOpt("as" ␣ id)
+  lazy val import_as_names = listOf(test | import_as_name, ",")
+  lazy val dotted_as_names = someSep(dotted_as_name, ",")
+  lazy val dotted_name = someSep(id, ".")
+  lazy val global_stmt = "global" ␣> someSep(id, ",") ^^ Global.apply
+  lazy val nonlocal_stmt = "nonlocal" ␣> someSep(id, ",") ^^ Nonlocal.apply
+  lazy val assert_stmt = "assert" ␣> someSep(test, ",") ^^ Assert.apply
 
-  lazy val compound_stmt: Parser[Any] =
+  lazy val compound_stmt =
     if_stmt | while_stmt | for_stmt | try_stmt |
       with_stmt | funcdef | classdef | decorated | async_stmt
-  lazy val async_stmt: Parser[Any] = 'async ␣> (funcdef | with_stmt | for_stmt)
-  lazy val if_stmt: Parser[Any] =
-    'if ␣> test ␣ (":" ␣> suite ~
-      spacedMany('elif ␣> test ␣ (":" ␣> suite)) ~
-      spacedOpt(('else ␣ ":") ␣> suite))
-  lazy val while_stmt: Parser[Any] =
-    'while ␣> test ␣ (":" ␣> suite ~ spacedOpt(('else ␣ ":") ␣> suite))
-  lazy val for_stmt: Parser[Any] =
-    'for ␣> exprlist ␣ ('in ␣> testlist ␣ (":" ␣> suite ~ spacedOpt(('else ␣> ":") ␣> suite))) ^^ {
-      case (exprs ~ (tests ~ (body ~ default))) => For(exprs, tests, body, default)
+  lazy val async_stmt =
+    "async" ␣> (funcdef | with_stmt | for_stmt)
+  lazy val if_stmt =
+    "if" ␣> test ␣ (":" ␣> suite ~
+      spacedMany("elif" ␣> test ␣ (":" ␣> suite)) ~
+      spacedOpt(("else" ␣ ":") ␣> suite))
+  lazy val while_stmt =
+    "while" ␣> test ␣ (":" ␣> suite ~ spacedOpt(("else" ␣ ":") ␣> suite))
+  lazy val for_stmt =
+    "for" ␣> exprlist ␣ ("in" ␣> testlist ␣
+      (":" ␣> suite ~ spacedOpt(("else" ␣> ":") ␣> suite))) ^^ {
+      case (exprs, (tests, (body, default))) =>
+        For(exprs, tests, body, default)
     }
-  lazy val try_stmt: Parser[Any] =
-    ('try ␣ ":") ␣> suite ␣ (some(except_clause ␣ (":" ␣> suite)) ~
-      spacedOpt(('else ␣ ":") ␣> suite) ~
-      spacedOpt(('finally ␣ ":") ␣> suite)
-    | (('finally ␣ ":") ␣> suite)
-    )
-  lazy val with_stmt: Parser[Any] = 'with ␣> someSep(with_item, ",") ␣ (":" ␣> suite)
-  lazy val with_item: Parser[Any] = test ~ spacedOpt('as ␣> expr)
+  lazy val try_stmt =
+    ("try" ␣ ":") ␣> suite ␣ (some(except_clause ␣ (":" ␣> suite)) ~
+      spacedOpt(("else" ␣ ":") ␣> suite) ~
+      spacedOpt(("finally" ␣ ":") ␣> suite)
+      | (("finally" ␣ ":") ␣> suite))
+  lazy val with_stmt = "with" ␣> someSep(with_item, ",") ␣ (":" ␣> suite)
+  lazy val with_item = test ~ spacedOpt("as" ␣> expr)
 
   // # NB compile.c makes sure that the default except clause is last
-  lazy val except_clause: Parser[Any] = 'except ~> spacedOpt(test ␣ opt('as ␣> id))
-
+  lazy val except_clause = "except" ~> spacedOpt(test ␣ opt("as" ␣> id))
 
   // INDENTATION
   // changed to also allow empty lines
-  lazy val suite: Parser[Any] = simple_stmt | NL ~> indented(some(many(emptyLine) ~> stmt))
+  lazy val suite = simple_stmt | NL ~> indented(some(many(emptyLine) ~> stmt))
 
   // --- Expressions ---
-  lazy val test: NT[Any] = ( or_test ~ spacedOpt('if ␣> or_test ␣ ('else ␣> test))
-                           | lambdef
-                           )
-  lazy val test_nocond: NT[Any] = or_test | lambdef_nocond
-  lazy val lambdef: NT[Any] = 'lambda ~> spacedOpt(varargslist) ␣ (":" ␣> test)
-  lazy val lambdef_nocond: NT[Any] = 'lambda ~> spacedOpt(varargslist) ␣ (":" ␣> test_nocond)
-  lazy val or_test: NT[Any] = someSep(and_test, 'or)
-  lazy val and_test: NT[Any] = someSep(not_test, 'and)
-  lazy val not_test: NT[Any] = 'not ␣> not_test | comparison
-  lazy val comparison: NT[Any] = someSep(expr, comp_op)
+  lazy val test: NT[Any] =
+    (or_test ~ spacedOpt("if" ␣> or_test ␣ ("else" ␣> test))
+      | lambdef)
+  lazy val test_nocond: NT[Any] = or_test | lambdef_nocond
+  lazy val lambdef: NT[Any] = "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test)
+  lazy val lambdef_nocond: NT[Any] =
+    "lambda" ~> spacedOpt(varargslist) ␣ (":" ␣> test_nocond)
+  lazy val or_test: NT[Any] = someSep(and_test, "or")
+  lazy val and_test: NT[Any] = someSep(not_test, "and")
+  lazy val not_test: NT[Any] = "not" ␣> not_test | comparison
+  lazy val comparison: NT[Any] = someSep(expr, comp_op)
 
   // # <> isn't actually a valid comparison operator in Python. It's here for the
   // # sake of a __future__ import described in PEP 401 (which really works :-)
-  lazy val comp_op: Parser[Any] = ( "<" | ">" | "==" | ">=" | "<=" | "<>" | "!="
-                                  | 'in | 'not ␣ 'in | 'is | 'is ␣ 'not
-                                  )
-
-  lazy val expr: NT[Any] = binOp(xor_expr, "|", BinOp)
-  lazy val xor_expr: NT[Any] = binOp(and_expr, "^", BinOp)
-  lazy val and_expr: NT[Any] = binOp(shift_expr, "&", BinOp)
-  lazy val shift_expr: NT[Any] = binOp(arith_expr, "<<" | ">>", BinOp)
-  lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", BinOp)
-  lazy val term: NT[Any] = binOp(factor, "*" | "@" | "/" | "%" | "//", BinOp)
-  lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power
-  lazy val power: NT[Any] = atom_expr | atom_expr ␣ "**" ␣ factor
-
-  lazy val atom_expr: Parser[Any] = opt('await ~ spaces) ~> atom ~ spacedMany(trailer)
-  lazy val atom: Parser[Any] = ( "(" ␣> ( yield_expr | testlist_comp) <␣ ")"
-                               | "[" ~> spacedOpt(testlist_comp) <␣ "]"
-                               | "{" ~> spacedOpt(dictorsetmaker) <␣ "}"
-                               | id | number | some(string) | "..."
-                               | 'None | 'True | 'False
-                               )
-
-
-  lazy val star_expr: Parser[Any] = "*" ␣ expr
-  lazy val yield_expr: Parser[Any] = 'yield ~ spacedOpt('from ␣ test | testlist)
-
-  lazy val testlist_comp: Parser[Any] = ( listOf(test | star_expr, ",")
-                                        | (test | star_expr) ␣ comp_for
-                                        )
-
-  lazy val trailer: Parser[Any] = ( "(" ␣> optArgs <␣ ")"
-                                  | "[" ␣> subscriptlist <␣ "]"
-                                  | "." ␣> id
-                                  )
-  lazy val subscriptlist: Parser[Any] = listOf(subscript, ",")
-  lazy val subscript: Parser[Any] = test | spacedOpt(test) ~ ":" ~ spacedOpt(test) ~ spacedOpt(":" ~> spacedOpt(test))
-  lazy val exprlist: Parser[List[Any]] = listOf(expr | star_expr, ",")
-  lazy val testlist: Parser[Any] = listOf(test, ",")
-
-  lazy val dictorsetmaker: Parser[Any] =
-    ( ( listOf(test ␣ (":" ␣> test) | "**" ␣> expr, ",")
-      | (test ␣ (":" ␣> test) | "**" ␣> expr) ␣ comp_for
-      )
-    | ( listOf(test | star_expr, ",")
-      | (test | star_expr) ␣ comp_for
-      )
-    )
+  lazy val comp_op = ("<" | ">" | "==" | ">=" | "<=" | "<>" | "!="
    | "in" | "not" ␣ "in" | "is" | "is" ␣ "not")
+
+  lazy val expr: NT[Any] = binOp(xor_expr, "|", BinOp.apply)
+  lazy val xor_expr: NT[Any] = binOp(and_expr, "^", BinOp.apply)
+  lazy val and_expr: NT[Any] = binOp(shift_expr, "&", BinOp.apply)
+  lazy val shift_expr: NT[Any] =
+    binOp(arith_expr, "<<" | ">>", BinOp.apply)
+  lazy val arith_expr: NT[Any] = binOp(term, "+" | "-", BinOp.apply)
+  lazy val term: NT[Any] =
+    binOp(factor, "*" | "@" | "/" | "%" | "//", BinOp.apply)
+  lazy val factor: NT[Any] = ("+" | "-" | "~") ␣ factor | power
+  lazy val power: NT[Any] = atom_expr | atom_expr ␣ "**" ␣ factor
+  lazy val atom_expr =
+    opt("await" ~ spaces) ~> atom ~ spacedMany(trailer)
+  lazy val atom = ("(" ␣> (yield_expr | testlist_comp) <␣ ")"
+    | "[" ~> spacedOpt(testlist_comp) <␣ "]"
+    | "{" ~> spacedOpt(dictorsetmaker) <␣ "}"
+    | id | number | some(string) | "..."
+    | "None" | "True" | "False")
+
+  lazy val star_expr = "*" ␣ expr
+  lazy val yield_expr = "yield" ~ spacedOpt("from" ␣ test | testlist)
+
+  lazy val testlist_comp = (listOf(test | star_expr, ",")
+    | (test | star_expr) ␣ comp_for)
+
+  lazy val trailer = ("(" ␣> optArgs <␣ ")"
+    | "[" ␣> subscriptlist <␣ "]"
+    | "." ␣> id)
+  lazy val subscriptlist = listOf(subscript, ",")
+  lazy val subscript =
+    test | spacedOpt(test) ~ ":" ~ spacedOpt(test) ~
+      spacedOpt(":" ~> spacedOpt(test))
+  lazy val exprlist = listOf(expr | star_expr, ",")
+  lazy val testlist = listOf(test, ",")
+  lazy val dictorsetmaker =
+    ((listOf(test ␣ (":" ␣> test) | "**" ␣> expr, ",")
+      | (test ␣ (":" ␣> test) | "**" ␣> expr) ␣ comp_for)
+      | (listOf(test | star_expr, ",")
+        | (test | star_expr) ␣ comp_for))
 
-  lazy val classdef: Parser[Any] =
-    'class ␣> (id ~ spacedOpt("(" ␣> optArgs <␣ ")" )) ␣ (":" ␣> suite)
+  lazy val classdef =
+    "class" ␣> (id ~ spacedOpt("(" ␣> optArgs <␣ ")")) ␣ (":" ␣> suite)
 
-  lazy val arglist: Parser[List[Any]] = listOf(argument, ",")
-  lazy val optArgs: Parser[List[Any]] = arglist | succeed(Nil)
+  lazy val arglist = listOf(argument, ",")
+  lazy val optArgs = arglist | succeed(Nil)
 
-  lazy val argument: Parser[Any] =
-    ( test ~ spacedOpt(comp_for)
-    | test ␣ "=" ␣ test
-    | "**" ␣ test
-    | "*" ␣ test
-    )
+  lazy val argument = (test ~ spacedOpt(comp_for)
+    | test ␣ "=" ␣ test
+    | "**" ␣ test
+    | "*" ␣ test)
 
-  lazy val comp_iter: NT[Any] = comp_for | comp_if
-  lazy val comp_for: NT[Any] = 'for ␣> exprlist ␣ ('in ␣> or_test ~ spacedOpt(comp_iter))
-  lazy val comp_if: Parser[Any] = 'if ␣> test_nocond ~ spacedOpt(comp_iter)
+  lazy val comp_iter: NT[Any] = comp_for | comp_if
+  lazy val comp_for =
+    "for" ␣> exprlist ␣ ("in" ␣> or_test ~ spacedOpt(comp_iter))
+  lazy val comp_if = "if" ␣> test_nocond ~ spacedOpt(comp_iter)
 }
 
-object PythonParsers extends PythonParsers with DerivedOps with DerivativeParsers with Syntax {
-  override def accept(t: Elem): Parser[Elem] = acceptIf(_ == t)
+object PythonParsers
+    extends PythonParsers,
+      DerivedOps,
+      DerivativeParsers,
+      Syntax {
+  override def accept(t: Elem) = acceptIf(_ == t)
 }
diff --git a/artifact/src/main/scala/examples/paper/Paper.scala b/artifact/src/main/scala/examples/paper/Paper.scala
index 6f4d40a..e4bacee 100644
--- a/artifact/src/main/scala/examples/paper/Paper.scala
+++ b/artifact/src/main/scala/examples/paper/Paper.scala
@@ -1,16 +1,14 @@
 package fcd
 
-/**
- * This object instantiates the examples from section 3, 4 and 7 and makes them
- * available in the REPL via:
- *
- *   > import paper._
- */
-object paper extends Section3 with Section4 with Section7 {
+/** This object instantiates the examples from section 3, 4 and 7 and makes
+ *  them available in the REPL via:
+ *
+ *    > import paper._
+ */
 
-  // Use the derivative based parsers for examples in the paper
-  type Parsers = DerivativeParsers.type
-  def _parsers: DerivativeParsers.type = DerivativeParsers
-  override lazy val parsers: DerivativeParsers.type = _parsers
-
-}
+object paper
+    extends RichParsers
+    with DerivativeParsers
+    with Section3
+    with Section4
+    with Section7
diff --git a/artifact/src/main/scala/examples/paper/Section3.scala b/artifact/src/main/scala/examples/paper/Section3.scala
index 2319e73..ecc70a2 100644
--- a/artifact/src/main/scala/examples/paper/Section3.scala
+++ b/artifact/src/main/scala/examples/paper/Section3.scala
@@ -1,47 +1,36 @@
 package fcd
 
-/**
- * Section 3 - First-class Derivatives: Gaining
- * Fine Grained Control over the Input Stream
- * ===========================================
- * This file contains all code examples from section 3 of our paper:
- *
- *   Brachthäuser, Rendel, Ostermann.
- *   Parsing with First-Class Derivatives
- *   To appear in OOPSLA 2016.
- *
- * The examples are grouped by subsections. For every subsection with
- * examples we introduced a corresponding Scala object below.
- *
- * You can experiment with the examples of this file in the REPL by:
- *
- *   > console
- *   scala> import paper.section_3_2._
- *   scala> number.parse("42")
- *   res0: Results[Int] = List(42)
- *
- * You can reach the Scala console by entering 'console' at the
- * sbt prompt.
- *
- * Additional note: All examples are parametrized by the parser combinator
- * library to allow experimenting with different implementations. This should
- * also support future research and alternate implementations.
- *
- * All the traits containing paper examples are eventually combined and
- * instantiated to an object `paper` in `Paper.scala`.
- */
-
-trait Section3 extends ParserUsage {
-
-  // Require a library implementation that also supports the derived combinators
-  type Parsers <: RichParsers
-
-  // import all symbols from the library
-  import parsers._
-
-  /**
-   * Section 3.2 First-Class Derivatives
-   */
+import scala.language.implicitConversions
+
+/** Section 3 – Gaining Fine Grained Control over the Input Stream
+ *
+ * This file contains all code examples from section 3 of our paper:
+ *
+ *   Brachthäuser, Rendel, Ostermann.
+ *   Parsing with First-Class Derivatives
+ *   To appear in OOPSLA 2016.
+ *
+ * The examples are grouped by subsections. For every subsection with examples
+ * we introduced a corresponding Scala object below.
+ *
+ * You can experiment with the examples of this file in the REPL by:
+ *
+ *   > console
+ *   scala> import paper.section_3_2._
+ *   scala> number.parse("42")
+ *   res0: Results[Int] = List(42)
+ *
+ * You can reach the Scala console by entering 'console' at the sbt prompt.
+ *
+ * Additional note: All examples are parametrized by the parser combinator
+ * library to allow experimenting with different implementations. This should
+ * also support future research and alternate implementations.
+ *
+ * All the traits containing paper examples are eventually combined and
+ * instantiated to an object `paper` in `Paper.scala`.
+ */
+
+trait Section3 { self: RichParsers =>
+
+  /** Section 3.2 First-Class Derivatives
+   */
   object section_3_2 {
 
     // ### Example of Subsection 3.2: First-Class Derivatives (<<)
@@ -89,7 +78,6 @@ trait Section3 extends ParserUsage {
     // This requires that you have graphviz installed on your computer.
     val q: Parser[List[Char]] = many('a')
 
-
     // ### Example of Subsection 3.2: Combinator "nt"
     //
     // Difference: The combinator `nt` in the paper is called `nonterminal` in
@@ -107,12 +95,13 @@ trait Section3 extends ParserUsage {
     //
     // The implicit conversions that wrap the production into `nonterminal`
     // calls are defined in the file Syntax.scala
-    val digit: Parser[Int] = acceptIf(_.isDigit) ^^ { s => Integer.valueOf(s.toString) }
+    val digit: Parser[Int] = acceptIf(_.isDigit) ^^ { s =>
+      Integer.valueOf(s.toString)
+    }
 
     val number: Parser[Int] =
-      nonterminal( number ~ digit ^^ { case (n, d) => (n * 10) + d }
-                 | digit
-                 )
+      nonterminal(number ~ digit ^^ { case (n, d) => (n * 10) + d }
+        | digit)
 
     // To get an overview of the available parser combinator refer to:
     //
@@ -123,25 +112,23 @@ trait Section3 extends ParserUsage {
   }
 
-  /**
-   * Section 3.4 Implementation using First-Class Derivatives
-   */
+  /** Section 3.4 Implementation using First-Class Derivatives
+   */
   object section_3_4 {
 
     // Figure 4a. Definition of the combinator indented(p) in terms of <<.
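+    // Reading aid (informal): `done(p)` yields the results `p` has for the
+    // input it has already been fed, and `readLine` forwards one physical
+    // line to `p` character by character via `<<`.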
-    def indented[T](p: Parser[T]): Parser[T] =
+    def indented[T](p: Parser[T]) =
       done(p) | (space ~ space) ~> readLine(p)
 
     def readLine[T](p: Parser[T]): Parser[T] =
-      ( no('\n') >> { c => readLine(p << c) }
-      | accept('\n') >> { c => indented(p << c) }
-      )
+      (no('\n') >> { c => readLine(p << c) }
+        | accept('\n') >> { c => indented(p << c) })
 
     // To inspect the virtual input stream of some parser `p` in `indented(p)`
     // one can use the following parser as kind of "mock-parser"
     //
     // It will accept all words and return the input stream it has processed.
-    val collect = consumed(many(any)) map (_.mkString)
+    val collect = consumed(many(any)) ^^ { _.mkString }
 
     // for instance, you can try the following in the REPL
     //
@@ -158,27 +145,24 @@ trait Section3 extends ParserUsage {
     // please note the use of combinator `manyN(n, space)` which recognizes
     // n-many spaces.
-    def indentBy[T](n: Int): Parser[T] => Parser[T] = p =>
+    def indentBy[T](n: Int)(p: Parser[T]) =
       done(p) | manyN(n, space) ~> readLine(n)(p)
 
     // Only change: pass the level of indentation as parameter around
     def readLine[T](n: Int)(p: Parser[T]): Parser[T] =
-      ( no('\n') >> { c => readLine(n)(p << c) }
-      | accept('\n') >> { c => indentBy(n)(p << c) }
-      )
+      (no('\n') >> { c => readLine(n)(p << c) }
+        | accept('\n') >> { c => indentBy(n)(p << c) })
 
     // Here we first read some spaces (at least one) and then invoke
     // `indentBy`.
-    def indented[T](p: Parser[T]): Parser[T] = consumed(some(space)) >> { case s =>
+    def indented[T](p: Parser[T]) = consumed(some(space)) >> { s =>
       // this simulates lookahead for greedy matching
       no(' ') >> { c => indentBy(s.size)(p) <<< s << c }
     }
   }
 
-
-  /**
-   * Derived Combinators
-   */
+  /** Derived Combinators
+   */
   object section_3_5 {
 
     // Section 3.5 introduces `delegate` and `repeat`. The implementation of
@@ -198,7 +182,6 @@ trait Section3 extends ParserUsage {
     def injectA[T](p: Parser[T]): Parser[T] =
      ((any ~ any) &> delegate(p)) >> { p2 => 'a' ~> p2 }
 
-
     // Not in the paper: Example for usage of combinator `repeat`.
     // every two tokens recognize an intermediate token 'a'.
     //
@@ -209,13 +192,13 @@ trait Section3 extends ParserUsage {
     // Please note, that since we repeatedly delimit with `any ~ any` the
     // resulting parser can only recognize words in { (xxa)* | x ∈ Σ }
     def injectAs[T] = repeat[T] { p =>
-        ((any ~ any) &> delegate(p)) <~ 'a'
+      ((any ~ any) &> delegate(p)) <~ 'a'
     }
 
     // Figure 5b. Definition of the combinator `indented(p)` in terms of `delegate`.
     lazy val line = many(no('\n')) <~ '\n'
     def indented[T]: Parser[T] => Parser[T] = repeat[T] { p =>
-        (space ~ space) ~> (line &> delegate(p))
+      (space ~ space) ~> (line &> delegate(p))
     }
 
     // To experiment with this implementation of indented you can selectively
@@ -226,11 +209,9 @@ trait Section3 extends ParserUsage {
     // involving the indentation combinator.
   }
 
-
-  /**
-   * Symmetrical to section_3_4 and section_3_4_improved we can define flexible
-   * indentation using delegate and repeat.
-   */
+  /** Symmetrical to section_3_4 and section_3_4_improved we can define flexible
+   * indentation using delegate and repeat.
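+   * (For instance, `indented(many(line))` accepts a block whose lines are all
+   * indented by the same number of spaces as its first line.)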
+   */
   object section_3_5_improved {
 
     lazy val line = many(no('\n')) <~ '\n'
@@ -238,7 +219,7 @@ trait Section3 extends ParserUsage {
       manyN(n, space) ~> (line &> delegate(p))
     }
 
-    def indented[T](p: Parser[T]): Parser[T] = consumed(some(space)) >> { case s =>
+    def indented[T](p: Parser[T]) = consumed(some(space)) >> { s =>
       no(' ') >> { c => indentBy(s.size)(p) <<< s << c }
     }
   }
 }
diff --git a/artifact/src/main/scala/examples/paper/Section4.scala b/artifact/src/main/scala/examples/paper/Section4.scala
index 7fe7c8a..855549a 100644
--- a/artifact/src/main/scala/examples/paper/Section4.scala
+++ b/artifact/src/main/scala/examples/paper/Section4.scala
@@ -1,29 +1,21 @@
 package fcd
 
-/**
- * Section 4 - Applications
- * ==========================
- * This file contains all code examples from section 5 of our paper:
- *
- *   Brachthäuser, Rendel, Ostermann.
- *   Parsing with First-Class Derivatives
- *   To appear in OOPSLA 2016.
- *
- * Section 4 gives additional applications and use cases where our approach
- * results in a modular solution.
- */
-trait Section4 extends ParserUsage { self: Section3 =>
-
-  // Require a library implementation that also supports the derived combinators
-  type Parsers <: RichParsers
-
-  // import all symbols from the library
-  import parsers._
-
-
-  /**
-   * Section 4.1 - Increased Reuuse through Parser Selection
-   */
+import scala.language.implicitConversions
+
+/** Section 4 – Applications
+ *
+ * This file contains all code examples from section 4 of our paper:
+ *
+ *   Brachthäuser, Rendel, Ostermann.
+ *   Parsing with First-Class Derivatives
+ *   To appear in OOPSLA 2016.
+ *
+ * Section 4 gives additional applications and use cases where our approach
+ * results in a modular solution.
+ */
+trait Section4 { self: Section3 & RichParsers =>
+
+  /** Section 4.1 - Increased Reuse through Parser Selection
+   */
   object section_4_1 {
 
     // very simplified grammar to illustrate parser selection
 
     lazy val stmt: NT[Any] =
       ("while" ~ space ~ "(true):" ~ block
-      | some('x') ~ '\n'
-      )
+        | some('x') ~ '\n')
 
     lazy val stmts = many(stmt)
     lazy val block: NT[Any] = '\n' ~ indented(stmts)
@@ -51,11 +42,10 @@ trait Section4 extends ParserUsage { self: Section3 =>
     lazy val untilStmt = "until" ~> (stmt <<< "while")
   }
 
-  /**
-   * Section 4.2 Modular Definitions as Combinators
-   */
+  /** Section 4.2 Modular Definitions as Combinators
+   */
   object section_4_2 {
-    def unescChar(c: Char): String = StringContext treatEscapes s"\\$c"
+    def unescChar(c: Char) = StringContext processEscapes s"\\$c"
 
     // ### Example. Preprocessor that unescapes backslash escaped characters
     //
     //   unescape(many("\n" | "a")) parse "\\na\\n\\naaa"
     def unescape[T](p: Parser[T]): Parser[T] = done(p) | eat {
-      case '\\' => char >> { c =>
-        unescape( p <<< unescChar(c) )
-      }
+      case '\\' =>
+        char >> { c =>
+          unescape(p <<< unescChar(c))
+        }
       case c => unescape(p << c)
     }
 
     // ### Example Figure 6a. Combinators for interleaved parsing of fenced code
     // blocks.
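+    // Protocol sketch: `inText(text, code)` feeds tokens to `text` until it
+    // sees `marker`, then switches to `inCode`, which feeds tokens to `code`
+    // until the closing `marker`; `done(text & code)` finally collects the
+    // results of both parsers.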
-    val marker: Parser[Any] = lineEnd ~ "~~~" ~ lineEnd
+    val marker = lineEnd ~ "~~~" ~ lineEnd
 
     // We have two states: Inside the code block and outside the code block
     def inCode[R, S](text: Parser[R], code: Parser[S]): NT[(R, S)] =
-      ( marker ~> inText(text, code)
-      | eat { c => inCode(text, code << c) }
-      )
+      (marker ~> inText(text, code)
+        | eat { c => inCode(text, code << c) })
 
     def inText[R, S](text: Parser[R], code: Parser[S]): NT[(R, S)] =
-      ( done(text & code)
-      | marker ~> inCode(text, code)
-      | eat { c => inText(text << c, code) }
-      )
+      (done(text & code)
+        | marker ~> inCode(text, code)
+        | eat { c => inText(text << c, code) })
 
     // Simple variant of balanced parenthesis
     lazy val parens: NT[Any] = '(' ~ parens ~ ')' | succeed(())
@@ -98,7 +87,7 @@ trait Section4 extends ParserUsage { self: Section3 =>
     //
     //   aaaaa
     //   aaaaa
-    val as: Parser[Any] = some(many('a') <~ lineEnd)
+    val as = some(many('a') <~ lineEnd)
 
     // Now we can retroactively combine the two parsers `parens` and `as` by
     // The resulting parser can parse for instance words like
@@ -128,12 +117,11 @@ trait Section4 extends ParserUsage { self: Section3 =>
     // arbitrary positions.
     //
     // We will use this combinator in the following example
-    def spaced[T]: Parser[T] => Parser[T] = p =>
-      done(p) | eat {
-        case ' '  => spaced(p)
-        case '\n' => spaced(p)
-        case c    => spaced(p << c)
-      }
+    def spaced[T](p: Parser[T]): Parser[T] = done(p) | eat {
+      case ' '  => spaced(p)
+      case '\n' => spaced(p)
+      case c    => spaced(p << c)
+    }
 
     // ### Example Figure 6c. Modular definition of a parser combinator for
     // ASCII-tables.
@@ -146,25 +134,31 @@ trait Section4 extends ParserUsage { self: Section3 =>
       (head <~ lineEnd) >> { layout => body(layout, cell) }
 
     // a parser computing the table layout
-    def head: Parser[Layout] = some('+'~> manyCount('-')) <~ '+'
+    def head: Parser[Layout] = some('+' ~> manyCount('-')) <~ '+'
 
-    def body[T](layout: Layout, cell: Parser[T]): Parser[List[List[T]]] =
+    def body[T](layout: Layout, cell: Parser[T]) =
       many(rowLine(layout, layout.map(n => cell)) <~ rowSeparator(layout))
 
     // given a layout, creates a parser for row separators
-    def rowSeparator(layout: Layout): Parser[Any] =
-      layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_+_) ~ lineEnd
+    def rowSeparator(layout: Layout) =
+      layout
+        .map { n => List.fill(n)('-').mkString + "+" }
+        .foldLeft("+")(_ + _) ~ lineEnd
 
     // either read another rowLine or quit cell parsers and collect results
     def rowLine[T](layout: Layout, cells: List[Parser[T]]): Parser[List[T]] =
-      ( ('|' ~> distr(delegateCells(layout, cells)) <~ lineEnd) >> { cs => rowLine(layout, cs) }
-      | collect(cells)
-      )
+      (('|' ~> distr(delegateCells(layout, cells)) <~ lineEnd) >> { cs =>
+        rowLine(layout, cs)
+      }
+        | collect(cells))
 
     // first feed n tokens to every cell parser, then feed newline and read a pipe
-    def delegateCells[T](layout: Layout, cells: List[Parser[T]]): List[Parser[Parser[T]]] =
-      layout.zip(cells).map {
-        case (n, p) => delegateN(n, p).map(_ << '\n') <~ '|'
+    def delegateCells[T](
+        layout: Layout,
+        cells: List[Parser[T]]
+    ) =
+      layout.zip(cells).map { case (n, p) =>
+        map(delegateN(n, p), (_ << '\n')) <~ '|'
       }
 
     // We can use the table combinator recursively to parse nested tables.
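+    // (The knot is tied in the next hunk: `asAndTables` offers
+    // `table(combined)` as an alternative, so a cell may again contain text
+    // with nested tables.)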
@@ -184,7 +178,7 @@ trait Section4 extends ParserUsage { self: Section3 =>
     //   |~~~ |
     //   |aaaa|
     //   +----+
-    lazy val combined: NT[Any]    = inText(asAndTables, spaced(parens))
+    lazy val combined: NT[Any] = inText(asAndTables, spaced(parens))
     lazy val asAndTables: NT[Any] = as | table(combined)
 
     // Again, some more examples of words that are recognized by `combined` can
diff --git a/artifact/src/main/scala/examples/paper/Section7.scala b/artifact/src/main/scala/examples/paper/Section7.scala
index 151478a..25ac506 100644
--- a/artifact/src/main/scala/examples/paper/Section7.scala
+++ b/artifact/src/main/scala/examples/paper/Section7.scala
@@ -1,55 +1,46 @@
 package fcd
 
-/**
- * Section 7 - Implementation
- * ==========================
- * This file contains all code examples from section 7 of our paper:
- *
- *   Brachthäuser, Rendel, Ostermann.
- *   Parsing with First-Class Derivatives
- *   To appear in OOPSLA 2016.
- *
- * Section 7 introduces the implementation of our parser combinator library. In
- * addition to repeating the few examples from the paper in this file we explain
- * the relation between the implementation in the paper and in the artifact.
- *
- * As described in the paper, the core of the implementation builds on
- * derivative based parsing as described by Matt Might et al, translated to an
- * object oriented setting.
- */
-trait Section7 extends ParserUsage {
+import scala.language.implicitConversions
 
-  // Require a library implementation that also supports the derived combinators
-  type Parsers <: RichParsers
+/** Section 7 – Implementation
+ *
+ * This file contains all code examples from section 7 of our paper:
+ *
+ *   Brachthäuser, Rendel, Ostermann.
+ *   Parsing with First-Class Derivatives
+ *   To appear in OOPSLA 2016.
+ *
+ * Section 7 introduces the implementation of our parser combinator library. In
+ * addition to repeating the few examples from the paper in this file we
+ * explain the relation between the implementation in the paper and in the
+ * artifact.
+ *
+ * As described in the paper, the core of the implementation builds on
+ * derivative based parsing as described by Matt Might et al, translated to an
+ * object oriented setting.
+ */
 
-  // import all symbols from the library
-  import parsers._
+trait Section7 { self: RichParsers =>
 
-  /**
-   * Section 7.1, introduces the concrete type of a parser as
-   *
-   *     trait P[+R] {
-   *       def results: Res[R]
-   *       def derive: Elem => P[R]
-   *     }
-   *
-   * The corresponding concrete type of this artifact can be found in
-   * `DerivativeParsers.scala` (corresponding to Figure 10) which contains the
-   * implementation of the interface defined in `Parsers.scala`
-   * (corresponding to Figure 1a.).
-   *
-   * Please note the following important differences:
-   *   - `derive` is called `consume` in this artifact.
-   *   - the trait `Parser[+R]` has default implementations for the various
-   *     combinators. This corresponds to the later developments in Section 7.4
-   *     "Compaction by Dynamic Dispatch".
-   *   - Instead of anonymous subclasses (such as `def fail[R] = new P[R] {...}`)
-   *     the various combinators are implemented by named classes / objects
-   *     (that is, `object Fail extends P[Nothing] { ... }`).
-   *   - We added a special primitive parser `always` which is bisimilar to
-   *     `many(any)` and thus dual (in some sense) to `fail`. Having it as a
-   *     primitive gives rise to some optimizations.
-   */
+  /** Section 7.1, introduces the concrete type of a parser as
+   *
+   *     trait P[+R] {
+   *       def results: Res[R]
+   *       def derive: Elem => P[R]
+   *     }
+   *
+   * The corresponding concrete type of this artifact can be found in
+   * `DerivativeParsers.scala` (corresponding to Figure 10) which contains the
+   * implementation of the interface defined in `Parsers.scala` (corresponding
+   * to Figure 1a.).
+   *
+   * Please note the following important differences:
+   *   - `derive` is called `consume` in this artifact.
+   *   - the trait `Parser[+R]` has default implementations for the various
+   *     combinators. This corresponds to the later developments in Section 7.4
+   *     "Compaction by Dynamic Dispatch".
+   *   - Instead of anonymous subclasses (such as `def fail[R] = new P[R]
+   *     {...}`) the various combinators are implemented by named classes /
+   *     objects (that is, `object Fail extends P[Nothing] { ... }`).
+   *   - We added a special primitive parser `always` which is bisimilar to
+   *     `many(any)` and thus dual (in some sense) to `fail`. Having it as a
+   *     primitive gives rise to some optimizations.
+   */
   object section_7 {
 
     // ### Example. Derivative of some(a)
@@ -59,7 +50,6 @@ trait Section7 extends ParserUsage {
     // > (as << 'a').printToFile("as_derive_a.png")
     val as = some('a')
 
-
     // ### Example. Derivative with compaction
     //
     // You can observe the result of derivation and compaction by comparing
@@ -85,5 +75,4 @@ trait Section7 extends ParserUsage {
     // grammars. Thus, it might be instructive to also inspect the tests in
     // `test/scala/LeftrecTests.scala`.
   }
-
 }
diff --git a/artifact/src/main/scala/library/Attributed.scala b/artifact/src/main/scala/library/Attributed.scala
index ed83272..6c66055 100644
--- a/artifact/src/main/scala/library/Attributed.scala
+++ b/artifact/src/main/scala/library/Attributed.scala
@@ -1,161 +1,155 @@
 package might
 
-/**
-=============================================================================================
-The contents of this file are taken (adapted) from Matt Might's implementation of
-parsing with derivatives. The original implementation can be found online at:
-
-  http://matt.might.net/articles/parsing-with-derivatives/
-
-=============================================================================================
-*/
-
-/**
-  A collection of attributes which must be computed by iteration to a fixed point.
- */
-trait Attributed {
-  private var generation = -1 ;
-  private var stabilized = false ;
-  /**
-   An attribute computable by fixed point.
-
-   @param bottom the bottom of the attribute's lattice.
-   @param join the lub operation on the lattice.
-   @param wt the partial order on the lattice.
-
-   */
-  abstract class Attribute[A](bottom : A, join : (A,A) => A, wt : (A,A) => Boolean)
-  {
-    private var currentValue : A = bottom
-    private var compute : () => A = null
+/*
+  The contents of this file are taken (adapted) from Matt Might's
+  implementation of parsing with derivatives. The original implementation can
+  be found online at:
+
+    http://matt.might.net/articles/parsing-with-derivatives/
+ */
+
+/** A collection of attributes which must be computed by iteration to a fixed
+ *  point.
+ */
+trait Attributed {
+  private var generation = -1
+  private var stabilized = false
+
+  /** An attribute computable by fixed point.
+   *
+   * @param bottom
+   *   the bottom of the attribute's lattice.
+   * @param join
+   *   the lub operation on the lattice.
+   * @param wt
+   *   the partial order on the lattice.
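+   *
+   * For example (a sketch, not code from this artifact): a boolean
+   * nullability attribute lives in the lattice (false, `_ || _`, `implies`)
+   * and, for a hypothetical alternative node with children `left` and
+   * `right`, could be wired up as
+   *
+   *   val nullable = new Attribute[Boolean](false, _ || _, implies)
+   *   nullable := (left.nullable.value || right.nullable.value)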
+ */ + abstract class Attribute[A]( + bottom: A, + join: (A, A) => A, + wt: (A, A) => Boolean + ) { + private var currentValue: A = bottom + private var compute: () => A = null private var fixed = false - /** - Sets the computation the updates this attribute. - - @param computation the computation that updates this attribute. - */ - def := (computation : => A) { - compute = (() => computation) - } - - /** - Permanently fixes the value of this attribute. - - @param value the value of this attribute. - - */ - def :== (value : A) { + /** Sets the computation the updates this attribute. + * + * @param computation + * the computation that updates this attribute. + */ + def :=(computation: => A) = { compute = (() => computation) } + + /** Permanently fixes the value of this attribute. + * + * @param value + * the value of this attribute. + */ + def :==(value: A) = { currentValue = value fixed = true } - /** - Recomputes the value of this attribute. - */ - def update() { - if (fixed) - return ; + /** Recomputes the value of this attribute. + */ + def update(): Unit = { + if (fixed) return val old = currentValue val newValue = compute() - if (!wt(newValue,currentValue)) { - currentValue = join(newValue,currentValue) + if (!wt(newValue, currentValue)) { + currentValue = join(newValue, currentValue) FixedPoint.changed = true } } - /** - The current value of this attribute. - */ - def value : A = { - // When the value of this attribute is requested, there are - // three possible cases: - // - // (1) It's already been computed (this.stabilized); - // (2) It's been manually set (this.fixed); or - // (3) It needs to be computed (generation < FixedPoint.generation). - if (fixed || stabilized || (generation == FixedPoint.generation)) + /** The current value of this attribute. + */ + def value: A = { + /* + When the value of this attribute is requested, there are + three possible cases: + (1) It's already been computed (this.stabilized); + (2) It's been manually set (this.fixed); or + (3) It needs to be computed (generation < FixedPoint.generation). + */ + if (fixed || stabilized || generation == FixedPoint.generation) return currentValue - else - // Run or continue the fixed-point computation: - fix() - - if (FixedPoint.stabilized) - stabilized = true - return currentValue + fix() + if (FixedPoint.stabilized) stabilized = true + currentValue } } // Subsumption tests for attributes: - protected[this] def implies (a : Boolean, b : Boolean) = (!a) || b - protected[this] def follows (a : Boolean, b : Boolean) = (!b) || a - protected[this] def updateAttributes(): Unit + protected def implies(a: Boolean, b: Boolean) = !a || b + protected def follows(a: Boolean, b: Boolean) = !b || a + protected def updateAttributes(): Unit - private def fix() { + private def fix() = { this.generation = FixedPoint.generation if (FixedPoint.master eq null) { - FixedPoint.master = this ; - do { + FixedPoint.master = this + + FixedPoint.generation += 1 + FixedPoint.changed = false + updateAttributes() + while (FixedPoint.changed) { FixedPoint.generation += 1 FixedPoint.changed = false updateAttributes() - } while (FixedPoint.changed) ; - FixedPoint.stabilized = true ; + } + + FixedPoint.stabilized = true FixedPoint.generation += 1 updateAttributes() FixedPoint.reset() - } else { - updateAttributes() - } + } else updateAttributes() } } - -/** - FixedPoint tracks the state of a fixed point algorithm for the attributes of a grammar. - - In case there are fixed points running in multiple threads, each attribute is thread-local. 
- */ - +/** FixedPoint tracks the state of a fixed point algorithm for the attributes of + * a grammar. + * + * In case there are fixed points running in multiple threads, each attribute + * is thread-local. + */ private object FixedPoint { - private val _stabilized = new ThreadLocal[Boolean] + private val _stabilized = ThreadLocal[Boolean]() _stabilized.set(false) - def stabilized = _stabilized.get ; - def stabilized_= (v : Boolean) { _stabilized.set(v) } + def stabilized = _stabilized.get + def stabilized_=(v: Boolean) = { _stabilized.set(v) } - private val _running = new ThreadLocal[Boolean] + private val _running = ThreadLocal[Boolean]() _running.set(false) - def running = _running.get ; - def running_= (v : Boolean) { _running.set(v) } + def running = _running.get + def running_=(v: Boolean) = { _running.set(v) } - private val _changed = new ThreadLocal[Boolean] + private val _changed = ThreadLocal[Boolean]() _changed.set(false) - def changed = _changed.get ; - def changed_= (v : Boolean) { _changed.set(v) } + def changed = _changed.get + def changed_=(v: Boolean) = { _changed.set(v) } - private val _generation = new ThreadLocal[Int] + private val _generation = ThreadLocal[Int]() _generation.set(0) - def generation = _generation.get ; - def generation_= (v : Int) { _generation.set(v) } + def generation = _generation.get + def generation_=(v: Int) = { _generation.set(v) } - private val _master = new ThreadLocal[Object] + private val _master = ThreadLocal[Object]() _master.set(null) - def master = _master.get ; - def master_= (v : Object) { _master.set(v) } - - /** - Resets all of the fixed point variables for this thread. - */ - def reset () { - this.stabilized = false ; - this.running = false ; - this.master = null ; - this.changed = false ; - this.generation = 0 ; + def master = _master.get + def master_=(v: Object) = { _master.set(v) } + + /** Resets all of the fixed point variables for this thread. 
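+ * (Called from `Attributed.fix()` once the master computation has + * stabilized, so the next fixed point starts from a clean slate.)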
+ */ + def reset() = { + this.stabilized = false + this.running = false + this.master = null + this.changed = false + this.generation = 0 } } diff --git a/artifact/src/main/scala/library/CharSyntax.scala b/artifact/src/main/scala/library/CharSyntax.scala index a1ed600..d42dfc2 100644 --- a/artifact/src/main/scala/library/CharSyntax.scala +++ b/artifact/src/main/scala/library/CharSyntax.scala @@ -1,61 +1,44 @@ package fcd -import language.implicitConversions - -trait CharSyntax { self: Parsers with DerivedOps with Syntax => - +trait CharSyntax { self: Parsers & DerivedOps & Syntax => type Elem = Char - implicit def charParser(c: Char): Parser[Char] = accept(c) - def notChar(c: Char): Parser[Char] = acceptIf(_ != c) - val char = any - val letter = acceptIf(_.isLetter) - val upper = acceptIf(_.isUpper) - val lower = acceptIf(_.isLower) - val whitespace = acceptIf(_.isWhitespace) - val digit = acceptIf(_.isDigit) + val char = any + val letter = acceptIf(_.isLetter) + val upper = acceptIf(_.isUpper) + val lower = acceptIf(_.isLower) + val whitespace = acceptIf(_.isWhitespace) + val digit = acceptIf(_.isDigit) val letterOrDigit = acceptIf(_.isLetterOrDigit) - val space = acceptIf(_.isSpaceChar) - val spaces = many(space) - val newline = acceptIf(_ == '\n') + val space = acceptIf(_.isSpaceChar) + val spaces = many(space) + val newline = acceptIf(_ == '\n') - def charRange(from: Char, to: Char) = acceptIf { c => c >= from && c <= to } + def charRange(from: Char, to: Char) = acceptIf(c => c >= from && c <= to) - val asciiLetter = charRange('a', 'z') | charRange('A', 'Z') + val asciiLetter = charRange('a', 'z') | charRange('A', 'Z') - def string(s: String): Parser[String] = (acceptSeq(s) map (_.mkString)) + def string(s: String): Parser[String] = acceptSeq(s) ^^ (_.mkString) - sealed trait Stringable[T] { - def apply: T => String - } - object Stringable { - implicit val char: Stringable[Char] = new Stringable[Char] { - def apply = _.toString - } - implicit val charList: Stringable[List[Char]] = new Stringable[List[Char]] { - def apply = _.mkString - } - implicit val string: Stringable[String] = new Stringable[String] { - def apply = identity - } - implicit val stringList: Stringable[List[String]] = new Stringable[List[String]] { - def apply = _.mkString - } - implicit def seq[T: Stringable, U: Stringable]: Stringable[T ~ U] = new Stringable[T ~ U] { - def apply = { case l ~ r => - implicitly[Stringable[T]].apply(l) + implicitly[Stringable[U]].apply(r) - } - } + sealed trait Stringable[T] { def apply: T => String } + + given Stringable[Char] { def apply = _.toString } + given Stringable[List[Char]] { def apply = _.mkString } + given Stringable[String] { def apply = identity } + given stringList: Stringable[List[String]] { def apply = _.mkString } + given [T, U](using st: Stringable[T], su: Stringable[U]): Stringable[(T, U)] + with { + def apply = { case (l, r) => st.apply(l) ++ su.apply(r) } } - implicit def liftString(s: String): Parser[String] = string(s) + given Conversion[String, Parser[String]] = string + given Conversion[List[Char], String] = _.mkString - implicit def charString(cs: List[Char]): String = cs.mkString + given [T](using st: Stringable[T]): Conversion[Parser[T], Parser[String]] = _ ^^ st.apply - implicit def stringParser[T: Stringable](p: Parser[T]): Parser[String] = - p map { v => implicitly[Stringable[T]].apply(v) } + given Conversion[Char, Parser[Char]] = accept def noneOf(s: String): Parser[Char] = acceptIf(t => !(s contains t)) } diff --git 
a/artifact/src/main/scala/library/DerivativeParsers.scala b/artifact/src/main/scala/library/DerivativeParsers.scala index 47b02fa..23696da 100644 --- a/artifact/src/main/scala/library/DerivativeParsers.scala +++ b/artifact/src/main/scala/library/DerivativeParsers.scala @@ -16,89 +16,82 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def accepts: Boolean def failed: Boolean - def alt[U >: R](q: Parser[U]): Parser[U] = q alt2 p - def alt2[U >: R](q: Parser[U]): Parser[U] = new Alt(q, p) - def and[U](q: Parser[U]): Parser[(R, U)] = q and2 p - def and2[U](q: Parser[U]): Parser[(U, R)] = new And(q, p) - def seq[U](q: Parser[U]): Parser[R ~ U] = q seq2 p - def seq2[U](q: Parser[U]): Parser[U ~ R] = new Seq(q, p) - def flatMap[U](f: R => Parser[U]): Parser[U] = new FlatMap(p, f) + infix def alt[U >: R](q: Parser[U]): Parser[U] = Alt(p, q) + infix def and[U](q: Parser[U]): Parser[(R, U)] = And(p, q) + infix def seq[U](q: Parser[U]): Parser[(R, U)] = new Seq(p, q) + infix def flatMap[U](f: R => Parser[U]): Parser[U] = FlatMap(p, f) def done: Parser[R] = if (accepts) Succeed(p.results) else fail - def not: Parser[Unit] = new Not(p) + def not: Parser[Unit] = Not(p) // the map family - def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = new MapResults(p, f) - def map[U](f: R => U): Parser[U] = p mapResults { ress => ress map f } - def withResults[U](res: List[U]): Parser[U] = mapResults(_ => res) + infix def mapResults[U](f: (=> Results[R]) => Results[U]): Parser[U] = + MapResults(p, f) + infix def map[U](f: R => U): Parser[U] = p mapResults { ress => ress map f } + infix def withResults[U](res: List[U]): Parser[U] = mapResults(_ => res) // for optimization of biased choice def prefix: Parser[Unit] = { - if (accepts) { - always - } else { - eat { el => (p consume el).prefix } - } + if (accepts) always + else eat { el => (p consume el).prefix } } } object Fail extends NullaryPrintable("∅") with Parser[Nothing] { - override def results = List.empty - override def failed = true + override def results = List() + override def failed = true override def accepts = false - override def consume: Elem => this.type = in => this - - override def alt[U >: Nothing](q: Parser[U]): q.type = q - override def alt2[U >: Nothing](q: Parser[U]): q.type = q - override def seq[U](q: Parser[U]): this.type = this - override def seq2[U](q: Parser[U]): this.type = this - override def and[U](q: Parser[U]): this.type = this - override def and2[U](q: Parser[U]): this.type = this - override def map[U](f: Nothing => U): this.type = this - override def flatMap[U](g: Nothing => Parser[U]): this.type = this - override def mapResults[U](f: (=> Results[Nothing]) => Results[U]): this.type = this + override def consume = _ => this + + override def alt[U >: Nothing](q: Parser[U]) = q + override def seq[U](q: Parser[U]) = this + override def and[U](q: Parser[U]) = this + override def map[U](f: Nothing => U) = this + override def flatMap[U](g: Nothing => Parser[U]) = this + override def mapResults[U]( + f: (=> Results[Nothing]) => Results[U] + ) = this override def done = this - override def not: Parser[Unit] = Always + override def not = Always override def prefix = this override def toString: String = "∅" } object Always extends NullaryPrintable("∞") with Parser[Unit] { override def results = List(()) - override def failed = false + override def failed = false override def accepts = true - override def consume = in => Always - override def not: Parser[Unit] = fail - override def and[U](q: Parser[U]): 
Parser[(Unit, U)] = q map { r => ((), r) } - override def and2[U](q: Parser[U]): Parser[(U, Unit)] = q map { r => (r, ()) } + override def consume = _ => this + override def not = Fail + override def and[U](q: Parser[U]) = q map { ((), _) } // this is a valid optimization, however it almost never occurs. override def alt[U >: Unit](q: Parser[U]) = this - override def alt2[U >: Unit](q: Parser[U]) = this override def toString = "always" } - case class Succeed[R](ress: Results[R]) extends NullaryPrintable("ε") with Parser[R] { p => + case class Succeed[R](ress: Results[R]) + extends NullaryPrintable("ε") + with Parser[R] { p => override def results = ress - override def failed = false + override def failed = false override def accepts = true override def consume = (in: Elem) => fail override def toString = s"ε($ress)" override def done: Parser[R] = this - override def mapResults[T](f: (=> Results[R]) => Results[T]): Parser[T] = Succeed(f(ress)) + override def mapResults[T](f: (=> Results[R]) => Results[T]): Parser[T] = + Succeed(f(ress)) override def seq[U](q: Parser[U]): Parser[R ~ U] = q mapResults { ress2 => for (r <- ress; r2 <- ress2) yield (r, r2) } - override def seq2[U](q: Parser[U]): Parser[U ~ R] = q mapResults { ress2 => - for (r <- ress; r2 <- ress2) yield (r2, r) - } - override def flatMap[U](f: R => Parser[U]): Parser[U] = ress.map(f).reduce(_ alt _) + override def flatMap[U](f: R => Parser[U]): Parser[U] = + ress.map(f).reduce(_ alt _) } case class Accept(elem: Elem) extends Parser[Elem] { - def results = List.empty - def failed = false + def results = List() + def failed = false def accepts = false def consume = (in: Elem) => if (in == elem) { @@ -109,12 +102,15 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => lazy val name = "'" + escape(elem) + "'" def printNode = s"""$id [label="$name", shape=circle]""" - private def escape(c: Elem): String = c.toString.replace("\\", "\\\\").replace("\"", "\\\"") + private def escape(c: Elem): String = + c.toString.replace("\\", "\\\\").replace("\"", "\\\"") } - class AcceptIf(f: Elem => Boolean) extends NullaryPrintable("acceptIf") with Parser[Elem] { - def results = List.empty - def failed = false + class AcceptIf(f: Elem => Boolean) + extends NullaryPrintable("acceptIf") + with Parser[Elem] { + def results = List() + def failed = false def accepts = false def consume = (in: Elem) => if (f(in)) { @@ -124,18 +120,22 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => } } - class Not[R](val p: Parser[R]) extends UnaryPrintable("not", p) with Parser[Unit] { - def results = (if (p.results.isEmpty) List(()) else List.empty) - def failed = false // we never know, this is a conservative approx. + class Not[R](val p: Parser[R]) + extends UnaryPrintable("not", p) + with Parser[Unit] { + def results = (if (p.results.isEmpty) List(()) else List()) + def failed = false // we never know, this is a conservative approx. 
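+ // Note: negation commutes with derivation (`consume` below), so `not(p)` + // in effect recognizes the complement of `p`; with `Elem = Char`, e.g., + // `not('a')` accepts every input except the one-character input "a".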
def accepts = !p.accepts def consume: Elem => Parser[Unit] = in => (p consume in).not override def not = p withResults List(()) override def toString = s"not($p)" } - class Alt[R, U >: R](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("|", p, q) with Parser[U] { + class Alt[R, U >: R](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("|", p, q) + with Parser[U] { def results = (p.results ++ q.results).distinct - def failed = p.failed && q.failed + def failed = p.failed && q.failed def accepts = p.accepts || q.accepts def consume = (in: Elem) => (p consume in) alt (q consume in) @@ -144,42 +144,50 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def toString = s"($p | $q)" } - class Seq[R, U](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("~", p, q) with Parser[R ~ U] { + class Seq[R, U](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("~", p, q) + with Parser[R ~ U] { - def results = (for { r <- p.results; u <- q.results } yield (new ~(r, u))).distinct + def results = (for { r <- p.results; u <- q.results } yield (r, u)).distinct // q.failed forces q, which might not terminate for grammars with // infinite many nonterminals, like: // def foo(p) = 'a' ~ foo(p << 'a') // so we approximate similar to flatmap. - def failed = p.failed // || q.failed + def failed = p.failed // || q.failed def accepts = p.accepts && q.accepts - def consume = (in: Elem) => ((p consume in) seq q) alt (p.done seq (q consume in)) + def consume = (in: Elem) => + ((p consume in) seq q) alt (p.done seq (q consume in)) override def toString = s"($p ~ $q)" // canonicalization rule (1) from PLDI 2016 override def seq[T](r: Parser[T]): Parser[(R ~ U) ~ T] = - (p seq (q seq r)) map { - case (rr ~ (ru ~ rt)) => ((rr, ru), rt) - } + (p seq (q seq r)) map { case (rr, (ru, rt)) => ((rr, ru), rt) } } - class Done[R](val p: Parser[R]) extends UnaryPrintable(s"done", p) with Parser[R] { + class Done[R](val p: Parser[R]) + extends UnaryPrintable(s"done", p) + with Parser[R] { def results = p.results - def failed = p.failed + def failed = p.failed def accepts = p.accepts def consume = (el: Elem) => fail override def done = this override def toString = s"done($p)" } - class MapResults[R, U](val p: Parser[R], f: (=> Results[R]) => Results[U]) extends UnaryPrintable(s"mapResults", p) with Parser[U] { + class MapResults[R, U](val p: Parser[R], f: (=> Results[R]) => Results[U]) + extends UnaryPrintable(s"mapResults", p) + with Parser[U] { // preserve whether p actually has results (f might ignore its argument...) def results = if (p.results.isEmpty) List() else f(p.results).distinct - def failed = p.failed + def failed = p.failed def accepts = p.accepts def consume = (el: Elem) => (p consume el) mapResults f - override def mapResults[T](g: (=> Results[U]) => Results[T]): Parser[T] = p mapResults { res => g(f(res)) } - override def map[T](g: U => T): Parser[T] = p mapResults { res => f(res) map g } + override def mapResults[T](g: (=> Results[U]) => Results[T]): Parser[T] = + p mapResults { res => g(f(res)) } + override def map[T](g: U => T): Parser[T] = p mapResults { res => + f(res) map g + } override def done = p.done mapResults f // we can forget the results here. @@ -189,36 +197,34 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // canonicalization rule (2) from PLDI 2016 // allows for instance rewriting (always.map(f) & p) -> p.map(...f...) 
override def seq[S](q: Parser[S]): Parser[U ~ S] = - (p seq q).mapResults(rss => rss.unzip match { - case (us, ss) => f(us) zip ss - }) - override def seq2[S](q: Parser[S]): Parser[S ~ U] = - (p seq2 q).mapResults(rss => rss.unzip match { - case (ss, us) => ss zip f(us) - }) + (p seq q).mapResults(rss => + rss.unzip match { case (us, ss) => f(us) zip ss } + ) override def and[S](q: Parser[S]): Parser[(U, S)] = - (p and q).mapResults(rss => rss.unzip match { - case (us, ss) => f(us) zip ss - }) - override def and2[S](q: Parser[S]): Parser[(S, U)] = - (p and2 q).mapResults(rss => rss.unzip match { - case (ss, us) => ss zip f(us) - }) + (p and q).mapResults(rss => + rss.unzip match { case (us, ss) => f(us) zip ss } + ) } - class And[R, U](val p: Parser[R], val q: Parser[U]) extends BinaryPrintable("&", p, q) with Parser[(R, U)] { - def results = (for { r <- p.results; u <- q.results } yield ((r, u))).distinct - def failed = p.failed || q.failed + class And[R, U](val p: Parser[R], val q: Parser[U]) + extends BinaryPrintable("&", p, q) + with Parser[(R, U)] { + def results = + (for { r <- p.results; u <- q.results } yield ((r, u))).distinct + def failed = p.failed || q.failed def accepts = p.accepts && q.accepts def consume = (in: Elem) => (p consume in) and (q consume in) override def not = p.not alt q.not override def toString = s"($p & $q)" } - class FlatMap[R, U](val p: Parser[R], f: R => Parser[U]) extends UnaryPrintable("flatMap", p) with Parser[U] { - def results = ((p.results map f) flatMap (_.results)).distinct //res().distinct + class FlatMap[R, U](val p: Parser[R], f: R => Parser[U]) + extends UnaryPrintable("flatMap", p) + with Parser[U] { + def results = + ((p.results map f) flatMap (_.results)).distinct // res().distinct def accepts = !results.isEmpty - def failed = p.failed // that's the best we know + def failed = p.failed // that's the best we know def consume: Elem => Parser[U] = in => { val next = (p consume in) flatMap f @@ -228,49 +234,50 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => override def toString = "flatMap" } - class Nonterminal[+R](_p: => Parser[R]) extends Parser[R] { + class Nonterminal[R](_p: => Parser[R]) extends Parser[R] { lazy val p = _p - def accepts: Boolean = propertiesFix.nullable.value - def failed: Boolean = propertiesFix.empty.value + def accepts: Boolean = propertiesFix.nullable.value + def failed: Boolean = propertiesFix.empty.value def results: Results[R] = resultsFix.results.value // This separation into two fixed points is essential to // prevent excessive recomputation. 
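+ // (`accepts`/`failed` only force the cheap boolean fixed point + // `propertiesFix`; `results` runs its own, potentially much larger, + // `resultsFix`.) For example, the left-recursive grammar from the test + // suite converges through exactly this iteration: + // lazy val A: NT[Any] = (A <~ '+') ~ A | digit + // (A <<< "1+2").results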
- protected[this] object propertiesFix extends Attributed { - object nullable extends Attribute[Boolean](false,_ || _,implies) - object empty extends Attribute[Boolean](true,_ && _,follows) + private object propertiesFix extends Attributed { + object nullable extends Attribute[Boolean](false, _ || _, implies) + object empty extends Attribute[Boolean](true, _ && _, follows) - empty := p.failed - nullable := p.accepts + empty := p.failed + nullable := p.accepts - override protected[this] def updateAttributes() { + override protected def updateAttributes() = { empty.update() nullable.update() } } - protected[this] object resultsFix extends Attributed { - object results extends Attribute[List[R]]( - List.empty, - (nw, ol) => (nw ++ ol).distinct, - (nw, ol) => nw.toSet.subsetOf(ol.toSet)) + private object resultsFix extends Attributed { + object results + extends Attribute[List[R]]( + List(), + (nw, ol) => (nw ++ ol).distinct, + (nw, ol) => nw.toSet.subsetOf(ol.toSet) + ) results := p.results - override protected[this] def updateAttributes() { - results.update() - } + override protected def updateAttributes() = results.update() } - private[this] val cache: mutable.ListMap[Elem, Parser[R]] = mutable.ListMap.empty + private val cache: mutable.HashMap[Elem, Parser[R]] = mutable.HashMap() // Wrapping in `nonterminal` is necessary for left-recursive // grammars and for grammars like "DerivativeParsers / preprocessor" // that recursively derive. Optimizing the nonterminal node away causes // divergence on these grammars. Worse, in the latter case // forcing `next` will already cause divergence. override def consume: Elem => Parser[R] = el => - cache.getOrElseUpdate(el, + cache.getOrElseUpdate( + el, if (p.failed) fail else @@ -282,7 +289,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => this } var name = "nt" - private val rec = new DynamicVariable[Boolean](false) + private val rec = DynamicVariable[Boolean](false) override def toString = if (rec.value) s"nt(${System.identityHashCode(this)})" @@ -304,7 +311,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => val fail: Parser[Nothing] = Fail val always: Parser[Unit] = Always def succeed[R](res: R): Parser[R] = Succeed(List(res)) - def acceptIf(cond: Elem => Boolean): Parser[Elem] = new AcceptIf(cond) + def acceptIf(cond: Elem => Boolean): Parser[Elem] = AcceptIf(cond) // combinators with parser arguments def not[R](p: Parser[R]): Parser[Unit] = p.not @@ -313,7 +320,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def alt[R, U >: R](p: Parser[R], q: Parser[U]) = p alt q def seq[R, U](p: Parser[R], q: Parser[U]) = p seq q - def and[R, U](p: Parser[R], q: Parser[U]): Parser[(R, U)] = p and q + def and[R, U](p: Parser[R], q: Parser[U]) = p and q def feed[R](in: Elem, p: => Parser[R]) = p consume in @@ -321,11 +328,14 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => def done[T](p: Parser[T]): Parser[T] = p.done - override def nonterminal[R](_p: => Parser[R]): Nonterminal[R] = new Nonterminal(_p) - def nonterminal[R](name: String)(_p: => Parser[R]): Nonterminal[R] = new Nonterminal(_p).named(name) + override def nonterminal[R](_p: => Parser[R]): Nonterminal[R] = + Nonterminal(_p) + def nonterminal[R](name: String)(_p: => Parser[R]): Nonterminal[R] = + Nonterminal(_p).named(name) def feed[R](p: Parser[R], in: Elem) = p.consume(in) - def parse[R](p: Parser[R], in: Iterable[Elem]): Results[R] = feedAll(p, in).results + def parse[R](p: Parser[R], in: Iterable[Elem]): Results[R] = + feedAll(p,
in).results // for testing override def isSuccess[R](p: Parser[R]): Boolean = p.accepts @@ -333,7 +343,7 @@ trait DerivativeParsers extends Parsers { self: DerivedOps => // optimization: Once p accepts, p as a prefix will always accept. // often used to implement biased choice: (not(prefix(p)) &> q - override def prefix: Parser[Any] => Parser[Unit] = p => p.prefix + override def prefix: Parser[Any] => Parser[Unit] = _.prefix } object DerivativeParsers extends RichParsers with DerivativeParsers { diff --git a/artifact/src/main/scala/library/DerivedOps.scala b/artifact/src/main/scala/library/DerivedOps.scala index 37ed254..9c4329d 100644 --- a/artifact/src/main/scala/library/DerivedOps.scala +++ b/artifact/src/main/scala/library/DerivedOps.scala @@ -1,6 +1,8 @@ package fcd -trait DerivedOps { self: Parsers with Syntax => +import scala.language.implicitConversions + +trait DerivedOps { self: Parsers & Syntax => val any: Parser[Elem] = acceptIf(_ => true) @@ -8,19 +10,20 @@ trait DerivedOps { self: Parsers with Syntax => def no(t: Elem): Parser[Elem] = acceptIf(_ != t) - def acceptSeq[ES <% Iterable[Elem]](es: ES): Parser[List[Elem]] = + def acceptSeq(es: Iterable[Elem]): Parser[List[Elem]] = es.foldRight[Parser[List[Elem]]](succeed(Nil)) { (x, pxs) => - accept(x) ~ pxs map mkList + accept(x) ~ pxs ^^ mkList } def some[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v = seq(p, many_v) ^^ mkList some_v } + def many[T](p: Parser[T]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v = seq(p, many_v) ^^ mkList many_v } @@ -29,25 +32,25 @@ trait DerivedOps { self: Parsers with Syntax => // def always[T](t: T): Parser[T] = // many(any) map { _ => t } - def oneOf[ES <% Iterable[Elem]](s: ES): Parser[Elem] = acceptIf { - t => s.exists(_ == t) + def oneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { t => + s.exists(_ == t) } - def noneOf[ES <% Iterable[Elem]](s: ES): Parser[Elem] = acceptIf { - t => s.forall(_ != t) + def noneOf(s: Iterable[Elem]): Parser[Elem] = acceptIf { t => + s.forall(_ != t) } def opt[T](p: Parser[T]): Parser[Option[T]] = - alt(p map { r => Some(r) }, succeed(None)) + alt(p ^^ { r => Some(r) }, succeed(None)) def manyN[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else p ~ manyN(n - 1, p) map { case r ~ rs => r :: rs } + else p ~ manyN(n - 1, p) ^^ mkList } def atMost[T](n: Int, p: Parser[T]): Parser[List[T]] = { if (n == 0) succeed(Nil) - else (p ~ atMost(n - 1, p) map { case r ~ rs => r :: rs }) | succeed(Nil) + else (p ~ atMost(n - 1, p) ^^ mkList) | succeed(Nil) } def manySep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { @@ -57,45 +60,36 @@ trait DerivedOps { self: Parsers with Syntax => // same optimization as above for many and some def someSep[T](p: Parser[T], sep: Parser[Any]): Parser[List[T]] = { lazy val many_v: NT[List[T]] = alt(sep ~> some_v, succeed(Nil)) - lazy val some_v: Parser[List[T]] = seq(p, many_v) map { case p ~ ps => p :: ps } + lazy val some_v = seq(p, many_v) ^^ mkList some_v } - def manyCount(p: Parser[Any]): Parser[Int] = - many(p) map { _.size } - - def someCount(p: Parser[Any]): Parser[Int] = - some(p) map { _.size } + def manyCount(p: Parser[Any]): Parser[Int] = many(p) ^^ { _.size } + def someCount(p: Parser[Any]): Parser[Int] = some(p) ^^ { 
_.size } // distributive law - chains a list of parsers // --> in Haskell one would use `traverse` def distr[T](ps: List[Parser[T]]): Parser[List[T]] = - ps.foldRight(succeed[List[T]](Nil)) { (p, l) => - (p ~ l) map { case a ~ b => a :: b } - } + ps.foldRight(succeed[List[T]](Nil)) { (p, l) => (p ~ l) ^^ mkList } - def join[T](p: Parser[Parser[T]]): Parser[T] = p flatMap done + def join[T](p: Parser[Parser[T]]): Parser[T] = p >> done // A parser that captures the tokens consumed by `p` - def consumed[T](p: Parser[T]): Parser[List[Elem]] = - many(any) <& p + def consumed[T](p: Parser[T]): Parser[List[Elem]] = many(any) <& p - def eat[R](f: Elem => Parser[R]): Parser[R] = - any >> f + def eat[R](f: Elem => Parser[R]): Parser[R] = any >> f def delegate[T](p: Parser[T]): Parser[Parser[T]] = succeed(p) | eat { c => delegate(p << c) } def delegateN[T](n: Int, p: Parser[T]): Parser[Parser[T]] = - if (n <= 0) - succeed(p) - else - eat { c => delegateN(n - 1, p << c) } + if (n <= 0) succeed(p) + else eat { c => delegateN(n - 1, p << c) } // collects the results of parsers def collect[T](ps: List[Parser[T]]): Parser[List[T]] = ps.foldRight(succeed[List[T]](Nil)) { (p, l) => - done(p) >> { r => l.map(r :: _) } + done(p) >> { r => l ^^ (r :: _) } } def includes[T](p: Parser[T]): Parser[T] = @@ -105,20 +99,22 @@ trait DerivedOps { self: Parsers with Syntax => // described by the function `f`. def repeat[T](f: Parser[T] => Parser[Parser[T]]): Parser[T] => Parser[T] = { val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - done(p) | nonterminal(f(p) >> rec) - }) + def rec(p: Parser[T]): Parser[T] = + cache.getOrElseUpdate( + p, + { done(p) | nonterminal(f(p) >> rec) } + ) rec } // repeat is just an instance of repeatAll - def repeatAll[T](f: List[Parser[T]] => Parser[List[Parser[T]]]): List[Parser[T]] => Parser[List[T]] = ps => - collect(ps) | f(ps) >> repeatAll(f) + def repeatAll[T](f: List[Parser[T]] => Parser[List[Parser[T]]])( + ps: List[Parser[T]] + ): Parser[List[T]] = collect(ps) | f(ps) >> repeatAll(f) - private def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } + private def mkList[T](xs: (T, List[T])) = xs._1 :: xs._2 - val succeedForever: NT[Unit] = - succeed(()) | (any ~> succeedForever) + lazy val succeedForever: NT[Unit] = succeed(()) | (any ~> succeedForever) def rightDerivative[R](p: Parser[R], elem: Elem): Parser[R] = done(p << elem) | eat { c => rightDerivative(p << c, elem) } @@ -132,12 +128,10 @@ trait DerivedOps { self: Parsers with Syntax => def lookahead[T](p: Parser[Any], q: Parser[T]): Parser[T] = not(prefix(p)) &> q - //consumed(p) >> { in => q <<< in } - + // consumed(p) >> { in => q <<< in } // some extension point for optimization - def prefix: Parser[Any] => Parser[Unit] = p => p ~> always - + def prefix: Parser[Any] => Parser[Unit] = _ ~> always // per-element action performed on p def rep[T](f: Elem => Parser[T] => Parser[T]) = @@ -147,8 +141,7 @@ trait DerivedOps { self: Parsers with Syntax => def filter[T](pred: Elem => Boolean): Parser[T] => Parser[T] = rep(el => p => if (pred(el)) (p << el) else p) - def skip[T]: Parser[T] => Parser[T] = - rep(el => p => p) + def skip[T]: Parser[T] => Parser[T] = rep(el => p => p) def mapIn[T](f: Elem => Elem): Parser[T] => Parser[T] = rep(el => p => p << f(el)) @@ -156,37 +149,39 @@ trait DerivedOps { self: Parsers with Syntax => def mapInPartial[T](f: PartialFunction[Elem, Elem]): Parser[T] => Parser[T] = mapIn(f orElse { 
case x => x }) - def inRegion[T](region: Parser[Any], f: Parser[Parser[T]] => Parser[Parser[T]]): Parser[T] => Parser[T] = { + def inRegion[T]( + region: Parser[Any], + f: Parser[Parser[T]] => Parser[Parser[T]] + ): Parser[T] => Parser[T] = { - // to prevent accessive re-parsing we introduce some caching on this - // parser combinator here. - val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] - - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - - lazy val dp = delegate(p) - nonterminal ( - done(p) | biasedAlt( - region &> f(dp) >> rec, - (any &> dp) >> rec)) - }) - rec - } + // to prevent excessive re-parsing we introduce some caching on this + // parser combinator here. + val cache = scala.collection.mutable.WeakHashMap.empty[Parser[T], Parser[T]] + def rec(p: Parser[T]): Parser[T] = + cache.getOrElseUpdate( + p, { + lazy val dp = delegate(p) + nonterminal( + done(p) | biasedAlt(region &> f(dp) >> rec, (any &> dp) >> rec) + ) + } + ) + rec + } // Greedy repetition - def greedyMany[T](p: Parser[T]): Parser[List[T]] = greedySome(p) | succeed(Nil) + def greedyMany[T](p: Parser[T]) = greedySome(p) | succeed(Nil) // Instead of a class use a closure: def greedySome[T]: Parser[T] => NT[List[T]] = { p => - def withNext(p: Parser[T], ps: Parser[List[T]]): Parser[List[T]] = - done(p) ~ ps ^^ { case t ~ ts => t :: ts } + def withNext(p: Parser[T], ps: Parser[List[T]]) = + done(p) ~ ps ^^ mkList def forceRead(curr: Parser[T]): Parser[List[T]] = withNext(curr, succeed(Nil)) | eat { el => - biasedAlt( forceRead(curr << el), - withNext(curr, greedySome(p) << el)) + biasedAlt(forceRead(curr << el), withNext(curr, greedySome(p) << el)) } forceRead(p) diff --git a/artifact/src/main/scala/library/Parsers.scala b/artifact/src/main/scala/library/Parsers.scala index 14c0bdb..d5fb305 100644 --- a/artifact/src/main/scala/library/Parsers.scala +++ b/artifact/src/main/scala/library/Parsers.scala @@ -1,7 +1,5 @@ package fcd -import language.higherKinds - trait Parsers { // the token type (`Elem`) and the type of the results are left abstract @@ -42,19 +40,12 @@ trait Parsers { // For testing def isSuccess[R](p: Parser[R]): Boolean = !isFailure(p) def isFailure[R](p: Parser[R]): Boolean = !isSuccess(p) - def accepts[R, ES <% Iterable[Elem]](p: Parser[R], s: ES): Boolean = isSuccess(feedAll(p, s)) + def accepts[R](p: Parser[R], s: Iterable[Elem]): Boolean = isSuccess( + feedAll(p, s) + ) // As optimization def always: Parser[Unit] } -trait RichParsers extends Parsers with Syntax with DerivedOps with CharSyntax - -// A trait to bake parsers in a nested cake -trait ParserUsage { - // Override _parsers in concrete tests suites with the - // appropriate parser implementation. - type Parsers - def _parsers: Parsers - lazy val parsers: Parsers = _parsers -} +trait RichParsers extends Parsers, Syntax, DerivedOps, CharSyntax diff --git a/artifact/src/main/scala/library/Printable.scala b/artifact/src/main/scala/library/Printable.scala index 52651b6..abd8654 100644 --- a/artifact/src/main/scala/library/Printable.scala +++ b/artifact/src/main/scala/library/Printable.scala @@ -18,7 +18,6 @@ trait Printable { results{results.toSet.mkString(", ")} - private lazy val printGraph: String = s"""strict digraph G { | ${printNode} @@ -26,7 +25,7 @@ trait Printable { |""".stripMargin('|') def printToFile(path: String): Unit = { - val is = new ByteArrayInputStream(printGraph.getBytes("UTF-8")) + val is = ByteArrayInputStream(printGraph.getBytes("UTF-8")) (s"dot -Tpng -o $path" #< is) !
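+ // shells out to Graphviz's `dot`, which must be on the PATH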
} @@ -39,7 +38,8 @@ abstract class NullaryPrintable(val name: String) extends Printable { def printNode = s"""$id [label="$name", shape=circle]""" } -abstract class UnaryPrintable(val name: String, _p: => Printable) extends Printable { +abstract class UnaryPrintable(val name: String, _p: => Printable) + extends Printable { private lazy val p = _p def printNode = s""" ${id} [shape=none, fontsize=8, fontname=mono, label=<$table>]; @@ -47,7 +47,8 @@ abstract class UnaryPrintable(val name: String, _p: => Printable) extends Printa |${p.printNode}""".stripMargin('|') } -abstract class BinaryPrintable(val name: String, p: Printable, q: Printable) extends Printable { +abstract class BinaryPrintable(val name: String, p: Printable, q: Printable) + extends Printable { def printNode = s""" ${id} [shape=none, fontsize=8, fontname=mono, label=<$table>]; | ${id}:sw -> ${p.id} diff --git a/artifact/src/main/scala/library/Syntax.scala b/artifact/src/main/scala/library/Syntax.scala index 1a59b9e..9ff53a8 100644 --- a/artifact/src/main/scala/library/Syntax.scala +++ b/artifact/src/main/scala/library/Syntax.scala @@ -1,62 +1,67 @@ package fcd -import language.implicitConversions - -trait Syntax { self: Parsers with DerivedOps => - - implicit class ParserOps[R, P <% Parser[R]](p: P) { - def <<(in: Elem): Parser[R] = self.feed(p, in) - def <<<(in: Seq[Elem]): Parser[R] = self.feedAll(p, in) - def parse(s: Seq[Elem]) = self.parse(p, s) - - def map[U](f: R => U): Parser[U] = self.map(p, f) - def flatMap[U](f: R => Parser[U]): Parser[U] = self.flatMap(p, f) - +trait Syntax { self: Parsers & DerivedOps => + extension [R](p: Parser[R]) { + def <<(in: Elem) = feed(p, in) + def <<<(in: Seq[Elem]) = feedAll(p, in) def ~[U](q: Parser[U]) = seq(p, q) - def ~>[U](q: Parser[U]) = seq(p, q) map { case (a, b) => b } - def <~[U](q: Parser[U]) = seq(p, q) map { case (a, b) => a } - + def <~[U](q: Parser[U]) = map(seq(p, q), _._1) + def ~>[U](q: Parser[U]) = map(seq(p, q), _._2) def |[U >: R](q: Parser[U]) = alt(p, q) - def &[U](q: Parser[U]) = and(p, q) - def <&[U](q: Parser[U]) = and(p, q) map { _._1 } - def &>[U](q: Parser[U]) = and(p, q) map { _._2 } + def <&[U](q: Parser[U]) = map(and(p, q), _._1) + def &>[U](q: Parser[U]) = map(and(p, q), _._2) // biased Alternative def <|[U >: R](q: Parser[U]) = biasedAlt(p, q) def |>[U >: R](q: Parser[U]) = biasedAlt(q, p) - def ^^[U](f: R => U): Parser[U] = p map f - def ^^^[U](u: => U): Parser[U] = p map { _ => u } - - def >>[U](f: R => Parser[U]): Parser[U] = p flatMap f + def ^^[U](f: R => U) = map(p, f) + def ^^^[U](u: => U) = map(p, _ => u) + def >>[U](f: R => Parser[U]) = flatMap(p, f) def ? 
= opt(p) def * = many(p) def + = some(p) } - implicit def liftToParsers[R, U](p: Parser[R])(implicit conv: R => U): Parser[U] = - p map { conv } + given liftToParser[R, U](using + conv: R => U + ): Conversion[Parser[R], Parser[U]] = map(_, conv) // tag nonterminals - this allows automatic insertion of nt-markers final case class NT[+R](parser: Parser[R]) - implicit def toParser[R](nt: NT[R]): Parser[R] = nt.parser + given [R]: Conversion[NT[R], Parser[R]] = _.parser + + import scala.language.implicitConversions implicit def toNT[R](parser: => Parser[R]): NT[R] = NT(nonterminal(parser)) - implicit def tupleSeq2[T1, T2, O](f: (T1, T2) => O): (T1 ~ T2) => O = { - case t1 ~ t2 => f(t1, t2) - } - implicit def tupleSeq3[T1, T2, T3, O](f: (T1, T2, T3) => O): (T1 ~ T2 ~ T3) => O = { - case t1 ~ t2 ~ t3 => f(t1, t2, t3) + given tupleSeq3[T1, T2, T3, O] + : Conversion[(T1, T2, T3) => O, (T1 ~ T2 ~ T3) => O] with { + def apply(f: (T1, T2, T3) => O) = { case ((t1, t2), t3) => f(t1, t2, t3) } } - implicit def tupleSeq4[T1, T2, T3, T4, O](f: (T1, T2, T3, T4) => O): (T1 ~ T2 ~ T3 ~ T4) => O = { - case t1 ~ t2 ~ t3 ~ t4 => f(t1, t2, t3, t4) + + given tupleSeq4[T1, T2, T3, T4, O] + : Conversion[(T1, T2, T3, T4) => O, (T1 ~ T2 ~ T3 ~ T4) => O] with { + def apply(f: (T1, T2, T3, T4) => O) = { case (((t1, t2), t3), t4) => + f(t1, t2, t3, t4) + } } - implicit def tupleSeq5[T1, T2, T3, T4, T5, O](f: (T1, T2, T3, T4, T5) => O): (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O = { - case t1 ~ t2 ~ t3 ~ t4 ~ t5 => f(t1, t2, t3, t4, t5) + + given tupleSeq5[T1, T2, T3, T4, T5, O] + : Conversion[(T1, T2, T3, T4, T5) => O, (T1 ~ T2 ~ T3 ~ T4 ~ T5) => O] + with { + def apply(f: (T1, T2, T3, T4, T5) => O) = { + case ((((t1, t2), t3), t4), t5) => f(t1, t2, t3, t4, t5) + } } - implicit def tupleSeq6[T1, T2, T3, T4, T5, T6, O](f: (T1, T2, T3, T4, T5, T6) => O): (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O = { - case t1 ~ t2 ~ t3 ~ t4 ~ t5 ~ t6 => f(t1, t2, t3, t4, t5, t6) + + given tupleSeq6[T1, T2, T3, T4, T5, T6, O]: Conversion[ + (T1, T2, T3, T4, T5, T6) => O, + (T1 ~ T2 ~ T3 ~ T4 ~ T5 ~ T6) => O + ] with { + def apply(f: (T1, T2, T3, T4, T5, T6) => O) = { + case (((((t1, t2), t3), t4), t5), t6) => f(t1, t2, t3, t4, t5, t6) + } } } diff --git a/artifact/src/test/scala/BasicCombinatorsTest.scala b/artifact/src/test/scala/BasicCombinatorsTest.scala index b6706f7..416813a 100644 --- a/artifact/src/test/scala/BasicCombinatorsTest.scala +++ b/artifact/src/test/scala/BasicCombinatorsTest.scala @@ -1,84 +1,86 @@ package fcd package test -import org.scalatest._ +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec -trait BasicCombinatorTests extends CustomMatchers { self: FunSpec with Matchers => +trait BasicCombinatorTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => - import parsers._ + import parsers.{succeed as succ, *} describe("parser \"abc\"") { val p = 'a' ~ 'b' ~ 'c' - p shouldParse "abc" - p shouldNotParse "abcd" + p `shouldParse` "abc" + p `shouldNotParse` "abcd" } describe("parser \"ab | ac\"") { val p = ('a' ~ 'b') | ('a' ~ 'c') - p shouldParse "ab" - p shouldParse "ac" - p shouldNotParse "bc" - p shouldNotParse "a" - p shouldNotParse "abc" + p `shouldParse` "ab" + p `shouldParse` "ac" + p `shouldNotParse` "bc" + p `shouldNotParse` "a" + p `shouldNotParse` "abc" } describe("parser \"baaa | ba\"") { - val p: Parser[_] = ('b' ~ 'a' ~ 'a' ~ 'a') | 'b' ~ 'a' - p shouldParse "baaa" - p shouldParse "ba" - ((p ~ 'c' ~ 'o') | (p ~ 'c')) shouldParse "bac" - ((p ~ 'c' ~ 'o') | (p ~ 'c')) shouldParse "baco" + val p 
= ('b' ~ 'a' ~ 'a' ~ 'a') | 'b' ~ 'a' + p `shouldParse` "baaa" + p `shouldParse` "ba" + ((p ~ 'c' ~ 'o') | (p ~ 'c')) `shouldParse` "bac" + ((p ~ 'c' ~ 'o') | (p ~ 'c')) `shouldParse` "baco" } describe("parser \"(baaa | ba) aa\"") { - val p: Parser[_] = ("baaa" | "ba") ~ "aa" - p shouldParse "baaaaa" - p shouldParse "baaa" + val p = ("baaa" | "ba") ~ "aa" + p `shouldParse` "baaaaa" + p `shouldParse` "baaa" } describe("parser \"succeed(a) b\"") { - val p = succeed('a') ~ 'b' - p shouldParse "b" - p shouldNotParse "" + val p = succ('a') ~ 'b' + p `shouldParse` "b" + p `shouldNotParse` "" } describe("parser \"succeed(a) succeed(b)\"") { - val p = succeed('a') ~ succeed('b') - p shouldParse "" + val p = succ('a') ~ succ('b') + p `shouldParse` "" } describe("parser \"succeed(a) | succeed(b)\"") { - val p = succeed('a') | succeed('b') - p shouldParse "" + val p = succ('a') | succ('b') + p `shouldParse` "" } describe("parser \"(a a a | a a)+") { - val p: Parser[_] = 'a' ~ 'a' ~ 'a' | 'a' ~ 'a' - describe("some(_)") { some(p) shouldParse "aaaa" } - describe("_ ~ 'b'") { (p ~ 'b') shouldParse "aaab" } + val p = 'a' ~ 'a' ~ 'a' | 'a' ~ 'a' + describe("some(_)") { some(p) `shouldParse` "aaaa" } + describe("_ ~ 'b'") { (p ~ 'b') `shouldParse` "aaab" } describe("some(_) ~ 'b'") { - (some(p) ~ 'b') shouldParse "aab" - (some(p) ~ 'b') shouldParse "aaab" - (some(p) ~ 'b') shouldParse "aaaaab" + (some(p) ~ 'b') `shouldParse` "aab" + (some(p) ~ 'b') `shouldParse` "aaab" + (some(p) ~ 'b') `shouldParse` "aaaaab" } describe("some(_ ~ 'a') ~ 'b'") { - (some(p ~ 'a') ~ 'b') shouldParse "aaaab" - (some(p ~ 'a') ~ 'b') shouldParse "aaab" + (some(p ~ 'a') ~ 'b') `shouldParse` "aaaab" + (some(p ~ 'a') ~ 'b') `shouldParse` "aaab" } } describe("parser \"'a'+\"") { val p = some('a') - val largeInput = "a" * 100 + val largeInput = List.fill(100)('a').mkString - p shouldParse "a" - p shouldParse "aaaaaa" - p shouldParse largeInput - p shouldNotParse "" - p shouldNotParse ("b" + largeInput) - p shouldNotParse (largeInput + "b") + p `shouldParse` "a" + p `shouldParse` "aaaaaa" + p `shouldParse` largeInput + p `shouldNotParse` "" + p `shouldNotParse` "b" + largeInput + p `shouldNotParse` largeInput + "b" } } diff --git a/artifact/src/test/scala/CustomMatchers.scala b/artifact/src/test/scala/CustomMatchers.scala index d5b0f79..8deb129 100644 --- a/artifact/src/test/scala/CustomMatchers.scala +++ b/artifact/src/test/scala/CustomMatchers.scala @@ -1,40 +1,38 @@ package fcd package test -import org.scalatest._ -import org.scalatest.matchers._ +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.matchers.{BeMatcher, MatchResult} +import org.scalatest.Tag -trait CustomMatchers { self: FunSpec with Matchers => +trait CustomMatchers[+P <: Parsers](val parsers: P) extends Matchers { + self: AnyFunSpec => - // Due to initialization problems we have to use this pattern - // of def and lazy val. - // - // Override _parsers in concrete tests suites with the - // appropriate parser implementation. 
- type Parsers = RichParsers - def _parsers: RichParsers - lazy val parsers = _parsers - import parsers.{ Results, isSuccess, Parser, accepts, Elem } + import parsers.{Elem, Parser, accepts, isSuccess, parse} - implicit class ParserTests[T, P <% Parser[T]](p: => P) { - def shouldParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should parse "$s" """, tags:_*) { - accepts(p, s) shouldBe true + extension [T](p: => Parser[T]) { + def shouldParse(s: Iterable[Elem], tags: Tag*) = + it(s"""should parse "$s" """, tags*) { + accepts(p, s) `shouldBe` true } - def shouldNotParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should not parse "$s" """, tags:_*) { - accepts(p, s) shouldBe false + def shouldNotParse(s: Iterable[Elem], tags: Tag*) = + it(s"""should not parse "$s" """, tags*) { + accepts(p, s) `shouldBe` false + } + // for unambiguous parses + def shouldParseWith(s: Iterable[Elem], result: T) = + it(s"""should parse "$s" with correct result""") { + parse(p, s) `shouldBe` List(result) } } - class SuccessMatcher extends BeMatcher[Parser[_]] { - def apply(left: Parser[_]) = + class SuccessMatcher[T] extends BeMatcher[Parser[T]] { + def apply(left: Parser[T]) = MatchResult( isSuccess(left), left.toString + " was not successful", left.toString + " was successful" ) } - lazy val successful = new SuccessMatcher - lazy val failure = not (successful) } diff --git a/artifact/src/test/scala/DerivativeParsersTests.scala b/artifact/src/test/scala/DerivativeParsersTests.scala index c1fc23d..93b7629 100644 --- a/artifact/src/test/scala/DerivativeParsersTests.scala +++ b/artifact/src/test/scala/DerivativeParsersTests.scala @@ -1,24 +1,25 @@ package fcd package test -import org.scalatest._ import scala.language.higherKinds import language.implicitConversions +import org.scalatest.funspec.AnyFunSpec -class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers +class DerivativeParsersTests + extends AnyFunSpec + with CustomMatchers(paper) with BasicCombinatorTests with NegationTests - with LeftrecTests - with Section3 with Section4 with Section7 { - - def _parsers: DerivativeParsers.type = DerivativeParsers - override lazy val parsers: DerivativeParsers.type = _parsers - - import parsers._ - - // it is necessary to rename some combinators since names are already - // bound by scala test. - import parsers.{ fail => err, noneOf => nonOf, oneOf => one, not => neg } + with LeftrecTests { + + import parsers.{ + succeed as succ, + not as neg, + fail as err, + noneOf as nonOf, + oneOf as onOf, + * + } // This test illustrates how to write graph representations of the // parsers to a file. 
(To execute it replace `ignore` by `describe` and @@ -26,50 +27,48 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers describe("printing graph representations of parsers") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ A A.printToFile("test.png") } describe("Examples in section 3") { import section_3_2._ - number shouldParse "42" + number `shouldParse` "42" } describe("Indentation with feed") { import section_3_4_improved._ val xs = many(some('x') ~ '\n') - indented(xs) shouldParse " xxx\n xxxx\n" - indented(xs) shouldParse " xxxxxxxxxx\n xxxxxxxxxx\n" + indented(xs) `shouldParse` " xxx\n xxxx\n" + indented(xs) `shouldParse` " xxxxxxxxxx\n xxxxxxxxxx\n" lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block - | some('x') ~ '\n' - ) + | some('x') ~ '\n') lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) - stmt shouldParse "while (true):\n xxxxx\n xxxxx\n" - stmt shouldParse "while (true):\n while (true):\n xxxxx\n xxxx\n" + stmt `shouldParse` "while (true):\n xxxxx\n xxxxx\n" + stmt `shouldParse` "while (true):\n while (true):\n xxxxx\n xxxx\n" } describe("Indentation with delegation") { import section_3_5_improved._ val xs = many(some('x') ~ '\n') - indented(xs) shouldParse " xxx\n xxxx\n" - indented(xs) shouldParse " xxxxxxxxxx\n xxxxxxxxxx\n" + indented(xs) `shouldParse` " xxx\n xxxx\n" + indented(xs) `shouldParse` " xxxxxxxxxx\n xxxxxxxxxx\n" lazy val stmt: NT[Any] = ("while" ~ space ~ "(true):" ~ block - | some('x') ~ '\n' - ) + | some('x') ~ '\n') lazy val stmts = many(stmt) lazy val block: NT[Any] = '\n' ~ indented(stmts) - stmt shouldParse "while (true):\n xxxxx\n xxxxx\n" - stmt shouldParse "while (true):\n while (true):\n xxxxx\n xxxx\n" + stmt `shouldParse` "while (true):\n xxxxx\n xxxxx\n" + stmt `shouldParse` "while (true):\n while (true):\n xxxxx\n xxxx\n" } describe("Simplified tables for paper") { @@ -77,20 +76,18 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers lazy val xs = many(some('x') ~ '\n') - table(xs) shouldParse """+---+ - ^|xxx| - ^+---+ - ^""".stripMargin('^') - - table(xs) shouldParse """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') + table(xs) `shouldParse` "+---+\n|xxx|\n+---+\n" + + table(xs) `shouldParse` + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') } describe("Table parser with delegation") { @@ -98,117 +95,123 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers type Layout = List[Int] // A parser computing the table layout - lazy val head: Parser[Layout] = some('+'~> manyCount('-')) <~ '+' <~ '\n' - - - def table[T](content: Parser[T]): Parser[List[List[T]]] = head >> { layout => - // After knowing the layout the row-separators are fixed - val rowSeparator = layout.map { n => ("-" * n) + "+" }.foldLeft("+")(_+_) ~ '\n' - val initCells = layout.map { _ => content } - - // one line of a cell, given a fixed width. 
- def cell: Int => Parser[T] => Parser[Parser[T]] = width => p => - (delegateN(width, p) <~ '|') ^^ { p => p << '\n' } - - // repeatAll is like repeat, but with a list of parsers as the state. - val row = repeatAll[T] { ps => - '|' ~> distr(zipWith(layout map cell, ps)) <~ '\n' - } - - some(row(initCells) <~ rowSeparator) + lazy val head: Parser[Layout] = some('+' ~> manyCount('-')) <~ '+' <~ '\n' + + def table[T](content: Parser[T]): Parser[List[List[T]]] = head >> { + layout => + // After knowing the layout the row-separators are fixed + val rowSeparator = + layout + .map { n => List.fill(n)('-').mkString + "+" } + .foldLeft("+")(_ + _) ~ '\n' + val initCells = layout.map { _ => content } + + // one line of a cell, given a fixed width. + def cell: Int => Parser[T] => Parser[Parser[T]] = + width => p => (delegateN(width, p) <~ '|') ^^ { p => p << '\n' } + + // repeatAll is like repeat, but with a list of parsers as the state. + val row = repeatAll[T] { ps => + '|' ~> distr(zipWith(layout map cell, ps)) <~ '\n' + } + + some(row(initCells) <~ rowSeparator) } lazy val xs = many(some('x') ~ '\n') - table(xs) shouldParse """+---+ - ^|xxx| - ^+---+ - ^""".stripMargin('^') - - table(xs) shouldParse """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - - table(xs) shouldNotParse """+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---x--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - + table(xs) `shouldParse` + """+---+ + ^|xxx| + ^+---+ + ^""".stripMargin('^') + + table(xs) `shouldParse` + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') + + table(xs) `shouldNotParse` + """+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---x--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') lazy val nestedTables: NT[Any] = table(xs | nestedTables) - nestedTables shouldParse """+---+--------+------------+ - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^|xxx||x|xxxx||xxxxxxxxxxxx| - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - - nestedTables shouldNotParse """+---+--------+------------+ - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^|xxx||x|oxxx||xxxxxxxxxxxx| - ^|xxx|+-+----+|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^|xxx|xxxxxxxx|xxxxxxxxxxxx| - ^+---+--------+------------+ - ^""".stripMargin('^') - + nestedTables `shouldParse` + """+---+--------+------------+ + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^|xxx||x|xxxx||xxxxxxxxxxxx| + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') + + nestedTables `shouldNotParse` + """+---+--------+------------+ + ^|xxx|+-+----+|xxxxxxxxxxxx| + 
^|xxx||x|oxxx||xxxxxxxxxxxx| + ^|xxx|+-+----+|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^|xxx|xxxxxxxx|xxxxxxxxxxxx| + ^+---+--------+------------+ + ^""".stripMargin('^') // helper that should be in the stdlib - def zipWith[A,B](l1: List[A => B], l2: List[A]): List[B] = + def zipWith[A, B](l1: List[A => B], l2: List[A]): List[B] = (l1 zip l2).map { case (f, x) => f(x) } } describe("flatMap uses fixed point computation") { - lazy val fm: NT[Int] = succeed(1) | fm.flatMap { n => if (n < 5) succeed(n + 1) else err } + lazy val fm: NT[Int] = succ(1) | fm.flatMap { n => + if (n < 5) succ(n + 1) else err + } - fm.results.toSet shouldBe Set(1,2,3,4,5) + fm.results.toSet `shouldBe` Set(1, 2, 3, 4, 5) } - describe("Stream preprocessing") { - lazy val ones: NT[Any] = succeed(()) | '1' ~ ones - lazy val zeros: NT[Any] = succeed(()) | '0' ~ zeros + lazy val ones: NT[Any] = succ(()) | '1' ~ ones + lazy val zeros: NT[Any] = succ(()) | '0' ~ zeros lazy val oneszeros: Parser[Any] = '1' ~ '1' ~ '0' ~ '0' def bin(p: Parser[Any]): NT[Any] = done(p) | (('a' ~> bin(p << '1')) | ('b' ~> bin(p << '0'))) - ones shouldParse "1111" + ones `shouldParse` "1111" bin(ones).accepts - bin(ones) shouldParse "aaaaa" - bin(ones) shouldNotParse "aaaaab" - bin(zeros) shouldParse "bbbbb" - bin(zeros) shouldNotParse "bbbbba" - bin(oneszeros) shouldParse "aabb" - bin(oneszeros) shouldNotParse "aabbb" - - bin(ones) shouldNotParse ("b" * 50) + bin(ones) `shouldParse` "aaaaa" + bin(ones) `shouldNotParse` "aaaaab" + bin(zeros) `shouldParse` "bbbbb" + bin(zeros) `shouldNotParse` "bbbbba" + bin(oneszeros) `shouldParse` "aabb" + bin(oneszeros) `shouldNotParse` "aabbb" + + bin(ones) `shouldNotParse` ("b" `repeat` 50) } - describe("Results of ambiguous parses") { lazy val A: NT[Any] = (A <~ '+') ~ A | digit - def shouldParseWith(str: String)(expected: Set[Any]) { - (A <<< str).results.toSet should be (expected) + def shouldParseWith(str: String)(expected: Set[Any]) = { + (A <<< str).results.toSet should be(expected) } shouldParseWith("3") { Set('3') } @@ -216,7 +219,6 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers shouldParseWith("3+2+1") { Set(('3', ('2', '1')), (('3', '2'), '1')) } } - // Usecase // ------- // Standard example from data dependent parsing papers (like "One parser to rule them all", @@ -227,7 +229,9 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers // input stream. Benefit of our approach: Body parser never sees more than N characters. describe("IMAP") { - val number = consumed(charRange('1', '9') ~ many(digit) | '0').map { _.mkString.toInt } + val number = consumed(charRange('1', '9') ~ many(digit) | '0').map { + _.mkString.toInt + } val header: Parser[Int] = ('{' ~ space) ~> number <~ (space ~ '}') @@ -242,31 +246,29 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers def IMAP[T](body: Parser[T]): Parser[T] = header >> feedNTimes(body) - IMAP(many('a')) shouldParse "{ 1 }a" - IMAP(many('a')) shouldNotParse "{ 1 }" - IMAP(many('a')) shouldNotParse "{ 1 }aa" - IMAP(many('a')) shouldParse "{ 7 }aaaaaaa" - IMAP(many('a')) shouldNotParse "{ 7 }aaaaaaaa" - IMAP(many('a')) shouldNotParse "{ 7 }" + IMAP(many('a')) `shouldParse` "{ 1 }a" + IMAP(many('a')) `shouldNotParse` "{ 1 }" + IMAP(many('a')) `shouldNotParse` "{ 1 }aa" + IMAP(many('a')) `shouldParse` "{ 7 }aaaaaaa" + IMAP(many('a')) `shouldNotParse` "{ 7 }aaaaaaaa" + IMAP(many('a')) `shouldNotParse` "{ 7 }" } - - // Usecase. 
interleaving parsers def interleave[T, S](p: Parser[T], q: Parser[S]): Parser[(T, S)] = - (done(p) & done(q)) | eat { c => - interleave(q, (p << c)) map { case (s, t) => (t, s) } - } + (done(p) & done(q)) | eat { c => + interleave(q, (p << c)) map { case (s, t) => (t, s) } + } describe("interleaving two parsers") { val p = 'a' ~ 'a' ~ 'a' val q = 'b' ~ 'b' ~ 'b' - interleave(p, q) shouldParse "ababab" - interleave(p, q) shouldNotParse "abababab" - interleave(p, q) shouldNotParse "abab" - interleave(p, q) shouldNotParse "ab" - interleave(p, q) shouldNotParse "" + interleave(p, q) `shouldParse` "ababab" + interleave(p, q) `shouldNotParse` "abababab" + interleave(p, q) `shouldNotParse` "abab" + interleave(p, q) `shouldNotParse` "ab" + interleave(p, q) `shouldNotParse` "" } // Usecase. Indentation that also skips empty lines @@ -277,199 +279,203 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers else { readLine(p << c) } } - done(p) | // do not indent and p can accept - (space ~ space) ~> readLine(p) | // indent by 2 and read one line, then recurse - (many(space) ~ newline) >> { _ => indent(p) } // skip lines with whitespace only, then recurse + done(p) | // do not indent and p can accept + (space ~ space) ~> readLine( + p + ) | // indent by 2 and read one line, then recurse + (many(space) ~ newline) >> { _ => + indent(p) + } // skip lines with whitespace only, then recurse } - describe("indenting parsers") { val xs = many(some('x') ~ '\n') - indent(xs) shouldParse "" - indent(xs) shouldParse " xx\n" - indent(xs) shouldParse " xxxxx\n" - indent(xs) shouldParse " xxxxx\n xxxxxxx\n" - indent(xs) shouldParse """ xxxxx - | xxxxxxx - | xxxxxxxx - | xxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxx - | xxxxxxxxxx - | xxxxxxxxxxx - | xxxxxxxxxxxx - | xxxxxxxxxxxxx - | xxxxxxxxxxxxxx - | xxxxxxxxxxxxxxx - | xxxxxxxxxxxxxxxx - | xxxxxxxxxxxxxxx - | xxxxxxxxxxxxxx - |""".stripMargin('|') - - indent(indent(xs)) shouldParse " xx\n" - indent(indent(xs)) shouldParse " xxxxx\n" - indent(indent(xs)) shouldParse " xxxxx\n xxxxxxx\n" - - indent(indent(xs)) shouldParse " xxxxx\n\n xxxxxxx\n" - indent(indent(xs)) shouldParse " xxxxx\n \n xxxxxxx\n" - indent(indent(xs)) shouldParse " xxxxx\n \n\n \n xxxxxxx\n" - indent(indent(xs)) shouldNotParse " xxxxx\n \n\n \n xxxxxxx\n" - indent(indent(xs)) shouldNotParse " xxxxx\n \n\n \n xxxxxxx\n" + indent(xs) `shouldParse` "" + indent(xs) `shouldParse` " xx\n" + indent(xs) `shouldParse` " xxxxx\n" + indent(xs) `shouldParse` " xxxxx\n xxxxxxx\n" + indent(xs) `shouldParse` + """ xxxxx + | xxxxxxx + | xxxxxxxx + | xxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxx + | xxxxxxxxxx + | xxxxxxxxxxx + | xxxxxxxxxxxx + | xxxxxxxxxxxxx + | xxxxxxxxxxxxxx + | xxxxxxxxxxxxxxx + | xxxxxxxxxxxxxxxx + | xxxxxxxxxxxxxxx + | xxxxxxxxxxxxxx + |""".stripMargin('|') + + indent(indent(xs)) `shouldParse` " xx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n xxxxxxx\n" + + indent(indent(xs)) `shouldParse` " xxxxx\n\n xxxxxxx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n \n xxxxxxx\n" + indent(indent(xs)) `shouldParse` " xxxxx\n \n\n \n xxxxxxx\n" + indent(indent(xs)) `shouldNotParse` " xxxxx\n \n\n \n xxxxxxx\n" + indent(indent(xs)) `shouldNotParse` " xxxxx\n \n\n \n xxxxxxx\n" } describe("Parens parser") { import section_4_2.parens - parens shouldParse "" - parens shouldParse "()" - parens shouldParse "(())" - parens shouldNotParse "(()" + parens `shouldParse` "" + parens 
`shouldParse` "()" + parens `shouldParse` "(())" + parens `shouldNotParse` "(()" } describe("Retroactively, allow spaces in arbitrary positions") { - import section_4_2.{ spaced, parens } + import section_4_2.{spaced, parens} val sp = spaced(parens) - sp shouldParse "((()))" - sp shouldParse "((( )))" - sp shouldParse "( (( )))" - sp shouldParse "( (( ))) " - sp shouldParse "( (\n (\n )) ) " - sp shouldNotParse "( ( ( )) " + sp `shouldParse` "((()))" + sp `shouldParse` "((( )))" + sp `shouldParse` "( (( )))" + sp `shouldParse` "( (( ))) " + sp `shouldParse` "( (\n (\n )) ) " + sp `shouldNotParse` "( ( ( )) " } describe("Allowing parens in code blocks") { import section_4_2._ - as shouldParse "aaa\n" - as shouldParse "\n" - as shouldParse "aa\naa\n" - - both shouldParse "a\n" - both shouldParse """aaa - |~~~ - |() - |~~~ - |aaaaa - |""".stripMargin('|') - - both shouldParse "a \n\n~~~ \n()\n~~~\naaa\n" - - both shouldNotParse """aaa - |~~~ - |( - |~~~ - |aaaaa - |""".stripMargin('|') - - both shouldParse """aaa - |~~~ - |((()) - |~~~ - |aaaaa - | - |~~~ - |) - |~~~ - |""".stripMargin('|') + as `shouldParse` "aaa\n" + as `shouldParse` "\n" + as `shouldParse` "aa\naa\n" + + both `shouldParse` "a\n" + both `shouldParse` + """aaa + |~~~ + |() + |~~~ + |aaaaa + |""".stripMargin('|') + + both `shouldParse` "a \n\n~~~ \n()\n~~~\naaa\n" + + both `shouldNotParse` + """aaa + |~~~ + |( + |~~~ + |aaaaa + |""".stripMargin('|') + + both `shouldParse` + """aaa + |~~~ + |((()) + |~~~ + |aaaaa + | + |~~~ + |) + |~~~ + |""".stripMargin('|') } - - describe("Unescape") { import section_4_2._ - unescape(many('\n')) shouldParse """\n\n\n""" - unescape(many("\n" | "a")) shouldParse """\na\n\n""" - unescape(many("\n" | "a")) shouldParse """\na\n\naaa""" + unescape(many('\n')) `shouldParse` """\n\n\n""" + unescape(many("\n" | "a")) `shouldParse` """\na\n\n""" + unescape(many("\n" | "a")) `shouldParse` """\na\n\naaa""" } describe("Combined examples") { import section_4_2._ - combined shouldParse """aaa - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aa | - ^+----+ - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aaaa| - ^|~~~ | - ^|(())| - ^|~~~ | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aa | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') - - combined shouldParse """+----+ - ^|aa | - ^|~~~ | - ^|(())| - ^|~~~ | - ^|aaaa| - ^+----+ - ^""".stripMargin('^') + combined `shouldParse` + """aaa + ^""".stripMargin('^') + + combined `shouldParse` "+----+\n|aaaa|\n+----+\n" + combined `shouldParse` "+----+\n|aa |\n+----+\n" + + combined `shouldParse` + """+----+ + ^|aaaa| + ^|~~~ | + ^|(())| + ^|~~~ | + ^|aaaa| + ^+----+ + ^""".stripMargin('^') + + combined `shouldParse` "+----+\n|aa |\n|aaaa|\n+----+\n" + + combined `shouldParse` + """+----+ + ^|aa | + ^|~~~ | + ^|(())| + ^|~~~ | + ^|aaaa| + ^+----+ + ^""".stripMargin('^') } describe("Biased choice") { val p = biasedAlt("foo", some(letter)) ~ "bar" - p shouldParse "foobar" - p shouldNotParse "foozbar" - p shouldParse "barbar" + p `shouldParse` "foobar" + p `shouldNotParse` "foozbar" + p `shouldParse` "barbar" // this test shows that we can only implement a locally biased choice val q = biasedAlt("foo", "f") ~ "oo" // should actually *not* parse "foo", but does: - q shouldParse "foo" + q `shouldParse` "foo" } describe("Greedy repetition") { - it ("should return only the result of the longest match") { - greedySome(some('a')) parse 
"" shouldBe List() - greedyMany(some('a')) parse "" shouldBe List(List()) - greedySome(some('a')) parse "a" shouldBe List(List(List('a'))) - greedySome(some('a')) parse "aaa" shouldBe List(List(List('a', 'a', 'a'))) + it("should return only the result of the longest match") { + parse(greedySome(some('a')), "") `shouldBe` List() + parse(greedyMany(some('a')), "") `shouldBe` List(List()) + parse(greedySome(some('a')), "a") `shouldBe` List(List(List('a'))) + parse(greedySome(some('a')), "aaa") `shouldBe` + List(List(List('a', 'a', 'a'))) } - it ("should also return longest match if other parser succeeded first") { + it("should also return longest match if other parser succeeded first") { lazy val p = some("ab") | some("a") | some("b") - greedySome(p) parse "ab" shouldBe List(List(List("ab"))) - greedySome(p) parse "abab" shouldBe List(List(List("ab", "ab"))) - greedySome(p) parse "abbab" shouldBe List(List(List("ab"), List("b"), List("ab"))) - greedySome(p) parse "abbaab" shouldBe List(List(List("ab"), List("b"), List("a", "a"), List("b"))) - greedySome(p) parse "aaaab" shouldBe List(List(List("a", "a", "a", "a"), List("b"))) + parse(greedySome(p), "ab") `shouldBe` List(List(List("ab"))) + parse(greedySome(p), "abab") `shouldBe` List(List(List("ab", "ab"))) + parse(greedySome(p), "abbab") `shouldBe` List( + List(List("ab"), List("b"), List("ab")) + ) + parse(greedySome(p), "abbaab") `shouldBe` List( + List(List("ab"), List("b"), List("a", "a"), List("b")) + ) + parse(greedySome(p), "aaaab") `shouldBe` List( + List(List("a", "a", "a", "a"), List("b")) + ) lazy val q = "ab" | "a" | "b" - greedySome(q) parse "ab" shouldBe List(List("ab")) - greedySome(q) parse "abab" shouldBe List(List("ab", "ab")) - greedySome(q) parse "abbab" shouldBe List(List("ab", "b", "ab")) - greedySome(q) parse "abbaab" shouldBe List(List("ab", "b", "a", "ab")) - greedySome(q) parse "aaaab" shouldBe List(List("a", "a", "a", "ab")) + parse(greedySome(q), "ab") `shouldBe` List(List("ab")) + parse(greedySome(q), "abab") `shouldBe` List(List("ab", "ab")) + parse(greedySome(q), "abbab") `shouldBe` List(List("ab", "b", "ab")) + parse(greedySome(q), "abbaab") `shouldBe` List(List("ab", "b", "a", "ab")) + parse(greedySome(q), "aaaab") `shouldBe` List(List("a", "a", "a", "ab")) } // This shows that our implementation is only locally greedy - println(greedySome("ab" | "a") ~ "b" parse "abab") + println(parse(greedySome("ab" | "a") ~ "b", "abab")) } describe("how to locally rewrite biased choice") { @@ -488,15 +494,15 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers val r: Parser[Any] = ("oo" | "b") val ex: Parser[Any] = biasedAlt(p, q) ~ r - // ex shouldNotParse "foo" //-> fails + // ex `shouldNotParse` "foo" //-> fails // If the right-hand-side `r` is locally known the parser can be // rewritten to: val rewrite = p ~ r | (neg(p ~ always) &> (q ~ r)) - rewrite shouldNotParse "foo" - rewrite shouldParse "foooo" - rewrite shouldParse "fb" + rewrite `shouldNotParse` "foo" + rewrite `shouldParse` "foooo" + rewrite `shouldParse` "fb" } // Since "lexing" is performed after indentation checking, but indentation @@ -521,53 +527,77 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers // regions inside skip will not be treated by f. // `region` and `skip` should not have an intersection. 
- def transform[T](region: Parser[Any], skip: Parser[Any], f: Parser[Parser[T]] => Parser[Parser[T]]): Parser[T] => Parser[T] = { + def transform[T]( + region: Parser[Any], + skip: Parser[Any], + f: Parser[Parser[T]] => Parser[Parser[T]] + ): Parser[T] => Parser[T] = { // to prevent excessive re-parsing we introduce some caching on this // parser combinator here. val cache = mutable.WeakHashMap.empty[Parser[T], Parser[T]] - def rec: Parser[T] => Parser[T] = p => cache.getOrElseUpdate(p, { - - lazy val dp = delegate(p) - nonterminal ( - done(p) | biasedAlt( - ( skip &> dp - | region &> f(dp) - ) >> rec, - (any &> dp) >> rec)) - }) + def rec(p: Parser[T]): Parser[T] = + cache.getOrElseUpdate( + p, { + lazy val dp = delegate(p) + nonterminal( + done(p) | biasedAlt( + (skip &> dp | region &> f(dp)) >> rec, + (any &> dp) >> rec + ) + ) + } + ) rec } // parsers as input transformers def filterNewlines[T] = filter[T](_ != '\n') - def mask[T] = mapInPartial[T] { case '\n' => '↩' } - def toSpace[T] = mapInPartial[T] { case '\n' => ' ' } - def unmask[T] = mapInPartial[T] { case '↩' => '\n' } + def mask[T] = mapInPartial[T] { case '\n' => '↩' } + def toSpace[T] = mapInPartial[T] { case '\n' => ' ' } + def unmask[T] = mapInPartial[T] { case '↩' => '\n' } // some lexers - val singleString: Parser[String] = consumed('"' ~ many(nonOf("\"\n")) ~ '"') - val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') - val multilineString: Parser[String] = consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") + val singleString: Parser[String] = + consumed('"' ~ many(nonOf("\"\n")) ~ '"') + val comment: Parser[String] = consumed('#' ~ many(nonOf("\n")) ~ '\n') + val multilineString: Parser[String] = + consumed("'''" ~ neg(always ~ prefix("'''")) ~ "'''") - singleString shouldParse "\"hello world\"" - singleString shouldNotParse "\"hello\nworld\"" - singleString shouldParse "\"hello'''world\"" - multilineString shouldParse "'''Hello \" \n\" world'''" + singleString `shouldParse` "\"hello world\"" + singleString `shouldNotParse` "\"hello\nworld\"" + singleString `shouldParse` "\"hello'''world\"" + multilineString `shouldParse` "'''Hello \" \n\" world'''" // for testing val collect = consumed(always) ^^ { x => x.mkString } // for now just filter newlines - val p = transform[String](multilineString, singleString | comment, filterNewlines)(collect) + val p = transform[String]( + multilineString, + singleString | comment, + filterNewlines + )(collect) it("should only filter newlines in multiline strings") { - (p parse "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n") should be (List("hello '''foo\"bar''' test\n foo \" bar'''foo \"\n")) + parse( + p, + "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n" + ) `should` be( + List("hello '''foo\"bar''' test\n foo \" bar'''foo \"\n") + ) } // here we can already observe performance problems (about 400ms): - p shouldParse "hello '''foo\n\"bar''' test\n foo \" bar'''foo \"\n some content that is not a program, but could be one \n. # ''' some comment \nIt contains newlines \n, \"and some Strings\". Even Multiline strings with '''newlines\n'''." - + p `shouldParse` + """hello '''foo + |"bar''' test + | foo " bar'''foo " + | some content that is not a program, but could be one + |. # ''' some comment + |It contains newlines + |, "and some Strings". 
Even Multiline strings with '''newlines + |'''.""".stripMargin lazy val noText: Parser[Any] = comment | singleString | multilineString @@ -579,32 +609,35 @@ val pairs = Map[Elem, Elem]('(' -> ')', '[' -> ']', '{' -> '}') val (opening, closing) = (pairs.keys.toList, pairs.values.toList) - - lazy val dyck: NT[Any] = one(opening) >> { paren => many(dyck) ~ pairs(paren) } - //'(' ~> many(dyck) <~ ')' + lazy val dyck: NT[Any] = onOf(opening) >> { paren => + many(dyck) ~ pairs(paren) + } + // '(' ~> many(dyck) <~ ')' // within comments and strings filter out everything val parens = // we need to intersect with the outermost parenthesis to prevent // parsing something like "aaa()aaa" - (one(opening) >> { paren => always ~ pairs(paren) }) &> - transform[Any](noText | nonOf(opening) & nonOf(closing) , err, skip)(dyck) - - parens shouldParse "()" - parens shouldParse "(())" - parens shouldParse "(()()())" - parens shouldParse "(()[]())" - parens shouldParse "(()[()[]]())" - parens shouldNotParse "(()[()[]())" - parens shouldNotParse "a (()) a" - parens shouldNotParse "(()" - parens shouldParse "( hello world ())" - parens shouldParse "( [# foo \"()) \n ()]{\" [ \" hello } world ())" - parens shouldNotParse "( [# foo \"()) \n ()]{\" [ \" hello world ())" - parens shouldNotParse "( [# foo \"()) \n ()]\" [ \" hello } world ())" - parens shouldNotParse "( [# foo \"()) \n )]{\" [ \" hello } world ())" - parens shouldParse "( hello \" ) \"world ())" - parens shouldNotParse "( hello \" ) \"" + (onOf(opening) >> { paren => always ~ pairs(paren) }) &> + transform[Any](noText | nonOf(opening) & nonOf(closing), err, skip)( + dyck + ) + + parens `shouldParse` "()" + parens `shouldParse` "(())" + parens `shouldParse` "(()()())" + parens `shouldParse` "(()[]())" + parens `shouldParse` "(()[()[]]())" + parens `shouldNotParse` "(()[()[]())" + parens `shouldNotParse` "a (()) a" + parens `shouldNotParse` "(()" + parens `shouldParse` "( hello world ())" + parens `shouldParse` "( [# foo \"()) \n ()]{\" [ \" hello } world ())" + parens `shouldNotParse` "( [# foo \"()) \n ()]{\" [ \" hello world ())" + parens `shouldNotParse` "( [# foo \"()) \n ()]\" [ \" hello } world ())" + parens `shouldNotParse` "( [# foo \"()) \n )]{\" [ \" hello } world ())" + parens `shouldParse` "( hello \" ) \"world ())" + parens `shouldNotParse` "( hello \" ) \"" lazy val escapedNL = '\\' ~ '\n' @@ -617,21 +650,35 @@ // reusing the definition of `indented` import section_3_5_improved._ - def joiningIndent[T]: Parser[T] => Parser[T] = p => - ilj(elj(mlj(indented(unmask(p))))) - + def joiningIndent[T]: Parser[T] => Parser[T] = + p => ilj(elj(mlj(indented(unmask(p))))) it("should mask newlines and perform line joining before checking indentation") { - (joiningIndent(collect) parse " foo'''a \n a'''\n bar\n ( \n )\n") should be ( + parse( + joiningIndent( + collect + ), + " foo'''a \n a'''\n bar\n ( \n )\n" + ) `should` be( List("foo'''a \n a'''\nbar\n( \n )\n") ) - (joiningIndent(collect) parse " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n") should be ( - List("'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n") + parse( + joiningIndent( + collect + ), + " '''some \n multiline \n'''\n ( # comment (\n ) hello\n test and \\\n escaped\n" + ) `should` be( + List( + "'''some \n multiline \n'''\n( # comment (\n ) hello\ntest and \\\n escaped\n" + )
) } - joiningIndent(collect) shouldParse " '''some \n multiline \n'''\n ( # comment (\n )\n" - joiningIndent(collect) shouldNotParse " '''some \n multiline \n''\n ( # comment (\n )\n" - + joiningIndent( + collect + ) `shouldParse` " '''some \n multiline \n'''\n ( # comment (\n )\n" + joiningIndent( + collect + ) `shouldNotParse` " '''some \n multiline \n''\n ( # comment (\n )\n" val WS: Parser[Any] = ' ' val spacesNoNl = some(WS) @@ -643,38 +690,43 @@ class DerivativeParsersTests extends FunSpec with Matchers with CustomMatchers // Python Parser Skeleton - lazy val expr: NT[Any] = id | singleString | multilineString | "(" ~> spaces ~> expr <~ spaces <~ ")" | "[" ~> spaces ~> opt(someSep(expr, spaces ~ "," ~ spaces) ~ spaces) <~ "]" - lazy val stmt: NT[Any] = expr <~ lineEnd | "def" ~> spacesNoNl ~> id ~ ("():" ~> suite) + lazy val expr: NT[Any] = + id | singleString | multilineString | "(" ~> spaces ~> expr <~ spaces <~ ")" | "[" ~> spaces ~> opt( + someSep(expr, spaces ~ "," ~ spaces) ~ spaces + ) <~ "]" + lazy val stmt: NT[Any] = + expr <~ lineEnd | "def" ~> spacesNoNl ~> id ~ ("():" ~> suite) lazy val stmts: NT[Any] = someSep(stmt, spaces) lazy val suite: NT[Any] = lineEnd ~> joiningIndent(stmts) - stmt shouldParse "def foo():\n '''hello\n '''\n" - stmt shouldNotParse "def foo():\n \"'''hello\n '''\"\n" - stmt shouldParse "def foo():\n '''hello\n ''' # some comment \n" - stmt shouldNotParse "def foo():\n # '''hello\n ''' some comment \n" - stmt shouldParse "def foo():\n []\n" - stmt shouldParse "def foo():\n [foo, bar]\n" - stmt shouldParse "def foo():\n [foo, \nbar]\n" - stmt shouldNotParse "def foo():\n \"[foo, \nbar]\"\n" - stmt shouldParse "def foo():\n \"[foo, bar]\"\n" - stmt shouldParse "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" - stmt shouldParse "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" + stmt `shouldParse` "def foo():\n '''hello\n '''\n" + stmt `shouldNotParse` "def foo():\n \"'''hello\n '''\"\n" + stmt `shouldParse` "def foo():\n '''hello\n ''' # some comment \n" + stmt `shouldNotParse` "def foo():\n # '''hello\n ''' some comment \n" + stmt `shouldParse` "def foo():\n []\n" + stmt `shouldParse` "def foo():\n [foo, bar]\n" + stmt `shouldParse` "def foo():\n [foo, \nbar]\n" + stmt `shouldNotParse` "def foo():\n \"[foo, \nbar]\"\n" + stmt `shouldParse` "def foo():\n \"[foo, bar]\"\n" + stmt `shouldParse` "def foo():\n foo\n def bar():\n \"hello\"\n bar\n" + stmt `shouldParse` "def foo():\n foo\n def bar():\n '''\nhello\n'''\n bar\n" } - describe("Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`") { + describe( + "Regression: `not` should preserve invariant `p.results.isEmpty != p.accepts`" + ) { val p = neg("a" | "b") val p_a = p <<< "a" val p_b = p <<< "b" val p_c = p <<< "c" - it ("should preserve the invariant when performing optimization rewrites") { - p_a.accepts shouldBe false - p_a.accepts shouldBe (!p_a.results.isEmpty) - p_b.accepts shouldBe false - p_b.accepts shouldBe (!p_b.results.isEmpty) - p_c.accepts shouldBe true - p_c.accepts shouldBe (!p_c.results.isEmpty) + it("should preserve the invariant when performing optimization rewrites") { + p_a.accepts `shouldBe` false + p_a.accepts `shouldBe` (!p_a.results.isEmpty) + p_b.accepts `shouldBe` false + p_b.accepts `shouldBe` (!p_b.results.isEmpty) + p_c.accepts `shouldBe` true + p_c.accepts `shouldBe` (!p_c.results.isEmpty) } } - } diff --git a/artifact/src/test/scala/LeftrecTests.scala b/artifact/src/test/scala/LeftrecTests.scala index 6314a55..b0f3f8a 100644 --- 
a/artifact/src/test/scala/LeftrecTests.scala +++ b/artifact/src/test/scala/LeftrecTests.scala @@ -1,39 +1,42 @@ package fcd package test -import org.scalatest._ -trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec - import parsers._ +trait LeftrecTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => + + import parsers.{succeed as succ, *} describe("laziness of alt") { describe("p = p | .") { lazy val p: NT[Any] = p | any - p shouldParse "a" + p `shouldParse` "a" } describe("p = p ~ . | .") { - lazy val p: NT[_] = p ~ any | any - p shouldParse "a" + lazy val p: NT[Any] = p ~ any | any + p `shouldParse` "a" } describe("p = . | p ~ .") { - lazy val p: NT[_] = any | p ~ any - p shouldParse "a" + lazy val p: NT[Any] = any | p ~ any + p `shouldParse` "a" } describe("p = (. | .) >> { (. | p) ^^ id }") { - lazy val p: NT[Any] = (p | any) flatMap { _ => (any | p) map identity } - p.shouldParse("aa") - p.shouldParse("aaaaa") + lazy val p: NT[Any] = (p | any) >> { _ => (any | p) ^^ identity } + p `shouldParse` "aa" + p `shouldParse` "aaaaa" } describe("p = (. | p) >> { a }") { - lazy val p: NT[Any] = (any | p) flatMap { _ => 'a' } - p.shouldParse("aa") - p.shouldParse("aaa") - p.shouldParse("aaaaaa") + lazy val p: NT[Any] = (any | p) >> { _ => 'a' } + p `shouldParse` "aa" + p `shouldParse` "aaa" + p `shouldParse` "aaaaaa" } } @@ -41,31 +44,31 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("p = . ~ p") { lazy val p: NT[Any] = any ~ p - p shouldNotParse "a" + p `shouldNotParse` "a" } describe("p = p ~ .") { lazy val p: NT[Any] = p ~ any - p shouldNotParse "a" + p `shouldNotParse` "a" } } describe("left recursion") { describe("A = A ~ a | empty") { - lazy val A: NT[_] = A ~ 'a' | succeed(42) + lazy val A: NT[Any] = A ~ 'a' | succ(42) - A shouldParse "" - A shouldParse "a" - A shouldParse "aa" + A `shouldParse` "" + A `shouldParse` "a" + A `shouldParse` "aa" } describe("A = empty | A ~ a ") { - lazy val A: NT[_] = succeed(42) | A ~ 'a' + lazy val A: NT[Any] = succ(42) | A ~ 'a' - A shouldParse "" - A shouldParse "a" - A shouldParse "aa" + A `shouldParse` "" + A `shouldParse` "a" + A `shouldParse` "aa" } // Simple example of indirect leftrecursion from @@ -73,93 +76,93 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => describe("one level indirect leftrecursion") { lazy val num: Parser[Any] = many(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ A + lazy val B: NT[Any] = succ(()) ~ A - // A shouldParse "1" - // A shouldParse "12" - // A shouldParse "12-32" - // A shouldParse "12-32-45" + // A `shouldParse` "1" + // A `shouldParse` "12" + // A `shouldParse` "12-32" + // A `shouldParse` "12-32-45" - B shouldParse "1" - B shouldParse "12" - B shouldParse "12-32" - B shouldParse "12-32-45" + B `shouldParse` "1" + B `shouldParse` "12" + B `shouldParse` "12-32" + B `shouldParse` "12-32-45" } describe("two levels indirect leftrecursion") { lazy val num: Parser[Any] = some(digit) lazy val A: NT[Any] = B ~ '-' ~ num | num - lazy val B: NT[Any] = succeed(()) ~ C ~ '+' ~ num - lazy val C: NT[Any] = succeed(()) ~ A - - A shouldParse "1" - A shouldParse "12" - C shouldParse "2" - C shouldParse "22" - B shouldParse "12+32" - A shouldParse "12+32-42" - A shouldParse "12+12-32+45-44" - A shouldNotParse "" - A shouldNotParse "12+13+14" - A shouldNotParse "12+13+14-14-56" + lazy val B: NT[Any] = succ(()) ~ C 
~ '+' ~ num + lazy val C: NT[Any] = succ(()) ~ A + + A `shouldParse` "1" + A `shouldParse` "12" + C `shouldParse` "2" + C `shouldParse` "22" + B `shouldParse` "12+32" + A `shouldParse` "12+32-42" + A `shouldParse` "12+12-32+45-44" + A `shouldNotParse` "" + A `shouldNotParse` "12+13+14" + A `shouldNotParse` "12+13+14-14-56" } // From "Packrat parsers can support left-recursion" describe("super linear parse time") { - lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succeed(()) + lazy val start: NT[Any] = ones ~ '2' | '1' ~ start | succ(()) lazy val ones: NT[Any] = ones ~ '1' | '1' - start shouldParse "" - start shouldParse "1" - start shouldParse "12" - start shouldParse "11112" - start shouldParse "111111" - start shouldParse "1111112" + start `shouldParse` "" + start `shouldParse` "1" + start `shouldParse` "12" + start `shouldParse` "11112" + start `shouldParse` "111111" + start `shouldParse` "1111112" // Actually computing the result triggers a stackoverflow - // start shouldParse ("1" * 200) + // start `shouldParse` ("1" * 200) } describe("A = A ~ b | c") { - lazy val A: NT[_] = A ~ 'b' | 'c' + lazy val A: NT[Any] = A ~ 'b' | 'c' - A shouldParse "c" - A shouldParse "cb" - A shouldParse "cbb" - A shouldParse "cbbbbbbbbbbbbb" - A shouldNotParse "cbbbbbbbbbbbbbc" + A `shouldParse` "c" + A `shouldParse` "cb" + A `shouldParse` "cbb" + A `shouldParse` "cbbbbbbbbbbbbb" + A `shouldNotParse` "cbbbbbbbbbbbbbc" } describe("A = empty ~ A ~ b | empty") { - lazy val A: NT[Any] = succeed("done") ~ A ~ 'b' | succeed("done") - A shouldParse "" - A shouldParse "b" - A shouldParse "bb" + lazy val A: NT[Any] = succ("done") ~ A ~ 'b' | succ("done") + A `shouldParse` "" + A `shouldParse` "b" + A `shouldParse` "bb" } // should parse at most as many 'd's as it parses 'b's. 
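// (To see why: every unfolding of A = B ~> A <~ 'b' consumes at most one
// 'd' through B, but always contributes exactly one trailing 'b'. So
// "ddcbb" is accepted, two 'd's against two 'b's, while "dddcb" is
// rejected: three 'd's would require at least three 'b's.)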
describe("A = B ~ A ~ b | c\n B = d | empty") { lazy val A: NT[Char] = B ~> A <~ 'b' | 'c' - lazy val B: NT[_] = charParser('d') | succeed("done") - - A shouldParse "c" - A shouldParse "cb" - A shouldParse "dcb" - A shouldParse "cbb" - A shouldParse "ddcbb" - A shouldNotParse "dddcb" - A shouldParse "dddddcbbbbbbbbbbbbb" + lazy val B: NT[Any] = 'd' | succ("done") + + A `shouldParse` "c" + A `shouldParse` "cb" + A `shouldParse` "dcb" + A `shouldParse` "cbb" + A `shouldParse` "ddcbb" + A `shouldNotParse` "dddcb" + A `shouldParse` "dddddcbbbbbbbbbbbbb" } describe("many(some(a))") { lazy val p = many(some('a')) - p shouldParse "" - p shouldParse("a") - p shouldParse("aaa") - p shouldParse("aaaaaaaaaa") - p shouldNotParse("b") - p shouldNotParse("aaab") + p `shouldParse` "" + p `shouldParse` "a" + p `shouldParse` "aaa" + p `shouldParse` "aaaaaaaaaa" + p `shouldNotParse` "b" + p `shouldNotParse` "aaab" } describe("del(ones)") { @@ -176,144 +179,131 @@ trait LeftrecTests extends CustomMatchers { self: FunSpec with Matchers => lazy val rr: NT[String] = "1" ~> rr | "1" lazy val ll: NT[String] = ll <~ "1" | "1" - ll shouldParse ("1" * 40) - rr shouldParse ("1" * 41) + ll `shouldParse` ("1" `repeat` 40) + rr `shouldParse` ("1" `repeat` 41) } // Grammar from Tillmann Rendel's GLL library describe("very ambiguous") { lazy val A: NT[Char] = A ~> A | A ~> A ~> A | 'a' - A shouldNotParse "" - A shouldParse "a" - A shouldParse "aa" - A shouldParse "aaa" - A shouldParse ("a" * 100) + A `shouldNotParse` "" + A `shouldParse` "a" + A `shouldParse` "aa" + A `shouldParse` "aaa" + A `shouldParse` ("a" `repeat` 100) lazy val A2: Parser[Any] = some(some('a')) - A2 shouldParse ("a" * 1000) + A2 `shouldParse` ("a" `repeat` 1000) } describe("mixed mutual recursion") { lazy val expression: NT[Any] = - ( literal ~ '+' - | condExpr - ) - - lazy val condExpr: NT[Any] = - ( condExpr ~ '?' - | eqExpr - ) - - lazy val eqExpr: NT[Any] = - ( eqExpr ~ '*' - | literal - ) - - lazy val literal: NT[Any] = - ( many('a') - | '[' ~ arrayEl - ) - - lazy val arrayEl: NT[Any] = - ( expression - | succeed ("undefined") - ) - - expression shouldParse "" - expression shouldParse "a" - expression shouldParse "aaaaa" - expression shouldParse "[" - expression shouldParse "[a" - expression shouldParse "[aaaaa" - expression shouldParse "[[[[a" + (literal ~ '+' + | condExpr) + + lazy val condExpr: NT[Any] = condExpr ~ '?' 
| eqExpr + + lazy val eqExpr: NT[Any] = eqExpr ~ '*' | literal + + lazy val literal: NT[Any] = many('a') | '[' ~ arrayEl + + lazy val arrayEl: NT[Any] = expression | succ("undefined") + + expression `shouldParse` "" + expression `shouldParse` "a" + expression `shouldParse` "aaaaa" + expression `shouldParse` "[" + expression `shouldParse` "[a" + expression `shouldParse` "[aaaaa" + expression `shouldParse` "[[[[a" } describe("terms") { + enum Term { + case BinOp(lhs: Term, op: String, rhs: Term) + case Num(n: Int) + } - trait Term - case class BinOp(lhs: Term, op: String, rhs: Term) extends Term - case class Num(n: Int) extends Term + import Term._ lazy val term: NT[Term] = - ( term ~ "+" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | term ~ "-" ~ fact ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | fact - ) + (term ~ "+" ~ fact ^^ { case ((l, op), r) => BinOp(l, op, r) } + | term ~ "-" ~ fact ^^ { case ((l, op), r) => BinOp(l, op, r) } + | fact) lazy val fact: NT[Term] = - ( fact ~ "*" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | fact ~ "/" ~ num ^^ { case l ~ op ~ r => BinOp(l, op, r) } - | num - ) + (fact ~ "*" ~ num ^^ { case ((l, op), r) => BinOp(l, op, r) } + | fact ~ "/" ~ num ^^ { case ((l, op), r) => BinOp(l, op, r) } + | num) lazy val num: Parser[Num] = some(digit) ^^ (ns => Num(ns.mkString.toInt)) - num shouldParse "12345" - term shouldParse "12+31" - term shouldParse "12*8+31*45" + num `shouldParse` "12345" + term `shouldParse` "12+31" + term `shouldParse` "12*8+31*45" } // Grammar and testcases from Tillmann Rendel's GLL library. describe("balanced smileys") { lazy val az: NT[Any] = acceptIf(c => c >= 'a' && c <= 'z') lazy val S: NT[Any] = many(az | ' ' | ':' | ':' ~ P | '(' ~ S ~ ')') - lazy val P: NT[Any] = charParser('(') | ')' - - S shouldParse "" - S shouldNotParse ":((" - S shouldParse "i am sick today (:()" - S shouldParse "(:)" - S shouldParse "hacker cup: started :):)" - S shouldNotParse ")(" - S shouldNotParse "(((a)):()a(()(((:))a((:)():(((()()a)))(:a(::)(a)))(a)((a::():(a)():)a(a(a(:aa(:()(a(((((()))))))))" - S shouldParse "():)((()():(:())))::aa((((:(((:)))::a:(:))()a)):(a):::((()a((a(aa(():))(():())((::a)a)):)()" - S shouldParse ":(a):(:)aa)a(:()::():))a:aaa:)(:)((()()))a()(((()(:)))(:(aa:()())())a((a)a:(:()))(a((():)))" - S shouldParse ":a:)(:))()(()()a)aaa::a()()a:()()a::)((()(a(a))))try implementing sleep sort if you are stuck:(:)a)" - S shouldNotParse "(a())(::)(a))():(((a(()(:))a(:)))(:(:(:((():)(a))(:))(a)():(:(()aa):)(a((())a)a((a):)()(:(" - S shouldParse "(::a((a)a:()):):a)aa:)a(:::))(a())aa(a():))(:)a)((():)(:a:)a))):a(a)((:()(()())a))()a((()a))" - S shouldParse "()(((a)((aa)))a)a()(a)(aa:a)()(((:())aa)):()():():a:(a)(a())a:)::a:(aa:):()((a:)())aa)a(a:)" - S shouldParse ":)()((a)):(():a:a:)(:a)):)(()(:)::::(a(::a())(a):(:((((:(aa(()))a)(((((((((()a()a):)))((:)))))))))" - S shouldParse "a(a)::(((::)))())((a)(:((:a())):((::(:()(a)))i am trapped in a test case generator :(:(a(:::))" - S shouldParse "((:):::(()()):)(()()():())aaa)(:(a:)a:((())a(((a(:())aa():a:)((()):)(()(:)(a())a:()a)a():(" - S shouldNotParse "(:a))" - S shouldParse "::((:))(((:)(aaa)(a())()(a:)(:)(:)()):)a())aa)())(():a):()::):)a()())a()):):(:a)a):()(a)(a)" - S shouldParse "()a(:)(a:a):(())):a()():((a(:):a()()::)(a:)(()a((a:)(a)a(a:a:)(a)a(a:(()()()::a()a()(()a:())))" - S shouldParse "()((:a(a()()a))())((:a(:a)(()a((((a((a(()(:aa()()()))):)(():):)(:(a))():(())(():()):):(()a))" - S shouldParse "(((((((((())))))))))" - S shouldParse "(((((((((((((((((((())))))))))))))))))))" - S 
shouldParse "((((((((((:))))))))))" - S shouldParse "((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S shouldNotParse "(((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S shouldParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" - S shouldParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:))))))))))" - S shouldParse "((((((((((((:))))))))))((((((((((:())))))))))))" - S shouldNotParse "(((((((((()))))))))))" - S shouldNotParse "(((((((((((((((((((()))))))))))))))))))))" - S shouldParse "((((((((((:)))))))))))" - S shouldParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:)))))))))))))))))))))))))))))))))))))))))))))))))))" - S shouldParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))" - S shouldParse "((((((((((((:))))))))))((((((((((:)))))))))))))" - S shouldNotParse "((((((((((:))))))))))))" - S shouldNotParse "((((((((((((:))))))))))((((((((((:)))))))))))))))" - S shouldNotParse "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))))))))" - S shouldNotParse "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))))))" + lazy val P: NT[Any] = alt('(', ')') + + S `shouldParse` "" + S `shouldNotParse` ":((" + S `shouldParse` "i am sick today (:()" + S `shouldParse` "(:)" + S `shouldParse` "hacker cup: started :):)" + S `shouldNotParse` ")(" + S `shouldNotParse` "(((a)):()a(()(((:))a((:)():(((()()a)))(:a(::)(a)))(a)((a::():(a)():)a(a(a(:aa(:()(a(((((()))))))))" + S `shouldParse` "():)((()():(:())))::aa((((:(((:)))::a:(:))()a)):(a):::((()a((a(aa(():))(():())((::a)a)):)()" + S `shouldParse` ":(a):(:)aa)a(:()::():))a:aaa:)(:)((()()))a()(((()(:)))(:(aa:()())())a((a)a:(:()))(a((():)))" + S `shouldParse` ":a:)(:))()(()()a)aaa::a()()a:()()a::)((()(a(a))))try implementing sleep sort if you are stuck:(:)a)" + S `shouldNotParse` "(a())(::)(a))():(((a(()(:))a(:)))(:(:(:((():)(a))(:))(a)():(:(()aa):)(a((())a)a((a):)()(:(" + S `shouldParse` "(::a((a)a:()):):a)aa:)a(:::))(a())aa(a():))(:)a)((():)(:a:)a))):a(a)((:()(()())a))()a((()a))" + S `shouldParse` "()(((a)((aa)))a)a()(a)(aa:a)()(((:())aa)):()():():a:(a)(a())a:)::a:(aa:):()((a:)())aa)a(a:)" + S `shouldParse` ":)()((a)):(():a:a:)(:a)):)(()(:)::::(a(::a())(a):(:((((:(aa(()))a)(((((((((()a()a):)))((:)))))))))" + S `shouldParse` "a(a)::(((::)))())((a)(:((:a())):((::(:()(a)))i am trapped in a test case generator :(:(a(:::))" + S `shouldParse` "((:):::(()()):)(()()():())aaa)(:(a:)a:((())a(((a(:())aa():a:)((()):)(()(:)(a())a:()a)a():(" + S `shouldNotParse` "(:a))" + S `shouldParse` "::((:))(((:)(aaa)(a())()(a:)(:)(:)()):)a())aa)())(():a):()::):)a()())a()):):(:a)a):()(a)(a)" + S `shouldParse` "()a(:)(a:a):(())):a()():((a(:):a()()::)(a:)(()a((a:)(a)a(a:a:)(a)a(a:(()()()::a()a()(()a:())))" + S `shouldParse` "()((:a(a()()a))())((:a(:a)(()a((((a((a(()(:aa()()()))):)(():):)(:(a))():(())(():()):):(()a))" + S `shouldParse` "(((((((((())))))))))" + S `shouldParse` 
"(((((((((((((((((((())))))))))))))))))))" + S `shouldParse` "((((((((((:))))))))))" + S `shouldParse` "((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldNotParse` "(((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldParse` "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:))))))))))" + S `shouldParse` "((((((((((((:))))))))))((((((((((:())))))))))))" + S `shouldNotParse` "(((((((((()))))))))))" + S `shouldNotParse` "(((((((((((((((((((()))))))))))))))))))))" + S `shouldParse` "((((((((((:)))))))))))" + S `shouldParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:)))))))))))))))))))))))))))))))))))))))))))))))))))" + S `shouldParse` "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))" + S `shouldParse` "((((((((((((:))))))))))((((((((((:)))))))))))))" + S `shouldNotParse` "((((((((((:))))))))))))" + S `shouldNotParse` "((((((((((((:))))))))))((((((((((:)))))))))))))))" + S `shouldNotParse` "((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))) ((((((((((:)))))))))))))))))" + S `shouldNotParse` "(a((f((g(((g((:))))g))))))::((((((((((((((((((((:)))))))))))))))))))) ((((((((((((((((((((((((((((((((((((((((((((((((((:))))))))))))))))))))))))))))))))))))))))))))))))))))))" } // This is grammar Γ₁ from Scott and Johnstone (2010, Sec. 5). 
// taken from Tillmann Rendel's GLL library describe("grammar with hidden left recursion") { lazy val S: NT[Any] = C ~ 'a' | 'd' - lazy val B: NT[Any] = succeed(()) | 'a' - lazy val C: NT[Any] = charParser('b') | B ~ C ~ 'b' | 'b' ~ 'b' - - S shouldNotParse "" - S shouldNotParse "aba" - S shouldParse "d" - S shouldParse "ba" - S shouldParse "bba" - S shouldParse "abba" - S shouldParse "aabbba" + lazy val B: NT[Any] = succ(()) | 'a' + lazy val C: NT[Any] = 'b' | B ~ C ~ 'b' | 'b' ~ 'b' + + S `shouldNotParse` "" + S `shouldNotParse` "aba" + S `shouldParse` "d" + S `shouldParse` "ba" + S `shouldParse` "bba" + S `shouldParse` "abba" + S `shouldParse` "aabbba" } } } diff --git a/artifact/src/test/scala/NegationTests.scala b/artifact/src/test/scala/NegationTests.scala index 0cf39e0..ba9e496 100644 --- a/artifact/src/test/scala/NegationTests.scala +++ b/artifact/src/test/scala/NegationTests.scala @@ -1,59 +1,60 @@ package fcd package test -import org.scalatest._ +import scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec -trait NegationTests extends CustomMatchers { self: FunSpec with Matchers => +trait NegationTests { + self: AnyFunSpec & CustomMatchers[RichParsers] => - import parsers._ - import parsers.{ not => neg } + import parsers.{not as neg, *} describe("parser \"not(aa)\"") { val p = neg("aa") - p shouldParse "a" - p shouldNotParse "aa" - p shouldParse "aac" - p shouldParse "abc" + p `shouldParse` "a" + p `shouldNotParse` "aa" + p `shouldParse` "aac" + p `shouldParse` "abc" } describe("parser \"not(aa) & lower*\"") { val p = neg("aa") & many(lower) - p shouldParse "a" - p shouldParse "bc" - p shouldParse "ab" - p shouldNotParse "aa" - p shouldParse "abc" - p shouldParse "aac" - p shouldParse "aacdd" + p `shouldParse` "a" + p `shouldParse` "bc" + p `shouldParse` "ab" + p `shouldNotParse` "aa" + p `shouldParse` "abc" + p `shouldParse` "aac" + p `shouldParse` "aacdd" } describe("parser \"not(aa ~ .*) & lower*\"") { val p = neg("aa" ~ many(any)) & many(lower) - p shouldParse "a" - p shouldParse "bc" - p shouldParse "ab" - p shouldNotParse "aa" - p shouldParse "abc" - p shouldNotParse "aac" - p shouldNotParse "aacadasdasdasd" + p `shouldParse` "a" + p `shouldParse` "bc" + p `shouldParse` "ab" + p `shouldNotParse` "aa" + p `shouldParse` "abc" + p `shouldNotParse` "aac" + p `shouldNotParse` "aacadasdasdasd" } describe("parser \"not(.* ~ abc ~ .*)\"") { val p = neg(many(any) ~ "abc" ~ many(any)) - p shouldParse "" - p shouldParse "xx" - p shouldParse "xxabxx" - p shouldNotParse "xxabcxxx" - p shouldNotParse "xxabc" - p shouldNotParse "abcxxx" + p `shouldParse` "" + p `shouldParse` "xx" + p `shouldParse` "xxabxx" + p `shouldNotParse` "xxabcxxx" + p `shouldNotParse` "xxabc" + p `shouldNotParse` "abcxxx" } describe("parser \"not((baaa | ba) ~ aa ~ .*) & lower*\"") { - val p: Parser[_] = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) - p shouldNotParse "baaa" - p shouldNotParse "baaaxx" - p shouldParse "" - p shouldParse "baba" - p shouldParse "baacxx" + val p = neg(("baaa" | "ba") ~ "aa" ~ many(any)) & many(lower) + p `shouldNotParse` "baaa" + p `shouldNotParse` "baaaxx" + p `shouldParse` "" + p `shouldParse` "baba" + p `shouldParse` "baacxx" } } diff --git a/artifact/src/test/scala/PythonParserTests.scala b/artifact/src/test/scala/PythonParserTests.scala index fe7cdb6..77b0070 100644 --- a/artifact/src/test/scala/PythonParserTests.scala +++ b/artifact/src/test/scala/PythonParserTests.scala @@ -1,286 +1,234 @@ package fcd package test -import org.scalatest._ import 
scala.language.implicitConversions +import org.scalatest.funspec.AnyFunSpec -class PythonParserTests extends FunSpec with Matchers { +class PythonParserTests + extends AnyFunSpec + with CustomMatchers[PythonParsers.type](PythonParsers) { - val parsers = PythonParsers import parsers._ + import parsers.given + import Lexeme._ - describe ("indented python parser (lexeme based)") { - indented(many(many(Id("A")) <~ NL)) shouldParseWith ( - List(WS, WS, Id("A"), Id("A"), NL, - WS, WS, Id("A"), NL), - List(List(Id("A"), Id("A")), List(Id("A")))) + describe("indented python parser (lexeme based)") { + indented(many(many(Id("A")) <~ NL)) `shouldParseWith` ( + List(WS, WS, Id("A"), Id("A"), NL, WS, WS, Id("A"), NL), + List(List(Id("A"), Id("A")), List(Id("A"))) + ) } - describe ("implicit line joining") { - - implicit def keyword(kw: Symbol): Lexeme = KW(kw.name) - implicit def punctuation(p: String): Lexeme = Punct(p) - - val p = many(WS | id | "(" | ")" | "[" | "]") - val a = Id("A") - val BS = Punct("\\") - - - dyck shouldParse List[Lexeme]("(", "(", ")", ")") - dyck shouldNotParse List[Lexeme]("(", "(", ")") - extDyck shouldParse List[Lexeme]("(", a, "(", a, NL, a, ")", a, ")") - extDyck shouldNotParse List[Lexeme](a, "(", a, "(", a, NL, a, ")", a, ")", a) - - implicitJoin(p) shouldParse List[Lexeme](a, a, a, a, a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, a, NL, a, a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, NL, a, ")", a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, NL, a, a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, "(", NL, a, ")", a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, "(", NL, a, ")", ")", a) - implicitJoin(p) shouldParse List[Lexeme](a, a, "(", a, "[", NL, a, "]", ")", a) - implicitJoin(p) shouldNotParse List[Lexeme](a, a, "(", a, "[", NL, a, ")", "]", a) - - explicitJoin(p) shouldParse List[Lexeme](a, a, a, BS, NL, a, a) - explicitJoin(p) shouldParse List[Lexeme](a, a, a, BS, NL, a, a, BS, NL, a, a) - - val input = List[Lexeme]( - a, NL, - Comment("Hey!!"), a, BS, NL, - a, a, "(", a, "[", a, BS, NL, - a, NL, - a, "]", ")", a) - - val inputWithoutComments = List[Lexeme]( - a, NL, - a, BS, NL, - a, a, "(", a, "[", a, BS, NL, - a, NL, - a, "]", ")", a) - - val inputWithoutExplicit = List[Lexeme]( - a, NL, - a, - a, a, "(", a, "[", a, - a, NL, - a, "]", ")", a) - - val inputResult = List[Lexeme]( - a, NL, - a, - a, a, "(", a, "[", a, - a, - a, "]", ")", a) + describe("implicit line joining") { + + given keyword: Conversion[Symbol, Lexeme] = kw => KW(kw.name) + given punctuation: Conversion[String, Lexeme] = Punct(_) + + val p = many(WS | id | "(" | ")" | "[" | "]") + val a = Id("A") + val BS = Punct("\\") + + dyck `shouldParse` List[Lexeme]("(", "(", ")", ")") + dyck `shouldNotParse` List[Lexeme]("(", "(", ")") + extDyck `shouldParse` List("(", a, "(", a, NL, a, ")", a, ")") + extDyck `shouldNotParse` List(a, "(", a, "(", a, NL, a, ")", a, ")", a) + + implicitJoin(p) `shouldParse` List(a, a, a, a, a) + implicitJoin(p) `shouldNotParse` List(a, a, a, NL, a, a) + implicitJoin(p) `shouldParse` List(a, a, "(", a, NL, a, ")", a) + implicitJoin(p) `shouldNotParse` List(a, a, "(", a, NL, a, a) + implicitJoin(p) `shouldNotParse` List(a, a, "(", a, "(", NL, a, ")", a) + implicitJoin(p) `shouldParse` List(a, a, "(", a, "(", NL, a, ")", ")", a) + implicitJoin(p) `shouldParse` List(a, a, "(", a, "[", NL, a, "]", ")", a) + implicitJoin(p) `shouldNotParse` List(a, a, "(", a, "[", NL, a, ")", "]", a) + + explicitJoin(p) 
`shouldParse` List(a, a, a, BS, NL, a, a) + explicitJoin(p) `shouldParse` List(a, a, a, BS, NL, a, a, BS, NL, a, a) + + val input = List[Lexeme](a, NL, Comment("Hey!!"), a, BS, NL, a, a, "(", a, + "[", a, BS, NL, a, NL, a, "]", ")", a) + + val inputWithoutComments = List[Lexeme](a, NL, a, BS, NL, a, a, "(", a, "[", + a, BS, NL, a, NL, a, "]", ")", a) + + val inputWithoutExplicit = + List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, NL, a, "]", ")", a) + + val inputResult = + List[Lexeme](a, NL, a, a, a, "(", a, "[", a, a, a, "]", ")", a) val collect = consumed(many(any)) - stripComments(collect) shouldParseWith (input, inputWithoutComments) - explicitJoin(collect) shouldParseWith (inputWithoutComments, inputWithoutExplicit) - implicitJoin(collect) shouldParseWith (inputWithoutExplicit, inputResult) + stripComments(collect) `shouldParseWith` (input, inputWithoutComments) + explicitJoin(collect) `shouldParseWith` + (inputWithoutComments, inputWithoutExplicit) + implicitJoin(collect) `shouldParseWith` (inputWithoutExplicit, inputResult) - preprocess(file_input) shouldParse List[Lexeme]( - a, ";", a, "=", 'yield, 'from, a, "=", a, ";", NL, - NL, - a, ";", a, NL, - EOS) - - preprocess(file_input) shouldParse List[Lexeme]( - a, "=", a, ">>", a, "*", a, NL, - EOS) - - val sampleProg = List[Lexeme]( - 'def, WS, Id("fun"), "(", WS, a, WS, ")", ":", NL, - WS, WS, a, "+=", WS, a, NL, - WS, WS, a, "*=", a, NL, - EOS) - - (stripComments(collect) parse sampleProg) shouldBe List(sampleProg) - (explicitJoin(collect) parse sampleProg) shouldBe List(sampleProg) - (implicitJoin(collect) parse sampleProg) shouldBe List(sampleProg) - - preprocess(file_input) shouldParse sampleProg - - val sampleProg2 = List[Lexeme]( - 'def, WS, Id("fun"), "(", NL, - WS, a, WS, - NL, ")", ":", NL, - WS, WS, a, "+=", Comment("Test"), BS, NL, - WS, a, NL, - WS, WS, a, "*=", a, NL, - EOS) + preprocess(file_input) `shouldParse` List(a, ";", a, "=", "yield", "from", + a, "=", a, ";", NL, NL, a, ";", a, NL, EOS) + + preprocess(file_input) `shouldParse` + List(a, "=", a, ">>", a, "*", a, NL, EOS) + + val sampleProg = List[Lexeme]("def", WS, Id("fun"), "(", WS, a, WS, ")", + ":", NL, WS, WS, a, "+=", WS, a, NL, WS, WS, a, "*=", a, NL, EOS) - (preprocess(collect) parse sampleProg2) shouldBe List(sampleProg) + parse(stripComments(collect), sampleProg) `shouldBe` List(sampleProg) + parse(explicitJoin(collect), sampleProg) `shouldBe` List(sampleProg) + parse(implicitJoin(collect), sampleProg) `shouldBe` List(sampleProg) - preprocess(file_input) shouldParse sampleProg2 + preprocess(file_input) `shouldParse` sampleProg + + val sampleProg2 = List[Lexeme]("def", WS, Id("fun"), "(", NL, WS, a, WS, NL, + ")", ":", NL, WS, WS, a, "+=", Comment("Test"), BS, NL, WS, a, NL, WS, WS, + a, "*=", a, NL, EOS) + + parse(preprocess(collect), sampleProg2) `shouldBe` List(sampleProg) + preprocess(file_input) `shouldParse` sampleProg2 // https://en.wikibooks.org/wiki/Python_Programming/Decorators + // format: off val traceProg = List[Lexeme]( - Comment("define the Trace class that will be "), NL, - Comment("invoked using decorators"), NL, - 'class, WS, Id("Trace"), "(", Id("object"), ")", ":", NL, - WS, WS, WS, WS, 'def, WS, Id("__init__"), "(", Id("self"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, - WS, WS, WS, WS, NL, - WS, WS, WS, WS, WS, WS, 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), 
"(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'return, WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, - EOS + Comment("define the Trace class that will be "), NL, + Comment("invoked using decorators"), NL, + "class", WS, Id("Trace"), "(", Id("object"), ")", ":", NL, + WS, WS, WS, WS, "def", WS, Id("__init__"), "(", Id("self"), ")", ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, + WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, NL, WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, + EOS ) + // format: on - argument shouldParse List[Lexeme]("*", Id("kwargs")) - argument shouldParse List[Lexeme]("**", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs2")) - arglist shouldParse List[Lexeme](Id("kwargs"), ",", WS, Id("kwargs")) - arglist shouldParse List[Lexeme]("*", Id("kwargs"), ",", "*", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs"), ",", "**", Id("kwargs")) - arglist shouldParse List[Lexeme]("*", Id("kwargs"), ",", WS, "*", Id("kwargs")) - arglist shouldParse List[Lexeme]("**", Id("kwargs"), ",", WS, "**", Id("kwargs")) + argument `shouldParse` List("*", Id("kwargs")) + argument `shouldParse` List("**", Id("kwargs")) + arglist `shouldParse` List("**", Id("kwargs2")) + arglist `shouldParse` List(Id("kwargs"), ",", WS, Id("kwargs")) + arglist `shouldParse` List("*", Id("kwargs"), ",", "*", Id("kwargs")) + arglist `shouldParse` List("**", Id("kwargs"), ",", "**", Id("kwargs")) + arglist `shouldParse` List("*", Id("kwargs"), ",", WS, "*", Id("kwargs")) + arglist `shouldParse` List("**", Id("kwargs"), ",", WS, "**", Id("kwargs")) + arglist `shouldParse` List("(", Id("args"), ",", WS, Id("kwargs"), ")") + arglist `shouldParse` List("(", "*", Id("args"), ",", WS, Id("kwargs"), ")") - arglist shouldParse List[Lexeme]("(", Id("args"), ",", WS, Id("kwargs"), ")") - arglist shouldParse List[Lexeme]("(", "*", Id("args"), ",", WS, Id("kwargs"), ")") - arglist shouldParse List[Lexeme]("(", "*", Id("args"), ",", WS, "*", Id("kwargs"), ")") - test shouldParse List[Lexeme](Id("f"), "(", Id("args"), ",", WS, Id("kwargs"), ")") - test shouldParse List[Lexeme](Id("f"), "(", "*", Id("args"), ",", WS, "**", 
Id("kwargs"), ")") + arglist `shouldParse` + List("(", "*", Id("args"), ",", WS, "*", Id("kwargs"), ")") - test shouldParse List[Lexeme](Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")") + test `shouldParse` + List(Id("f"), "(", Id("args"), ",", WS, Id("kwargs"), ")") + + test `shouldParse` + List(Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")") + + test `shouldParse` List(Id("print"), "(", Str("entering function "), WS, + "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")") // TODO is already ambiguous - // (stmt parse List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size shouldBe 1 + // (stmt `parse` List[Lexeme](Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL)).size `shouldBe` 1 - // preprocess(file_input) shouldParse traceProg + // preprocess(file_input) `shouldParse` traceProg - // (stmt parse List[Lexeme]( - // 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - // WS, WS, Id("print"), NL)).size shouldBe 1 + // (stmt `parse` List[Lexeme]( + // "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + // WS, WS, Id("print"), NL)).size `shouldBe` 1 - stmt shouldNotParse List[Lexeme]( - 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, Id("print"), NL, - // this line is indented too far - WS, WS, WS, WS, WS, WS, Id("print"), NL) + // format: off + stmt `shouldNotParse` List( + "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, + WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + WS, WS, WS, WS, Id("print"), NL, // this line is indented too far + WS, WS, WS, WS, WS, WS, Id("print"), NL + ) + // format: on // with empty lines + // format: off val traceProg2 = List[Lexeme]( - Comment("define the Trace class that will be "), NL, - Comment("invoked using decorators"), NL, - 'class, WS, Id("Trace"), "(", Id("object"), ")", ":", NL, - WS, WS, WS, WS, 'def, WS, Id("__init__"), "(", Id("self"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, - NL, - WS, WS, WS, WS, 'def, WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'for, WS, Id("arg"), WS, 'in, WS, Id("args"), ":", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, - WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, - WS, WS, NL, - NL, - NL, - NL, - WS, WS, WS, WS, WS, WS, WS, WS, 'return, WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, - EOS + Comment("define the Trace class that will be "), NL, + Comment("invoked using decorators"), NL, + "class", WS, Id("Trace"), "(", Id("object"), ")", ":", NL, WS, + WS, WS, WS, "def", WS, Id("__init__"), "(", Id("self"), ")", ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("self"), ".", Id("f"), WS, "=", WS, Id("f"), NL, + NL, + WS, WS, WS, WS, "def", WS, Id("__call__"), "(", Id("self"), WS, ",", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", ":", NL, + WS, WS, 
WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("entering function "), WS, "+", WS, Id("self"), ".", Id("f"), ".", Id("__name__"), + ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Num("0"), NL, + WS, WS, WS, WS, WS, WS, WS, WS, "for", WS, Id("arg"), WS, "in", WS, Id("args"), ":", NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("print"), "(", Str("arg {0}: {1}"), ".", Id("format"), "(", Id("i"), ",", Id("arg"), ")", ")", NL, + WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, Id("i"), "=", Id("i"), "+", Num("1"), NL, + WS, WS, NL, + NL, + NL, + NL, + WS, WS, WS, WS, WS, WS, WS, WS, "return", WS, Id("self"), ".", Id("f"), "(", "*", Id("args"), ",", WS, "**", Id("kwargs"), ")", NL, + EOS ) + // format: on - preprocess(file_input) shouldParse traceProg2 - (preprocess(file_input) parse traceProg2).size shouldBe 1 + preprocess(file_input) `shouldParse` traceProg2 + parse(preprocess(file_input), traceProg2).size `shouldBe` 1 - // suite should parse this: - val dummyin = List[Lexeme](NL, - WS, 'def, WS, Id("f"), "(", ")", ":", NL, - WS, WS, 'def, WS, Id("f"), "(", ")", ":", NL, - WS, WS, WS, Id("print"), NL, - WS, WS, WS, Id("print"), NL, - WS, WS, WS, Id("i"), NL) + // suite should parse this: + // format: off + val dummyin = List( + NL, + WS, "def", WS, Id("f"), "(", ")", ":", NL, + WS, WS, "def", WS, Id("f"), "(", ")", ":", NL, + WS, WS, WS, Id("print"), NL, + WS, WS, WS, Id("print"), NL, + WS, WS, WS, Id("i"), NL + ) + // format: on - //println((suite parse dummyin) mkString "\n\n") + // println((suite `parse` dummyin) mkString "\n\n") - stmt shouldNotParse List[Lexeme](WS, WS, WS, Id("i"), NL) - atom shouldNotParse List[Lexeme](WS, WS, WS, Id("i")) + stmt `shouldNotParse` List(WS, WS, WS, Id("i"), NL) + atom `shouldNotParse` List(WS, WS, WS, Id("i")) // This is the skeleton of the python parsers (and it is unambiguous) - lazy val aStmt: NT[Any] = aSimpleStmt | 'def ~> aBlock + lazy val aStmt: NT[Any] = aSimpleStmt | "def" ~> aBlock lazy val aSimpleStmt = a <~ NL - lazy val aBlock = aSimpleStmt | NL ~> indented(some(many(emptyLine) ~> aStmt)) + lazy val aBlock = + aSimpleStmt | NL ~> indented(some(many(emptyLine) ~> aStmt)) lazy val aInput: NT[Any] = NL.* ~> many(aStmt <~ NL.*) <~ EOS + // format: off val dummyin2 = List[Lexeme]( - 'def, NL, - WS, a, NL, - WS, a, NL, - WS, 'def, NL, - WS, WS, a, NL, - WS, WS, a, NL, - WS, WS, a, NL, - NL, - 'def, NL, - WS, a, NL, - WS, a, NL, - WS, 'def, NL, - WS, WS,WS,WS,WS,WS, a, NL, - WS, WS,WS,WS,WS,WS, a, NL, - WS, WS,WS,WS,WS,WS, a, NL, - EOS) - - aInput shouldParse List[Lexeme]( - 'def, NL, - WS, WS, a, NL, - WS, WS, a, NL, - EOS - ) - - aInput shouldNotParse List[Lexeme]( - 'def, NL, - WS, WS, a, NL, + "def", NL, WS, a, NL, - EOS - ) - - aInput shouldParse List[Lexeme]( - 'def, NL, + WS, a, NL, + WS, "def", NL, WS, WS, a, NL, - NL, WS, WS, a, NL, - EOS - ) - - aInput shouldNotParse List[Lexeme]( - 'def, NL, WS, WS, a, NL, - NL, + NL, "def", NL, WS, a, NL, + WS, a, NL, + WS, "def", NL, + WS, WS, WS, WS, WS, WS, a, NL, + WS, WS, WS, WS, WS, WS, a, NL, + WS, WS, WS, WS, WS, WS, a, NL, EOS ) + // format: on - indentBy(WS ~ WS)(collect) shouldParseWith ( - List[Lexeme](WS, WS, a, NL), - List[Lexeme](a, NL)) + aInput `shouldParse` List("def", NL, WS, WS, a, NL, WS, WS, a, NL, EOS) + aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, WS, a, NL, EOS) + aInput `shouldParse` List("def", NL, WS, WS, a, NL, NL, WS, WS, a, NL, EOS) + aInput `shouldNotParse` List("def", NL, WS, WS, a, NL, NL, WS, a, NL, EOS) - indentBy(WS ~ WS)(collect) shouldParseWith ( - 
List[Lexeme](WS, WS, NL, NL, WS, WS, a, NL), - List[Lexeme](NL, NL, a, NL)) + indentBy(WS ~ WS)(collect) `shouldParseWith` + (List(WS, WS, a, NL), List(a, NL)) - (aInput parse dummyin2).size shouldBe 1 - } + indentBy(WS ~ WS)(collect) `shouldParseWith` + (List(WS, WS, NL, NL, WS, WS, a, NL), List(NL, NL, a, NL)) - // Helpers to allow writing more concise tests. - private implicit class ParserTests[T, P <% Parser[T]](p: => P) { - def shouldParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should parse "$s" """, tags:_*) { - accepts(p, s) shouldBe true - } - def shouldNotParse[ES <% Iterable[Elem]](s: ES, tags: Tag*) = - it (s"""should not parse "$s" """, tags:_*) { - accepts(p, s) shouldBe false - } - // for unambiguous parses - def shouldParseWith[ES <% Iterable[Elem]](s: ES, result: T) = - it (s"""should parse "$s" with correct result""") { - parse(p, s) shouldBe List(result) - } + parse(aInput, dummyin2).size `shouldBe` 1 } }
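// Note on the helpers deleted above: Scala 3 drops view bounds (`<%`), so
// the old implicit `ParserTests` class cannot be ported as-is. The rewritten
// tests instead mix in a parametrized `CustomMatchers` trait whose definition
// is not part of this diff. A minimal sketch of what such a trait could look
// like, inferred from the usages `CustomMatchers[PythonParsers.type](PythonParsers)`
// and `self: AnyFunSpec & CustomMatchers[RichParsers]`; the type bound and
// method bodies are assumptions, not the actual implementation:
//
//   trait CustomMatchers[P <: RichParsers](val parsers: P) {
//     self: AnyFunSpec =>
//     import parsers._
//
//     // the receiver stays by-name, as in the old helper, so that lazy
//     // nonterminals are not forced while the tests are being registered
//     implicit class ParserTests[T](p: => Parser[T]) {
//       def shouldParse(s: Iterable[Elem]): Unit =
//         it(s"""should parse "$s" """) { assert(accepts(p, s)) }
//       def shouldNotParse(s: Iterable[Elem]): Unit =
//         it(s"""should not parse "$s" """) { assert(!accepts(p, s)) }
//       // for unambiguous parses: the single expected result
//       def shouldParseWith(s: Iterable[Elem], result: T): Unit =
//         it(s"""should parse "$s" with correct result""") {
//           assert(parse(p, s) == List(result))
//         }
//     }
//   }
//
// Strings would still convert to inputs via the imported given Conversions
// (hence `import parsers.given` and `scala.language.implicitConversions`),
// and the backticked infix style (p `shouldParse` "a") presumably sidesteps
// Scala 3's stricter rule that alphanumeric infix operators be declared
// `infix` or written with backticks under -source:future.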