Cumulative updates

amoeller · amoeller · commit df34460b79af · 2018-12-04T14:52:31.000+01:00
diff --git a/README.md b/README.md
@@ -90,6 +90,31 @@ where `<source>` can be a file or a directory containing `.tip` files and
 To see the possible options, run `tip` without options.
 Option `-verbose` is recommended when developing and testing analyses.
 
+## Visualizing control flow graphs and analysis results
+
+The main function `Tip.scala` emits control flow graphs and analysis results as ".dot" files 
+that can be processed by [Graphviz](https://www.graphviz.org/) to produce images, for example using the Graphviz dot command-line tool:
+```
+dot -O -Tpng out/example.tip__sign.dot
+```
+
+## Program normalization
+
+Some analyses require that the programs use restricted subsets of TIP. 
+The following kinds of normalization can be performed automatically:
+
+- `-normalizecalls`: 
+  normalizes function calls to be top-level only and such that arguments are identifiers 
+  (e.g. `id1 = id2(id3,id4)`)
+- `-normalizereturns`: 
+  normalizes return expressions to be identifiers 
+  (e.g. `return id`)
+- `-normalizepointers`: 
+  normalizes pointer operations to primitive statements
+  (`id = alloc P` where `P` is null or an integer constant, `id1 = &id2`, `id1 = id2`, `id1 = *id2`, `*id1 = id2`, or `id = null`) 
+ 
+If one or more of these options are enabled, the normalized program is printed to e.g. `out/example.tip__normalized.tip`. 
+ 
 ## Help to Scala novices
 
 This implementation takes advantage of many cool Scala language features that allow the code to be concise and flexible. 
@@ -100,6 +125,7 @@ Still, the following language features deserve some extra attention:
 - [traits](https://docs.scala-lang.org/tour/traits.html)
 - [case classes](https://docs.scala-lang.org/tour/case-classes.html)
 - [companion objects](https://docs.scala-lang.org/tour/singleton-objects.html)
+- [abstract type members](https://docs.scala-lang.org/tour/abstract-types.html) (see e.g. [GenericLattices.scala](src/tip/lattices/GenericLattices.scala))
 - [implicit parameters](https://docs.scala-lang.org/tour/implicit-parameters.html) (see e.g. [TypeAnalysis.scala](src/tip/analysis/TypeAnalysis.scala))
 - [implicit conversions](https://docs.scala-lang.org/tour/implicit-conversions.html) (see e.g. [TipType.ast2typevar](src/tip/types/Types.scala))
 - [implicit classes](https://docs.scala-lang.org/overviews/core/implicit-classes.html) (see e.g. [AstNodeData.AstNodeWithDeclaration](src/tip/ast/AstNodeData.scala))
diff --git a/src/tip/Tip.scala b/src/tip/Tip.scala
@@ -74,6 +74,8 @@ class RunOption {
     */
   var concolic = false
 
+  var normalizer: tip.ast.Normalizer = tip.ast.NoNormalizer
+
   /**
     * Checks that a source file or directory has been provided.
     * @return true if success
@@ -104,8 +106,6 @@ object Tip extends App {
         |
         | Options for analyzing programs:
         |
-        | -cfg               construct the (intraprocedural) control-flow graph, but do not perform any analysis
-        | -icfg              construct the interprocedural control-flow graph, but do not perform any analysis
         | -types             enable type analysis
         | -cfa               enable control-flow analysis (interprocedural analyses use the call-graph obtained by this analysis)
         | -andersen          enable Andersen pointer analysis
@@ -140,8 +140,16 @@ object Tip extends App {
         | -run               run the program as the last step
         | -concolic          perform concolic testing (search for failing inputs using dynamic symbolic execution)
         |
+        | Options for normalizing programs (can be combined):
+        |
+        | -normalizereturns  normalize return statements
+        | -normalizecalls    normalize function calls
+        | -normalizepointers normalize pointer usages
+        |
         | Other options:
         |
+        | -cfg               construct the (intraprocedural) control-flow graph, but do not perform any analysis
+        | -icfg              construct the interprocedural control-flow graph, but do not perform any analysis
         | -verbose           verbose output
       """.stripMargin)
 
@@ -163,7 +171,11 @@ object Tip extends App {
         case Failure(e: Throwable) =>
           log.error(s"Failure parsing the program: $file", e)
           sys.exit(1)
-        case Success(programNode: AProgram) =>
+        case Success(parsedNode: AProgram) =>
+          // run normalizer
+          val programNode = options.normalizer.normalizeProgram(parsedNode)
+          Output.output(file, OtherOutput(OutputKindE.normalized), programNode.toString, options.out)
+
           // run declaration analysis
           // (for information about the use of 'implicit', see [[tip.analysis.TypeAnalysis]])
           implicit val declData: DeclarationData = new DeclarationAnalysis(programNode).analyze()
@@ -278,6 +290,12 @@ object Tip extends App {
     val s = args(i)
     if (s.head == '-')
       s match {
+        case "-normalizepointers" =>
+          options.normalizer = new tip.ast.CombineNormalizers(options.normalizer, tip.ast.PointersNormalizer)
+        case "-normalizecalls" =>
+          options.normalizer = new tip.ast.CombineNormalizers(options.normalizer, tip.ast.CallsNormalizer)
+        case "-normalizereturns" =>
+          options.normalizer = new tip.ast.CombineNormalizers(options.normalizer, tip.ast.ReturnsNormalizer)
         case "-cfg" =>
           options.cfg = true
         case "-icfg" =>
diff --git a/src/tip/analysis/SignAnalysis.scala b/src/tip/analysis/SignAnalysis.scala
@@ -72,8 +72,8 @@ trait InterprocSignAnalysisMisc[N] {
 trait InterprocSignAnalysisFunctions extends MapLiftLatticeSolver[CfgNode] with InterprocSignAnalysisMisc[CfgNode] with InterproceduralForwardDependencies {
 
   override def funsub(n: CfgNode, x: lattice.Element): lattice.sublattice.Element = {
-    import lattice.sublattice._
-    import cfg._
+    import lattice.sublattice._ // with this import, 'sublattice' refers to the lattice of abstract states
+    import cfg._ // gives easy access to the functionality in InterproceduralProgramCfg
 
     new NormalizedCalls().assertContainsNode(n.data)
 
@@ -87,7 +87,7 @@ trait InterprocSignAnalysisFunctions extends MapLiftLatticeSolver[CfgNode] with
       // return node
       case CfgStmtNode(_, _, _, ret: AReturnStmt) =>
         val j = join(n, x)
-        j + (AstOps.returnId -> lattice.sublattice.sublattice.sublattice.eval(ret.value, j))
+        j + (AstOps.returnId -> sublattice.sublattice.eval(ret.value, j))
 
       // call nodes (like no-ops here)
       case _: CfgCallNode => join(n, x)
diff --git a/src/tip/ast/Ast.scala b/src/tip/ast/Ast.scala
@@ -165,6 +165,8 @@ case class AProgram(funs: List[AFunDeclaration], loc: Loc) extends AstNode {
   private def findMainFunction(): Option[AFunDeclaration] =
     funs.find(decl => decl.name == "main")
 
+  override def toString: String =
+    s"${this.print(PartialFunction.empty)}"
 }
 
 case class AFunDeclaration(name: String, args: List[AIdentifierDeclaration], stmts: AFunBlockStmt, loc: Loc) extends ADeclaration {
diff --git a/src/tip/ast/TipNormalizers.scala b/src/tip/ast/TipNormalizers.scala
@@ -0,0 +1,221 @@
+package tip.ast
+
+/** State shared by all [[Normalizer]] instances. */
+object Normalizer {
+
+  /** Number used in the most recently generated temporary variable name.
+    * The counter is shared by all normalizers: normalizers that are chained (see `CombineNormalizers`) process the same
+    * functions one after the other, so per-instance counters would make each of them start again at `tmp_1` and
+    * declare the same temporary twice in one function. */
+  private var sharedLastUid: Int = 0
+}
+
+/**
+  * Base class of the AST normalizers.
+  *
+  * On its own it rebuilds the program without changing its structure. Subclasses override individual `normalize...`
+  * methods and use [[newVariable]], [[addDeclaration]] and [[addStatement]] to introduce temporary variables.
+  * Pending statements are inserted in front of the statement being normalized (see [[nestedBlock]]),
+  * pending declarations are added to the enclosing function (see [[normalizeFunBlockStmt]]).
+  */
+class Normalizer {
+
+  /** The number used in the most recently generated temporary variable name (shared by all normalizers). */
+  def lastUid: Int = Normalizer.sharedLastUid
+
+  /** Sets the number from which the next temporary variable name is derived. */
+  def lastUid_=(uid: Int): Unit = { Normalizer.sharedLastUid = uid }
+
+  /** Returns a fresh temporary variable name of the form `tmp_N`. */
+  def newVariable(): String = {
+    Normalizer.sharedLastUid += 1
+    s"tmp_${Normalizer.sharedLastUid}"
+  }
+
+  /** The list of declarations that have to be added in the current function */
+  val declarations: scala.collection.mutable.ListBuffer[AIdentifierDeclaration] = scala.collection.mutable.ListBuffer.empty
+
+  /** Adds a declaration */
+  def addDeclaration(decl: AIdentifierDeclaration): Unit = declarations += decl
+
+  /** The list of statements to be added in the current block.
+    * This will mostly be assignments but can be any statement that can be in a nested block */
+  val statements: scala.collection.mutable.ListBuffer[AStmtInNestedBlock] = scala.collection.mutable.ListBuffer.empty
+
+  /** Adds a statement */
+  def addStatement(stmt: AStmtInNestedBlock): Unit = statements += stmt
+
+  /** Normalizes an AExpr by normalizing its sub-expressions; expressions of other kinds are returned unchanged. */
+  def normalizeExpr(e: AExpr): AExpr =
+    e match {
+      case a: Assignable => normalizeAssignable(a)
+      case r: ARecord => r.copy(fields = r.fields.map(normalizeRecordField))
+      case call: ACallFuncExpr => call.copy(targetFun = normalizeExpr(call.targetFun), args = call.args.map(normalizeExpr))
+      case op: ABinaryOp => op.copy(left = normalizeExpr(op.left), right = normalizeExpr(op.right))
+      case _ => e
+    }
+
+  /** Normalizes an AExpr into an AIdentifier. This is not used by the Normalizer class but useful for subclasses, as this is a common operation.
+    * An identifier is returned as it is. For any other expression a fresh temporary is declared, the (normalized)
+    * assignment `tmp = right` is added to the pending statements, and the temporary is returned. */
+  def normalizeToIdentifier(right: AExpr): AIdentifier =
+    right match {
+      case id: AIdentifier => id
+      case _ =>
+        val tmpVar = newVariable()
+        val id = AIdentifier(tmpVar, right.loc)
+        addDeclaration(AIdentifierDeclaration(tmpVar, right.loc))
+        addStatement(normalizeStmtInNestedBlock(AAssignStmt(AIdentifier(tmpVar, right.loc), right, right.loc)))
+        id
+    }
+
+  /** Normalizes an ARecordField. */
+  def normalizeRecordField(f: ARecordField): ARecordField =
+    f.copy(exp = normalizeExpr(f.exp))
+
+  /** Normalizes an Assignable. */
+  def normalizeAssignable(e: Assignable): Assignable =
+    e match {
+      case _: AIdentifier => e
+      case uop: AUnaryOp => uop.copy(target = normalizeExpr(uop.target))
+    }
+
+  /** Helper function to insert statements if there are any to insert before `stmt`. Otherwise, returns the same statement.
+    * The pending statements are cleared once they have been placed in the returned block. */
+  def nestedBlock(stmt: AStmtInNestedBlock): AStmtInNestedBlock =
+    if (statements.isEmpty) { stmt } else {
+      val res = ANestedBlockStmt(statements.toList :+ stmt, stmt.loc)
+      statements.clear()
+      res
+    }
+
+  /** Normalizes an AStmtInNestedBlock. */
+  def normalizeStmtInNestedBlock(stmt: AStmtInNestedBlock): AStmtInNestedBlock =
+    stmt match {
+      case stmt: AAssignStmt =>
+        nestedBlock(stmt.copy(left = normalizeAssignable(stmt.left), right = normalizeExpr(stmt.right)))
+      case stmt: ANestedBlockStmt =>
+        stmt.copy(body = stmt.body.map(normalizeStmtInNestedBlock))
+      case stmt: AIfStmt =>
+        // It is important to first normalize the if/else branches before calling nestedBlock, so that added statements for each branch remain in the corresponding branch, and added statements for the guard are added before the if.
+        val ifBranch2 = normalizeStmtInNestedBlock(stmt.ifBranch)
+        val elseBranch2 = stmt.elseBranch.map(normalizeStmtInNestedBlock)
+        nestedBlock(stmt.copy(guard = normalizeExpr(stmt.guard), ifBranch = ifBranch2, elseBranch = elseBranch2))
+      case stmt: AOutputStmt =>
+        nestedBlock(stmt.copy(value = normalizeExpr(stmt.value)))
+      case stmt: AErrorStmt =>
+        nestedBlock(stmt.copy(value = normalizeExpr(stmt.value)))
+      case stmt: AWhileStmt =>
+        // The body is normalized first, so that its added statements stay inside the loop.
+        val innerBlock2 = normalizeStmtInNestedBlock(stmt.innerBlock)
+        val guard2 = normalizeExpr(stmt.guard)
+        // Normalizing the body has flushed its pending statements, so what is pending now was generated for the guard.
+        val guardStatements = statements.toList
+        // The guard is evaluated again after every iteration, so the statements computing it must be repeated
+        // at the end of the loop body and not only placed before the loop.
+        val innerBlock3: AStmtInNestedBlock =
+          if (guardStatements.isEmpty) innerBlock2
+          else ANestedBlockStmt(innerBlock2 :: guardStatements, stmt.innerBlock.loc)
+        nestedBlock(stmt.copy(guard = guard2, innerBlock = innerBlock3))
+    }
+
+  /** Normalizes a AReturnStmt */
+  def normalizeReturnStmt(ret: AReturnStmt): AReturnStmt =
+    ret.copy(value = normalizeExpr(ret.value))
+
+  /** Normalizes a AFunBlockStmt.
+    * Pending declarations are appended to the function as one additional `var` statement, and statements still
+    * pending after normalizing the return statement are appended to the body, i.e. right before the return. */
+  def normalizeFunBlockStmt(stmt: AFunBlockStmt): AFunBlockStmt = {
+    // Normalizes its body
+    val others2 = stmt.others.map(normalizeStmtInNestedBlock)
+    // And normalizes its return statement
+    val ret2 = normalizeReturnStmt(stmt.ret)
+    // Add declarations to the function if needed
+    val declarations2 = if (declarations.isEmpty) {
+      stmt.declarations
+    } else {
+      stmt.declarations :+ AVarStmt(declarations.toList, stmt.loc)
+    }
+    // Add statements before the return statement if needed
+    val others3 = if (statements.isEmpty) {
+      others2
+    } else {
+      others2 :+ ANestedBlockStmt(statements.toList, stmt.loc)
+    }
+    // Reset the per-function state before the next function is processed
+    declarations.clear()
+    statements.clear()
+    stmt.copy(declarations = declarations2, others = others3, ret = ret2)
+  }
+
+  /** Normalizes a AFunDeclaration */
+  def normalizeDeclaration(decl: AFunDeclaration): AFunDeclaration =
+    decl.copy(stmts = normalizeFunBlockStmt(decl.stmts))
+
+  /** Normalizes a AProgram */
+  def normalizeProgram(program: AProgram): AProgram =
+    program.copy(funs = program.funs.map(normalizeDeclaration))
+}
+
+/**
+  * A normalizer that chains two others: the input program is first given to `normalizer1`,
+  * and the program it produces is then given to `normalizer2`.
+  */
+class CombineNormalizers(normalizer1: Normalizer, normalizer2: Normalizer) extends Normalizer {
+  override def normalizeProgram(program: AProgram): AProgram = {
+    val afterFirst = normalizer1.normalizeProgram(program)
+    normalizer2.normalizeProgram(afterFirst)
+  }
+}
+
+/** The identity normalizer: programs are returned exactly as they were given. */
+object NoNormalizer extends Normalizer {
+  // Overriding normalizeProgram is not strictly needed, since the inherited definition also ends up
+  // returning the same program, but spelling it out makes the intent obvious.
+  override def normalizeProgram(program: AProgram): AProgram = {
+    program
+  }
+}
+
+/**
+  * Rewrites return statements so that every one of them has the form `return id`, where id is an identifier.
+  */
+object ReturnsNormalizer extends Normalizer {
+
+  /** Turns [[return e]] into [[return id]]. */
+  override def normalizeReturnStmt(ret: AReturnStmt): AReturnStmt = {
+    val returnedId = normalizeToIdentifier(ret.value)
+    ret.copy(value = returnedId)
+  }
+}
+
+/**
+  * Rewrites function calls so that the program fits the NormalizedCalls sub-language,
+  * in which every function call has the form [[id = id(id1, id2, ...)]].
+  */
+object CallsNormalizer extends Normalizer {
+
+  /** Turns the callee and every argument of a call into an identifier:
+    * [[e(e1, e2, ...)]] becomes [[id(id1, id2, ...)]]. The callee is handled before the arguments. */
+  def normalizeFunctionCall(f: ACallFuncExpr): ACallFuncExpr = {
+    val callee = normalizeToIdentifier(f.targetFun)
+    val arguments = f.args.map(normalizeToIdentifier)
+    f.copy(targetFun = callee, args = arguments)
+  }
+
+  /** A call occurring as an expression is normalized and then replaced by an identifier that is assigned its result.
+    * The one situation where the call itself is kept, [[id = e(e1, e2, ...)]], is dealt with in normalizeStmtInNestedBlock. */
+  override def normalizeExpr(e: AExpr): AExpr =
+    e match {
+      case call: ACallFuncExpr =>
+        val normalizedCall = normalizeFunctionCall(call)
+        normalizeToIdentifier(normalizedCall)
+      case other => super.normalizeExpr(other)
+    }
+
+  override def normalizeStmtInNestedBlock(stmt: AStmtInNestedBlock) =
+    stmt match {
+      case AAssignStmt(target: AIdentifier, call: ACallFuncExpr, loc) =>
+        // Already of the form [[id = e(e1, e2, ...)]]: only the call needs to become id(id1, id2, ...)
+        val normalizedCall = normalizeFunctionCall(call)
+        nestedBlock(AAssignStmt(target, normalizedCall, loc))
+      case other =>
+        // Everything else goes through the generic traversal, which ends up in normalizeExpr.
+        super.normalizeStmtInNestedBlock(other)
+    }
+}
+
+/**
+  * Rewrites pointer operations so that the program fits the NormalizedForPointsToAnalysis sub-language.
+  * The only pointer statements allowed in that sub-language are:
+  * id = alloc P where P is null or an integer constant
+  * id1 = &id2
+  * id1 = id2
+  * id1 = *id2
+  * *id1 = id2
+  * id = null
+  */
+object PointersNormalizer extends Normalizer {
+
+  /** Brings the left-hand side of an assignment into the form id or *id.
+    * A unary operation whose target is not an identifier gets its target stored in a fresh temporary first. */
+  def normalizeLeft(left: Assignable): Assignable =
+    left match {
+      case _: AIdentifier | AUnaryOp(_, _: AIdentifier, _) =>
+        // Already in the expected form
+        left
+      case AUnaryOp(operator, operand, _) =>
+        val freshName = newVariable()
+        val freshId = AIdentifier(freshName, left.loc)
+        addDeclaration(AIdentifierDeclaration(freshName, left.loc))
+        val assignment = AAssignStmt(freshId, operand, left.loc)
+        addStatement(normalizeStmtInNestedBlock(assignment))
+        AUnaryOp(operator, freshId, left.loc)
+    }
+
+  /** Brings the right-hand side of an assignment into one of the forms alloc P, null, &id, *id, or id. */
+  def normalizeRight(right: AExpr): AExpr =
+    right match {
+      case unary: AUnaryOp =>
+        unary.copy(target = normalizeToIdentifier(unary.target))
+      case _ =>
+        /* Identifiers, null and alloc need no rewriting. Every remaining kind of expression is also treated as already normalized.
+           Maybe it shouldn't be, but such expressions are not supported by the NormalizedForPointsToAnalysis sub-language in any case. */
+        right
+    }
+
+  override def normalizeStmtInNestedBlock(stmt: AStmtInNestedBlock) =
+    stmt match {
+      case AAssignStmt(target: AIdentifier, value, _) =>
+        // [[id = value]]: only the right-hand side needs work
+        nestedBlock(AAssignStmt(target, normalizeRight(value), stmt.loc))
+      case AAssignStmt(target, value, _) =>
+        // The left-hand side is a unary operation: normalize it first, then turn the right-hand side into an identifier.
+        val normalizedTarget = normalizeLeft(target)
+        val valueId = normalizeToIdentifier(value)
+        nestedBlock(AAssignStmt(normalizedTarget, valueId, stmt.loc))
+      case other => super.normalizeStmtInNestedBlock(other)
+    }
+}
diff --git a/src/tip/interpreter/Interpreter.scala b/src/tip/interpreter/Interpreter.scala
@@ -269,6 +269,7 @@ abstract class Interpreter(program: AProgram)(implicit declData: DeclarationData
     */
   private def input(): IntValue = {
     print(s"Enter input: ")
+    Console.flush()
     val line = scala.io.StdIn.readLine()
     if (line == null) {
       spec.constInt(0)
diff --git a/src/tip/lattices/GenericLattices.scala b/src/tip/lattices/GenericLattices.scala
@@ -9,6 +9,11 @@ trait Lattice {
 
   /**
     * The type of the elements of this lattice.
+    *
+    * To novice Scala programmers:
+    * This is an example of an abstract type member. In this trait, `Element` is just a name for a type.
+    * It is constrained in sub-traits and sub-classes, similarly to type parameters in generic classes.
+    * For more information about abstract type members in Scala, see [[https://docs.scala-lang.org/tour/abstract-types.html]].
     */
   type Element
 
diff --git a/src/tip/util/Output.scala b/src/tip/util/Output.scala
@@ -23,6 +23,7 @@ object Output {
       case OtherOutput(OutputKindE.`cfg`) => "_cfg.dot"
       case OtherOutput(OutputKindE.`icfg`) => "_icfg.dot"
       case OtherOutput(OutputKindE.`types`) => "_types.ttip"
+      case OtherOutput(OutputKindE.`normalized`) => "_normalized.tip"
       case DataFlowOutput(k) =>
         s"_$k.dot"
       case _ => ???
@@ -118,7 +119,7 @@ object Output {
   * Different kinds of output (determine output file names).
   */
 object OutputKindE extends Enumeration {
-  val cfg, icfg, types = Value
+  val cfg, icfg, types, normalized = Value
 }
 
 sealed trait OutputKind

Original file line number	Diff line number	Diff line change
`@@ -165,6 +165,8 @@ case class AProgram(funs: List[AFunDeclaration], loc: Loc) extends AstNode {`
`165`	`165`	`private def findMainFunction(): Option[AFunDeclaration] =`
`166`	`166`	`funs.find(decl => decl.name == "main")`
`167`	`167`
	`168`	`+ override def toString: String =`
	`169`	`+ s"${this.print(PartialFunction.empty)}"`
`168`	`170`	`}`
`169`	`171`
`170`	`172`	`case class AFunDeclaration(name: String, args: List[AIdentifierDeclaration], stmts: AFunBlockStmt, loc: Loc) extends ADeclaration {`