Fixed NGrammar ParseTree construction. Added ParseTree.posOpt. Added a new rewriteBinary implementation. Added CLAUDE.md.

robby-phd · robby-phd · commit b64fad6223c8 · 2026-02-14T09:23:04.000-06:00
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,37 @@
+# Runtime Library
+
+## Parse Tree Structure (NGrammar.parse)
+
+The LL(k) parser (`NGrammar.parse`) produces a tree of `ParseTree.Leaf` and `ParseTree.Node` values.
+
+### ParseTree.Leaf (tokens)
+Created by `LexerDfas.lex` for each token. Fields:
+- `text: String` — the matched source text (empty `""` for synthetic EOF)
+- `ruleName: String` — the lexer rule name from the grammar (e.g., `"ID"`, `"INT"`, `"LBRACE"`); for string literals, the quoted form (e.g., `"'val'"`); for EOF, `"EOF"`
+- `tipe: U32` — unique token type ID from `PredictiveTable.nameMap`
+- `isHidden: B` — `T` for whitespace/comment tokens (skipped by `LexerDfas.tokens` when `skipHidden = T`)
+- `posOpt: Option[Position]` — source position
+
+`Leaf` also extends `Token`, so `num` is an alias for `tipe`, and `toLeaf` returns `this`.
+
+### ParseTree.Node (grammar rules)
+Created by `NGrammar.parse` for non-terminal rules. Fields:
+- `children: ISZ[ParseTree]` — child nodes (Leaf or Node)
+- `ruleName: String` — the grammar rule name (e.g., `"file"`, `"exp3"`, `"infixSuffix"`)
+- `tipe: U32` — the rule's unique ID from `PredictiveTable.nameMap` (same namespace as token types)
+- `posOpt: Option[Position]` — computed from first/last child positions
+
+### Synthetic Rules (isSynthetic)
+Grammar normalization (`Grammar.normalize`) converts `*`, `+`, `?` into synthetic recursive rules named `baseName$N` (e.g., `exp3$0`, `program$1`). These have `isSynthetic = T` in the `NRule`.
+
+**Key behavior**: When `NRule.isSynthetic = T`, the parser **does not wrap** the children in a `ParseTree.Node`. Instead, children are inlined flat into the parent. This means:
+- `rule*` / `rule+` / `rule?` do NOT produce their own nodes in the parse tree
+- Their matched children appear directly as children of the enclosing non-synthetic rule
+- For example, `exp3: exp2 infixSuffix*` produces a single `exp3` Node whose children are `[exp2_node, infixSuffix_node, infixSuffix_node, ...]`
+
+### Two NRule Kinds
+- `NRule.Elements` — a sequence of elements (single production). If non-synthetic, wraps children in `ParseTree.Node(trees, name, num)`.
+- `NRule.Alts` — a choice among alternatives (multi-production). If non-synthetic, wraps the chosen alternative's result in `ParseTree.Node(trees, name, num)`. If synthetic, delegates directly to the chosen alternative without wrapping.
+
+### Name/Type ID Mapping
+`PredictiveTable.nameMap: HashSMap[String, U32]` maps both token names and rule names to unique `U32` IDs. `reverseNameMap` provides the inverse. String literal tokens use quoted keys like `"'val'"`. The same `U32` value appears in both `ParseTree.tipe` and `NRule.num`.
diff --git a/library/shared/src/main/scala/org/sireum/parser/NGrammar.scala b/library/shared/src/main/scala/org/sireum/parser/NGrammar.scala
@@ -267,7 +267,15 @@ object NGrammar {
     }
     def parseAlts(alts: NRule.Alts, i: Z): Option[(Z, ISZ[ParseTree])] = {
       pt.predict(alts.num, lookahead(i)) match {
-        case Some(n) => return parseRule(alts.alts(n), i)
+        case Some(n) =>
+          if (alts.isSynthetic) {
+            return parseRule(alts.alts(n), i)
+          } else {
+            parseRule(alts.alts(n), i) match {
+              case Some((j, trees)) => return Some((j, ISZ(ParseTree.Node(trees, alts.name, alts.num))))
+              case _ => return None()
+            }
+          }
         case _ =>
           // For synthetic choice rules (star/opt), if the last alt is an empty
           // synthetic rule, use it as a default stop/skip when prediction fails.
diff --git a/library/shared/src/main/scala/org/sireum/parser/ParseTree.scala b/library/shared/src/main/scala/org/sireum/parser/ParseTree.scala
@@ -36,6 +36,7 @@ import org.sireum.message.Position
   @pure def ruleName: String
   @pure def toST: ST
   @pure def tipe: U32
+  @pure def posOpt: Option[Position]
 
   override def string: String = {
     return toST.render
@@ -63,7 +64,17 @@ object ParseTree {
       st"""$ruleName(
           |  ${(for (child <- children) yield child.toST, ",\n")}
           |)"""
-
+    // Source span of this node: from the first child that has a position through
+    // the last child that has one. Children without a position (e.g., a Node with
+    // no children) are skipped at either end, so they neither truncate the span to
+    // a single child nor erase it. Yields None only when no child has a position
+    // (which includes the case of having no children at all).
+    @memoize def posOpt: Option[Position] = {
+      // Leftmost child position, scanning left to right
+      var firstOpt: Option[Position] = None()
+      var i: Z = 0
+      while (firstOpt.isEmpty && i < children.size) {
+        firstOpt = children(i).posOpt
+        i = i + 1
+      }
+      if (firstOpt.isEmpty) {
+        return None()
+      }
+      // Rightmost child position, scanning right to left; this is guaranteed to
+      // find one because the forward scan above already did
+      var lastOpt: Option[Position] = None()
+      var j: Z = children.size - 1
+      while (lastOpt.isEmpty && j >= 0) {
+        lastOpt = children(j).posOpt
+        j = j - 1
+      }
+      (firstOpt, lastOpt) match {
+        case (Some(first), Some(last)) => return Some(first.to(last))
+        case (_, _) => return None()
+      }
+    }
   }
 
   @record class DotGenerator {
@@ -133,10 +144,65 @@ object ParseTree {
 
 
   // T1[exp] ( T1[op] T1[exp] )* => T2[exp]
+  // Uses divide-and-conquer: finds the lowest-precedence operator as the root,
+  // then recursively builds the left and right subtrees.
+  // For same-precedence operators: picks the rightmost for left-associative (so the
+  // left subtree is larger), or the leftmost for right-associative (so the right
+  // subtree is larger).
   def rewriteBinary[Builder, T1, T2](builder: Builder,
                                      bp: BinaryPrecedenceOps[Builder, T1, T2],
                                      trees: ISZ[T1],
                                      reporter: message.Reporter): T2 = {
+    // Every input tree is transformed once, up front. The result alternates
+    // operands and operators: [operand0, op0, operand1, op1, operand2, ...],
+    // i.e., operand k is at ts(k * 2) and operator k is at ts(k * 2 + 1).
+    val ts: ISZ[T2] = for (t <- trees) yield bp.transform(builder, t)
+
+    // Operator k sits between operand k and operand k + 1
+    def operatorAt(k: Z): T2 = {
+      return ts(k * 2 + 1)
+    }
+
+    // Precedence of an operator; when bp reports none, bp.lowestPrecedence is used
+    def precOf(op: T2): Z = {
+      bp.precedence(op) match {
+        case Some(n) => return n
+        case _ => return bp.lowestPrecedence
+      }
+    }
+
+    // Builds the expression over operands first..last (inclusive operand indices);
+    // the operators involved are first..(last - 1)
+    def build(first: Z, last: Z): T2 = {
+      if (first == last) {
+        // A single operand, no operator left to apply
+        return ts(first * 2)
+      }
+      // Choose the root operator: the one with the lowest precedence. Among
+      // operators of equal precedence, a later non-right-associative one replaces
+      // the current choice (rightmost wins), while a right-associative one does
+      // not (leftmost is kept).
+      var rootIdx: Z = first
+      var rootPrec: Z = precOf(operatorAt(first))
+      var k: Z = first + 1
+      while (k < last) {
+        val candidate = operatorAt(k)
+        val candidatePrec: Z = precOf(candidate)
+        var take: B = bp.isHigherPrecedence(rootPrec, candidatePrec)
+        if (!take && !bp.isHigherPrecedence(candidatePrec, rootPrec)) {
+          // Same precedence level: associativity of the candidate decides
+          take = !bp.isRightAssoc(candidate)
+        }
+        if (take) {
+          rootPrec = candidatePrec
+          rootIdx = k
+        }
+        k = k + 1
+      }
+      // Left side is built before the right side
+      val lhs = build(first, rootIdx)
+      val rhs = build(rootIdx + 1, last)
+      val rootOp = operatorAt(rootIdx)
+      if (bp.shouldParenthesizeOperands(rootOp)) {
+        // Only operands that are themselves binary get parenthesized
+        val l: T2 = if (bp.isBinary(lhs)) bp.parenthesize(builder, lhs) else lhs
+        val r: T2 = if (bp.isBinary(rhs)) bp.parenthesize(builder, rhs) else rhs
+        return bp.binary(builder, l, rootOp, r)
+      }
+      return bp.binary(builder, lhs, rootOp, rhs)
+    }
+
+    // Index of the last operand in the alternating operand/operator layout
+    return build(0, (ts.size - 1) / 2)
+  }
+
+  // T1[exp] ( T1[op] T1[exp] )* => T2[exp]
+  def rewriteBinaryOld[Builder, T1, T2](builder: Builder,
+                                        bp: BinaryPrecedenceOps[Builder, T1, T2],
+                                        trees: ISZ[T1],
+                                        reporter: message.Reporter): T2 = {
     def construct(ts: ISZ[T2], rightAssoc: B, start: Z, stop: Z): T2 = {
       if (rightAssoc) {
         var r = ts(stop)
@@ -244,5 +310,4 @@ object ParseTree {
     }
     return acs(0)
   }
-
 }