Add TransitiveClosure test, improve handling of generic construct to ensure comments are added

FedericoPonzi · FedericoPonzi · commit 6bd08fee9f8c · 2025-09-15T17:59:32.000+01:00
diff --git a/src/main/java/me/fponzi/tlaplusformatter/TlaDocBuilder.java b/src/main/java/me/fponzi/tlaplusformatter/TlaDocBuilder.java
@@ -48,6 +48,7 @@ private void registerDefaultConstructs() {
         registry.register(new VariableConstruct());
         registry.register(new OperatorConstruct());
         registry.register(new ConstantConstruct());
+        registry.register(new InfixLhsConstruct());
 
         // Basic elements
         registry.register(new IdentifierConstruct());
@@ -103,6 +104,7 @@ private void registerDefaultConstructs() {
         registry.register(new TupleConstruct());
         registry.register(new SetEnumerateConstruct());
         registry.register(new TimesConstruct());
+        registry.register(new IdPrefixConstruct());
 
     }
 
diff --git a/src/main/java/me/fponzi/tlaplusformatter/constructs/ConstructContext.java b/src/main/java/me/fponzi/tlaplusformatter/constructs/ConstructContext.java
@@ -45,10 +45,14 @@ public FormatConfig getConfig() {
      */
     public Doc buildChild(TreeNode child) {
         var childDoc = docBuilder.build(child);
-        var comments = Arrays.stream(child.getPreComments())
+        return addComments(child, childDoc);
+    }
+
+    public static Doc addComments(TreeNode node, Doc mainDoc) {
+        var comments = Arrays.stream(node.getPreComments())
                 .map((v) -> Doc.text(v.trim()))
                 .collect(Collectors.toList());
-        comments.add(childDoc);
+        comments.add(mainDoc);
         return Doc.intersperse(Doc.line(), comments);
     }
 
diff --git a/src/main/java/me/fponzi/tlaplusformatter/constructs/impl/AbstractAppendImageConstruct.java b/src/main/java/me/fponzi/tlaplusformatter/constructs/impl/AbstractAppendImageConstruct.java
@@ -5,13 +5,34 @@
 import me.fponzi.tlaplusformatter.constructs.TlaConstruct;
 import tla2sany.st.TreeNode;
 
+import java.util.Optional;
+
 public abstract class AbstractAppendImageConstruct implements TlaConstruct {
     public abstract int getSupportedNodeKind();
 
     public abstract String getName();
 
     @Override
     public final Doc buildDoc(TreeNode node, ConstructContext context, int indentSize) {
-        return Doc.text(node.getHumanReadableImage());
+        var z = node.zero();
+        var o = node.one();
+        if ((z == null || z.length == 0) && (o == null || o.length == 0)) {
+            return Doc.text(node.getHumanReadableImage());
+        }
+        return Optional.ofNullable(buildChildren(node, context, z)).orElse(buildChildren(node, context, o));
+    }
+
+    private Doc buildChildren(TreeNode node, ConstructContext context, TreeNode[] c) {
+        if (c == null) {
+            return null;
+        }
+        var childDoc = context.buildChild(c[0]);
+        for (int i = 1; i < c.length; i++) {
+            var nextChildDoc = context.buildChild(c[i]);
+            if (nextChildDoc != null) {
+                childDoc = childDoc.appendSpace(nextChildDoc);
+            }
+        }
+        return childDoc;
     }
 }
diff --git a/src/main/java/me/fponzi/tlaplusformatter/constructs/impl/FunctionDefinitionConstruct.java b/src/main/java/me/fponzi/tlaplusformatter/constructs/impl/FunctionDefinitionConstruct.java
@@ -28,15 +28,24 @@ public int getSupportedNodeKind() {
     @Override
     public Doc buildDoc(TreeNode node, ConstructContext context, int indentSize) {
         var o = node.one();
-        assert (o != null && o.length == 6);
+        assert (o != null && o.length >= 6);
         var oDoc = Arrays.stream(o).map(context::buildChild).collect(Collectors.toList());
+        var boundVars = oDoc.get(2);
+        for (int i = 3; i < o.length - 3; i++) {
+            if (o[i].getImage().equals(",")) {
+                boundVars = boundVars.append(oDoc.get(i));
+                continue;
+            }
+            boundVars = boundVars.appendLineOrSpace(oDoc.get(i));
+        }
+
         return Doc.group(
                 oDoc.get(0) // function name
                         .append(oDoc.get(1))// [
-                        .append(oDoc.get(2)) // bound variables
-                        .append(oDoc.get(3)) // ]
-                        .appendSpace(oDoc.get(4)) // ==
-                        .appendLineOrSpace(oDoc.get(5)) // expr
+                        .append(boundVars) // one or more comma separated bound variables
+                        .append(oDoc.get(o.length - 3)) // ]
+                        .appendSpace(oDoc.get(o.length - 2)) // ==
+                        .appendLineOrSpace(oDoc.get(o.length - 1)) // expr
         ).indent(indentSize);
     }
 }
diff --git a/src/main/java/me/fponzi/tlaplusformatter/constructs/impl/IdPrefixConstruct.java b/src/main/java/me/fponzi/tlaplusformatter/constructs/impl/IdPrefixConstruct.java
@@ -0,0 +1,18 @@
+package me.fponzi.tlaplusformatter.constructs.impl;
+
+import me.fponzi.tlaplusformatter.constructs.NodeKind;
+
+/**
+ * So far, I've always seen this used with an empty image.
+ */
+public class IdPrefixConstruct extends AbstractAppendImageConstruct {
+    @Override
+    public int getSupportedNodeKind() {
+        return NodeKind.ID_PREFIX.getId();
+    }
+
+    @Override
+    public String getName() {
+        return "N_IdPrefix";
+    }
+}
diff --git a/src/main/java/me/fponzi/tlaplusformatter/constructs/impl/InfixLhsConstruct.java b/src/main/java/me/fponzi/tlaplusformatter/constructs/impl/InfixLhsConstruct.java
@@ -0,0 +1,18 @@
+package me.fponzi.tlaplusformatter.constructs.impl;
+
+import me.fponzi.tlaplusformatter.constructs.NodeKind;
+
+/**
+ * Example: 'R**T'
+ */
+public class InfixLhsConstruct extends AbstractAppendImageConstruct {
+    @Override
+    public int getSupportedNodeKind() {
+        return NodeKind.INFIX_LHS.getId();
+    }
+
+    @Override
+    public String getName() {
+        return "N_InfixLHS";
+    }
+}
diff --git a/src/test/java/me/fponzi/tlaplusformatter/InputFolderTest.java b/src/test/java/me/fponzi/tlaplusformatter/InputFolderTest.java
@@ -34,7 +34,6 @@ void testTowerOfHanoi() {
     }
 
     @Test
-        // TODO: fix.
     void testSlush() {
         testSpecFiles("Slush");
     }
@@ -43,4 +42,9 @@ void testSlush() {
     void testAllConstructs() {
         testSpecFiles("AllConstructs");
     }
+
+    @Test
+    void testTransitiveClosure() {
+        testSpecFiles("TransitiveClosure");
+    }
 }
diff --git a/src/test/java/me/fponzi/tlaplusformatter/LexiconTest.java b/src/test/java/me/fponzi/tlaplusformatter/LexiconTest.java
@@ -90,10 +90,7 @@ boolean compareAst(TreeNode root1, TreeNode root2) {
     void compareComments(TreeNode root1, TreeNode root2) {
         boolean hasComments1 = root1.getPreComments() != null && root1.getPreComments().length > 0;
         boolean hasComments2 = root2.getPreComments() != null && root2.getPreComments().length > 0;
-        if (hasComments1 != hasComments2) {
-            System.out.println("Node kind: " + root1.getKind());
-        }
-        assertEquals(hasComments1, hasComments2);
+        assertEquals(hasComments1, hasComments2, "Comments do not match, Node image: " + root1.getHumanReadableImage() + " location: " + root1.getLocation());
         if (hasComments1) {
             assertEquals(root1.getPreComments().length, root2.getPreComments().length);
             for (int i = 0; i < root1.getPreComments().length; i++) {
diff --git a/src/test/resources/outputs/TransitiveClosure.tla b/src/test/resources/outputs/TransitiveClosure.tla
@@ -0,0 +1,182 @@
+-------------------------- MODULE TransitiveClosure -------------------------
+(***************************************************************************)
+(* Mathematicians define a relation R to be a set of ordered pairs, and    *)
+(* write `s R t' to mean `<<s, t>> \in R'.  The transitive closure TC(R)   *)
+(* of the relation R is the smallest relation containing R such that,        *)
+(* `s TC(R) t' and `t TC(R) u' imply `s TC(R) u', for any s, t, and u.     *)
+(* This module shows several ways of defining the operator TC.             *)
+(*                                                                         *)
+(* It is sometimes more convenient to represent a relation as a            *)
+(* Boolean-valued function of two arguments, where `s R t' means R[s, t].  *)
+(* It is a straightforward exercise to translate everything in this module *)
+(* to that representation.                                                 *)
+(*                                                                         *)
+(* Mathematicians say that R is a relation on a set S iff R is a subset of *)
+(* S \X S.  Let the `support' of a relation R be the set of all elements s *)
+(* such that `s R t' or `t R s' for some t.  Then any relation is a        *)
+(* relation on its support.  Moreover, the support of R is the support of  *)
+(* TC(R).  So, to define the transitive closure of R, there's no need to   *)
+(* say what set R is a relation on.                                        *)
+(*                                                                         *)
+(* Let's begin by importing some modules we'll need and defining the the   *)
+(* support of a relation.                                                  *)
+(***************************************************************************)
+EXTENDS Integers, Sequences, FiniteSets, TLC
+
+Support(R) == { r[1]: r \in  R} \cup { r[2]: r \in  R}
+
+(***************************************************************************)
+(* A relation R defines a directed graph on its support, where there is an *)
+(* edge from s to t iff `s R t'.  We can define TC(R) to be the relation   *)
+(* such that `s R t' holds iff there is a path from s to t in this graph.  *)
+(* We represent a path by the sequence of nodes on the path, so the length *)
+(* of the path (the number of edges) is one greater than the length of the *)
+(* sequence.  We then get the following definition of TC.                  *)
+(***************************************************************************)
+TC(R) ==
+  LET S ==  Support( R)
+  IN {<<s ,t>> \in  S \X  S: \E p \in  Seq( S):
+          /\  Len( p) > 1
+          /\  p[1] =  s
+          /\  p[ Len( p)] =  t
+          /\ \A i \in 1 .. (  Len( p) - 1 ): <<  p[ i],  p[ i + 1] >> \in  R
+      }
+
+(***************************************************************************)
+(* This definition can't be evaluated by TLC because Seq(S) is an infinite *)
+(* set.  However, it's not hard to see that if R is a finite set, then it  *)
+(* suffices to consider paths whose length is at most Cardinality(S).      *)
+(* Modifying the definition of TC we get the following definition that     *)
+(* defines TC1(R) to be the transitive closure of R, if R is a finite set. *)
+(* The LET expression defines BoundedSeq(S, n) to be the set of all        *)
+(* sequences in Seq(S) of length at most n.                                *)
+(***************************************************************************)
+TC1(R) ==
+  LET BoundedSeq(S, n) ==  UNION {[1 ..  i ->  S]: i \in 0 ..  n}
+      S ==  Support( R)
+  IN {<<s ,t>> \in  S \X  S: \E p \in  BoundedSeq( S,  Cardinality( S) + 1):
+          /\  Len( p) > 1
+          /\  p[1] =  s
+          /\  p[ Len( p)] =  t
+          /\ \A i \in 1 .. (  Len( p) - 1 ): <<  p[ i],  p[ i + 1] >> \in  R
+      }
+
+(***************************************************************************)
+(* This naive method used by TLC to evaluate expressions makes this        *)
+(* definition rather inefficient.  (As an exercise, find an upper bound on *)
+(* its complexity.) To obtain a definition that TLC can evaluate more      *)
+(* efficiently, let's look at the closure operation more algebraically.    *)
+(* Let's define the composition of two relations R and T as follows.       *)
+(***************************************************************************)
+R ** T ==
+  LET SR ==  Support( R)
+      ST ==  Support( T)
+  IN {<<r ,t>> \in  SR \X  ST: \E s \in  SR \cap  ST:
+          ( <<  r,  s >> \in  R ) /\ ( <<  s,  t >> \in  T )
+      }
+
+(***************************************************************************)
+(* We can then define the closure of R to equal                            *)
+(*                                                                         *)
+(*    R \cup (R ** R) \cup (R ** R ** R) \cup ...                          *)
+(*                                                                         *)
+(* For R finite, this union converges to the transitive closure when the   *)
+(* number of terms equals the cardinality of the support of R.  This leads *)
+(* to the following definition.                                            *)
+(***************************************************************************)
+TC2(R) ==
+  LET C[n \in  Nat] ==
+        IF  n = 0 THEN  R ELSE  C[ n - 1] \cup (  C[ n - 1] **  R )
+  IN IF  R = {} THEN {} ELSE  C[ Cardinality( Support( R)) - 1]
+
+(***************************************************************************)
+(* These definitions of TC1 and TC2 are somewhat unsatisfactory because of *)
+(* their use of Cardinality(S).  For example, it would be easy to make a   *)
+(* mistake and use Cardinality(S) instead of Cardinality(S)+1 in the       *)
+(* definition of TC1(R).  I find the following definition more elegant     *)
+(* than the preceding two.  It is also more asymptotically more efficient  *)
+(* because it makes O(log Cardinality (S)) rather than O(Cardinality(S))   *)
+(* recursive calls.                                                        *)
+(***************************************************************************)
+RECURSIVE TC3 ( _ )
+TC3(R) ==
+  LET RR ==  R **  R IN IF  RR \subseteq  R THEN  R ELSE  TC3( R \cup  RR)
+
+(***************************************************************************)
+(* The preceding two definitions can be made slightly more efficient to    *)
+(* execute by expanding the definition of ** and making some simple        *)
+(* optimizations.  But, this is unlikely to be worth complicating the      *)
+(* definitions for.                                                        *)
+(*                                                                         *)
+(* The following definition is (asymptotically) the most efficient.  It is *)
+(* essentially the TLA+ representation of Warshall's algorithm.            *)
+(* (Warshall's algorithm is typically written as an iterative procedure    *)
+(* for the case of a relation on a set i..j of integers, when the relation *)
+(* is represented as a Boolean-valued function.)                           *)
+(***************************************************************************)
+TC4(R) ==
+  LET S ==  Support( R)
+      RECURSIVE TCR ( _ )
+      TCR(T) ==
+        IF  T = {}
+        THEN  R
+        ELSE LET r == CHOOSE s \in  T:  TRUE
+                 RR ==  TCR( T \ {  r })
+          IN  RR \cup
+                {<<s ,t>> \in  S \X  S: <<  s,  r >> \in  RR /\ <<  r,  t >> \in  RR
+                }
+  IN  TCR( S)
+
+(***************************************************************************)
+(* We now test that these four definitions are equivalent.  Since it's     *)
+(* unlikely that all four are wrong in the same way, their equivalence     *)
+(* makes it highly probable that they're correct.                          *)
+(***************************************************************************)
+ASSUME \A N \in 0 .. 3:
+         \A R \in  SUBSET ( ( 1 ..  N ) \X ( 1 ..  N ) ): /\  TC1( R) =  TC2( R)
+                                                          /\  TC2( R) =  TC3( R)
+                                                          /\  TC3( R) =  TC4( R)
+
+(***************************************************************************)
+(* Sometimes we want to represent a relation as a Boolean-valued operator, *)
+(* so we can write `s R t' as R(s, t).  This representation is less        *)
+(* convenient for manipulating relations, since an operator is not an      *)
+(* ordinary value the way a function is.  For example, since TLA+ does not *)
+(* permit us to define operator-valued operators, we cannot define a       *)
+(* transitive closure operator TC so TC(R) is the operator that represents *)
+(* the transitive closure.  Moreover, an operator R by itself cannot       *)
+(* represent a relation; we also have to know what set it is an operator   *)
+(* on.  (If R is a function, its domain tells us that.)                    *)
+(*                                                                         *)
+(* However, there may be situations in which you want to represent         *)
+(* relations by operators.  In that case, you can define an operator TC so *)
+(* that, if R is an operator representing a relation on S, and TCR is the  *)
+(* operator representing it transitive closure, then                       *)
+(*                                                                         *)
+(*   TCR(s, t) = TC(R, S, s, t)                                            *)
+(*                                                                         *)
+(* for all s, t.  Here is the definition.  (This assumes that for an       *)
+(* operator R on a set S, R(s, t) equals FALSE for all s and t not in S.)  *)
+(***************************************************************************)
+TC5(R ( _ , _ ), S, s, t) ==
+  LET CR[n \in  Nat,
+        v \in  S] ==
+        IF  n = 0
+        THEN  R( s,  v)
+        ELSE \/  CR[ n - 1,  v]
+             \/ \E u \in  S:  CR[ n - 1,  u] /\  R( u,  v)
+  IN /\  s \in  S
+     /\  t \in  S
+     /\  CR[ Cardinality( S) - 1,  t]
+
+(***************************************************************************)
+(* Finally, the following assumption checks that our definition TC5 agrees *)
+(* with our definition TC1.                                                *)
+(***************************************************************************)
+ASSUME \A N \in 0 .. 3:
+         \A R \in  SUBSET ( ( 1 ..  N ) \X ( 1 ..  N ) ):
+           LET RR(s, t) == <<  s,  t >> \in  R
+               S ==  Support( R)
+           IN \A s, t \in  S:
+                  TC5( RR,  S,  s,  t) <=> ( <<  s,  t >> \in  TC1( R) )
+=============================================================================

Original file line number	Diff line number	Diff line change
`@@ -34,7 +34,6 @@ void testTowerOfHanoi() {`
`34`	`34`	`}`
`35`	`35`
`36`	`36`	`@Test`
`37`		`- // TODO: fix.`
`38`	`37`	`void testSlush() {`
`39`	`38`	`testSpecFiles("Slush");`
`40`	`39`	`}`
`@@ -43,4 +42,9 @@ void testSlush() {`
`43`	`42`	`void testAllConstructs() {`
`44`	`43`	`testSpecFiles("AllConstructs");`
`45`	`44`	`}`
	`45`	`+`
	`46`	`+ @Test`
	`47`	`+ void testTransitiveClosure() {`
	`48`	`+ testSpecFiles("TransitiveClosure");`
	`49`	`+ }`
`46`	`50`	`}`