Skip to content

Commit c4ea78c

Browse files
committed
feat: DAG support for define expressions
1 parent 57ca74a commit c4ea78c

File tree

6 files changed

+518
-57
lines changed

6 files changed

+518
-57
lines changed

pom.xml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,9 @@
163163
<configuration>
164164
<!-- Sets the VM argument line used when unit tests are run. -->
165165
<!-- Suppress UnresolvedMavenProperty. -->
166-
<argLine>-Xms1g -Xmx2g @{surefireArgLine}</argLine>
166+
<argLine>-Xms1g -Xmx2g --add-opens fr.insee.vtl.engine/fr.insee.vtl.engine.utils.dag=ALL-UNNAMED
167+
@{surefireArgLine}
168+
</argLine>
167169
<useSystemClassLoader>false</useSystemClassLoader>
168170
<forkedProcessExitTimeoutInSeconds>180</forkedProcessExitTimeoutInSeconds>
169171
<forkCount>1</forkCount>

vtl-engine/src/main/java/fr/insee/vtl/engine/VtlSyntaxPreprocessor.java

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,19 @@ public void checkForMultipleAssignments() throws VtlScriptException {
7272
List<DAGStatement> bindingPseudoStatements =
7373
bindingVarIds.stream()
7474
.map(
75-
bindingVar ->
76-
new DAGStatement(DAGStatement.PSEUDO_BINDING_POSITION, bindingVar, Set.of()))
75+
bindingVarId ->
76+
new DAGStatement(
77+
DAGStatement.PSEUDO_BINDING_POSITION,
78+
new DAGStatement.Identifier(
79+
DAGStatement.Identifier.Type.VARIABLE, bindingVarId),
80+
Set.of()))
7781
.toList();
78-
Map<String, List<DAGStatement>> groupedByProducedVar =
82+
Map<DAGStatement.Identifier, List<DAGStatement>> groupedByProducedIdentifier =
7983
Stream.concat(bindingPseudoStatements.stream(), unsortedStatements.stream())
8084
.collect(Collectors.groupingBy(DAGStatement::produces));
8185

8286
List<VtlScriptException> multiProducedExceptions =
83-
groupedByProducedVar.entrySet().stream()
87+
groupedByProducedIdentifier.entrySet().stream()
8488
.filter(produced -> produced.getValue().size() > 1)
8589
.map(
8690
multiProduced ->
@@ -95,21 +99,23 @@ public void checkForMultipleAssignments() throws VtlScriptException {
9599
}
96100

97101
private VtlScriptException buildScriptExceptionFromMultipleAssignment(
98-
String varId, List<DAGStatement> statements) {
102+
DAGStatement.Identifier identifier, List<DAGStatement> statements) {
99103
final List<DAGStatement> statementsWithoutBinding =
100104
statements.stream()
101105
.filter(statement -> statement.unsortedIndex() != DAGStatement.PSEUDO_BINDING_POSITION)
102106
.toList();
103107

104108
if (statementsWithoutBinding.size() == 1) {
105109
return new VtlScriptException(
106-
"Dataset " + varId + " is part of the bindings and therefore cannot be assigned",
110+
"Dataset "
111+
+ identifier.name()
112+
+ " is part of the bindings and therefore cannot be assigned",
107113
statementsWithoutBinding.get(0).getPosition(startContext));
108114
}
109115

110116
return DAGStatement.buildMultiStatementExceptionUsingTheLastDAGStatementAsMainPosition(
111117
"Dataset "
112-
+ varId
118+
+ identifier.name()
113119
+ " has already been assigned"
114120
+ (statements.size() == statementsWithoutBinding.size()
115121
? ""

vtl-engine/src/main/java/fr/insee/vtl/engine/utils/dag/DAGBuilder.java

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ private static Graph<DAGStatement, DefaultEdge> buildDAG(List<DAGStatement> dagS
5050

5151
private static boolean dependsOn(DAGStatement stmt2, DAGStatement stmt1) {
5252
// Check if stmt2 consumes data from stmt1
53-
String produced = stmt1.produces();
53+
DAGStatement.Identifier produced = stmt1.produces();
5454
return stmt2.consumes().contains(produced);
5555
}
5656

@@ -101,55 +101,54 @@ private VtlScriptException buildVTLScriptExceptionForCycles(List<Set<DAGStatemen
101101

102102
private String buildAssignmentChain(Set<DAGStatement> cycle) {
103103
// Collect all identifiers produced by the statements in this cycle
104-
Set<String> producedVars =
104+
Set<DAGStatement.Identifier> producedIdentifiers =
105105
cycle.stream().map(DAGStatement::produces).collect(Collectors.toSet());
106106

107107
// Pick a stable start
108-
String startVar =
109-
producedVars.stream()
110-
.sorted()
111-
.findFirst()
108+
DAGStatement.Identifier startIdentifier =
109+
producedIdentifiers.stream()
110+
.min(Comparator.comparing(DAGStatement.Identifier::name))
112111
.orElseThrow(() -> new AssertionError("Cycle contains out of at least two statements"));
113112

114113
StringBuilder sb = new StringBuilder();
115-
String current = startVar;
114+
DAGStatement.Identifier currentIdentifier = startIdentifier;
116115

117116
do {
118-
sb.append(current).append(" <- ");
117+
sb.append(currentIdentifier.name()).append(" <- ");
119118

120119
// Find the unique statement that produces the current identifier
121-
String finalCurrent = current;
120+
DAGStatement.Identifier finalCurrentIdentifier = currentIdentifier;
122121
DAGStatement producer =
123122
cycle.stream()
124-
.filter(stmt -> stmt.produces().equals(finalCurrent))
123+
.filter(stmt -> stmt.produces().equals(finalCurrentIdentifier))
125124
.reduce(
126125
(a, b) -> {
127126
throw new AssertionError(
128127
"Multiple producers of "
129-
+ finalCurrent
128+
+ finalCurrentIdentifier.name()
130129
+ " cannot occur here, this is already validated before");
131130
})
132131
.orElseThrow(
133132
() ->
134133
new AssertionError(
135134
"A cycle is always closed, there must be a consumer for "
136-
+ finalCurrent));
135+
+ finalCurrentIdentifier.name()));
137136

138137
// Choose the next consumed identifier that stays inside the cycle
139-
current =
138+
currentIdentifier =
140139
producer.consumes().stream()
141-
.filter(producedVars::contains)
140+
.filter(producedIdentifiers::contains)
142141
.findFirst()
143142
.orElseThrow(
144143
() ->
145144
new AssertionError(
146145
"Broken cycle at "
147-
+ finalCurrent
146+
+ finalCurrentIdentifier.name()
148147
+ ": no consumed var stays inside cycle"));
149-
} while (!current.equals(startVar));
148+
} while (!currentIdentifier.equals(startIdentifier));
150149

151150
// close the loop
152-
sb.append(startVar);
151+
sb.append(startIdentifier.name());
153152
return "[" + sb + "]";
154153
}
155154

vtl-engine/src/main/java/fr/insee/vtl/engine/utils/dag/DAGStatement.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
import java.util.HashSet;
1010
import java.util.Set;
1111
import java.util.stream.Collectors;
12+
import org.antlr.v4.runtime.ParserRuleContext;
13+
import org.antlr.v4.runtime.tree.ParseTree;
14+
import org.antlr.v4.runtime.tree.RuleNode;
15+
import org.antlr.v4.runtime.tree.TerminalNode;
1216

1317
/**
1418
* Representation of a VTL Statement
@@ -17,7 +21,7 @@
1721
* @param produces Produced data
1822
* @param consumes Consumed data
1923
*/
20-
public record DAGStatement(int unsortedIndex, String produces, Set<String> consumes) {
24+
public record DAGStatement(int unsortedIndex, Identifier produces, Set<Identifier> consumes) {
2125

2226
public static int PSEUDO_BINDING_POSITION = Integer.MIN_VALUE;
2327

@@ -45,6 +49,28 @@ public record DAGStatement(int unsortedIndex, String produces, Set<String> consu
4549
message, lastDagStatement.getPosition(startContext), restPositions);
4650
}
4751

52+
/**
 * Creates a {@link DAGStatement} for the parse-tree node {@code node}.
 *
 * <p>The produced identifier is built from {@code outIdentifierType} and the token text of
 * {@code outIdentifierNode}. The statement index is the position of {@code node} among the
 * children of its parent node.
 *
 * @param outIdentifierType type of the identifier produced by the statement
 * @param outIdentifierNode terminal node whose symbol text becomes the produced identifier's name
 * @param inIdentifiers identifiers consumed by the statement; stored as passed in, without copying
 * @param node parse-tree node of the statement; NOTE(review): presumably a direct child of the
 *     start context, since {@code getPosition} looks the index up there - confirm with callers
 * @return the statement describing what {@code node} produces and consumes
 */
public static DAGStatement of(
53+
Identifier.Type outIdentifierType,
54+
TerminalNode outIdentifierNode,
55+
Set<Identifier> inIdentifiers,
56+
ParserRuleContext node) {
57+
// NOTE(review): despite the "ruleset" prefix, the type is whatever the caller passes in.
Identifier rulesetOutIdentifier =
58+
new Identifier(outIdentifierType, outIdentifierNode.getSymbol().getText());
59+
final int statementIndex = getParentStatementIndex(node);
60+
return new DAGStatement(statementIndex, rulesetOutIdentifier, inIdentifiers);
61+
}
62+
63+
/**
 * Returns the index of {@code node} within the children of its parent.
 *
 * <p>Children are compared by reference ({@code ==}), not by {@code equals}.
 *
 * @param node node to locate; NOTE(review): a node without a parent would make the loop below
 *     dereference {@code null} - confirm that callers never pass the root
 * @return zero-based index of {@code node} among its parent's children
 * @throws AssertionError if {@code node} is not found among its parent's children
 */
private static int getParentStatementIndex(final RuleNode node) {
64+
final ParseTree parent = node.getParent();
65+
for (int i = 0; i < parent.getChildCount(); ++i) {
66+
final ParseTree child = parent.getChild(i);
67+
if (child == node) {
68+
return i;
69+
}
70+
}
71+
throw new AssertionError("Statement must always be part of the its parent node");
72+
}
73+
4874
@Override
4975
public String toString() {
5076
return "Statement{"
@@ -61,4 +87,13 @@ public String toString() {
6187
/**
 * Resolves the source position of this statement.
 *
 * <p>Looks up the child of {@code startContext} at {@code unsortedIndex} and converts it with
 * {@code VtlScriptEngine.fromContext}.
 *
 * @param startContext root context whose children are indexed by {@code unsortedIndex}
 * @return the position derived from that child; NOTE(review): the result for an index with no
 *     matching child (e.g. {@code PSEUDO_BINDING_POSITION}) is not visible here - confirm
 */
public Positioned getPosition(final VtlParser.StartContext startContext) {
6288
return VtlScriptEngine.fromContext(startContext.getChild(unsortedIndex));
6389
}
90+
91+
/**
 * Typed name that a {@link DAGStatement} produces or consumes.
 *
 * <p>Because this is a record, equality and hashing cover both components: two identifiers with
 * the same name but different types are distinct.
 *
 * @param identifierType kind of object the name refers to
 * @param name the identifier text
 */
public record Identifier(Type identifierType, String name) {
92+
/**
 * Kinds of identifiers tracked in the DAG. NOTE(review): presumably these map to VTL variables
 * and to the objects created by the define operator / hierarchical ruleset / datapoint ruleset
 * statements - confirm against the code that builds the identifiers.
 */
public enum Type {
93+
VARIABLE,
94+
OPERATOR,
95+
RULESET_HIERARCHICAL,
96+
RULESET_DATAPOINT
97+
}
98+
}
6499
}

0 commit comments

Comments
 (0)