Skip to content

Commit 376f61a

Browse files
authored
Merge pull request #445 from InseeFr/feat/dag-implementation
Feat/dag implementation
2 parents d2b0a9b + 1c54dec commit 376f61a

File tree

20 files changed

+1756
-80
lines changed

20 files changed

+1756
-80
lines changed

docs/docs/developer-guide/dag.mdx

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
---
2+
id: dag
3+
title: DAG (Directed acyclic graph) - Statement reordering
4+
sidebar_label: DAG - Statement reordering
5+
slug: /developer-guide/dag
6+
custom_edit_url: null
7+
---
8+
9+
### Statement reordering
10+
11+
Suppose we have two Transformations:
12+
13+
1. Create an intermediate dataset `DS_np`
14+
2. Use `DS_np` to calculate another dataset `DS_p`
15+
16+
Even if we write them in "reverse" order, the VTL standard requires executing them in the correct dependency order.
17+
18+
```
19+
-- Transformation Scheme Example
20+
21+
-- (i) This depends on DS_np
22+
DS_p <- if DS_np >= 0 then DS_np else DS_1;
23+
24+
-- (ii) This produces DS_np
25+
DS_np := (DS_1 - DS_2) * 2;
26+
```
27+
28+
Execution order (resolved by the engine):
29+
30+
1. (ii) must run first because DS_np is required before evaluating (i).
31+
32+
2. (i) runs afterwards, since it consumes DS_np.
33+
34+
So even though we wrote (i) before (ii), the engine reorders them automatically, **when reordering is activated**.
35+
36+
### Activate reordering
37+
38+
In Trevas, statement reordering is **enabled by default**, as this is the behavior required by the VTL standard.
39+
Statement reordering can be **deactivated** by setting the engine property `$vtl.engine.use_dag` to `"false"`:
40+
41+
```java
42+
ScriptEngine engine = new ScriptEngineManager().getEngineByName("vtl");
43+
engine.put("$vtl.engine.use_dag", "false");
44+
```

docs/sidebars.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ module.exports = {
101101
items: [
102102
'developer-guide/index-developer-guide',
103103
'developer-guide/developer-javadoc',
104+
'developer-guide/dag',
104105
{
105106
type: 'category',
106107
label: 'Basic mode',

vtl-engine/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@
4949
<artifactId>safety-mirror</artifactId>
5050
<version>4.0.1</version>
5151
</dependency>
52+
<dependency>
53+
<groupId>org.jgrapht</groupId>
54+
<artifactId>jgrapht-core</artifactId>
55+
<version>1.5.2</version>
56+
</dependency>
5257
</dependencies>
5358
<build>
5459
<plugins>

vtl-engine/src/main/java/fr/insee/vtl/engine/VtlScriptEngine.java

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@
55
import fr.insee.vtl.engine.exceptions.VtlRuntimeException;
66
import fr.insee.vtl.engine.exceptions.VtlSyntaxException;
77
import fr.insee.vtl.engine.visitors.AssignmentVisitor;
8-
import fr.insee.vtl.model.FunctionProvider;
9-
import fr.insee.vtl.model.Positioned;
10-
import fr.insee.vtl.model.ProcessingEngine;
11-
import fr.insee.vtl.model.ProcessingEngineFactory;
12-
import fr.insee.vtl.model.VtlMethod;
8+
import fr.insee.vtl.model.*;
139
import fr.insee.vtl.model.exceptions.VtlScriptException;
1410
import fr.insee.vtl.parser.VtlLexer;
1511
import fr.insee.vtl.parser.VtlParser;
@@ -19,37 +15,11 @@
1915
import java.lang.reflect.ParameterizedType;
2016
import java.lang.reflect.Type;
2117
import java.lang.reflect.TypeVariable;
22-
import java.util.ArrayDeque;
23-
import java.util.Arrays;
24-
import java.util.Collection;
25-
import java.util.Deque;
26-
import java.util.HashMap;
27-
import java.util.HashSet;
28-
import java.util.LinkedHashMap;
29-
import java.util.List;
30-
import java.util.Map;
31-
import java.util.Optional;
32-
import java.util.ServiceLoader;
33-
import java.util.Set;
34-
import java.util.StringJoiner;
18+
import java.util.*;
3519
import java.util.stream.Collectors;
3620
import java.util.stream.Stream;
37-
import javax.script.AbstractScriptEngine;
38-
import javax.script.Bindings;
39-
import javax.script.ScriptContext;
40-
import javax.script.ScriptEngine;
41-
import javax.script.ScriptEngineFactory;
42-
import javax.script.ScriptEngineManager;
43-
import javax.script.ScriptException;
44-
import javax.script.SimpleBindings;
45-
import org.antlr.v4.runtime.BaseErrorListener;
46-
import org.antlr.v4.runtime.CharStreams;
47-
import org.antlr.v4.runtime.CodePointCharStream;
48-
import org.antlr.v4.runtime.CommonTokenStream;
49-
import org.antlr.v4.runtime.ParserRuleContext;
50-
import org.antlr.v4.runtime.RecognitionException;
51-
import org.antlr.v4.runtime.Recognizer;
52-
import org.antlr.v4.runtime.Token;
21+
import javax.script.*;
22+
import org.antlr.v4.runtime.*;
5323
import org.antlr.v4.runtime.tree.ParseTree;
5424
import org.antlr.v4.runtime.tree.TerminalNode;
5525

@@ -71,6 +41,9 @@ public class VtlScriptEngine extends AbstractScriptEngine {
7141
/** Script engine property giving the (comma-separated) list of engine names. */
7242
public static final String PROCESSING_ENGINE_NAMES = "$vtl.engine.processing_engine_names";
7343

44+
/** Script engine property controlling DAG-based statement reordering (enabled when unset; any value other than "true" disables it). */
45+
public static final String USE_DAG = "$vtl.engine.use_dag";
46+
7447
private final ScriptEngineFactory factory;
7548
private Map<String, Method> methodCache;
7649

@@ -189,6 +162,16 @@ private String getProcessingEngineName() {
189162
}
190163
}
191164

165+
/**
166+
* Returns whether the DAG should be created and used, based on the {@link #USE_DAG} engine property.
167+
*
168+
* @return true if the property is not set, or if its string form equals "true" (ignoring case);
* false for any other value.
169+
*/
170+
public boolean isUseDag() {
171+
Object useDag = get(USE_DAG);
172+
// A missing property counts as enabled; the value is compared through toString().
return useDag == null || "true".equalsIgnoreCase(useDag.toString());
173+
}
174+
192175
/**
193176
* Returns an instance of the processing engine for the script engine.
194177
*
@@ -261,6 +244,17 @@ public void syntaxError(
261244
throw first;
262245
}
263246

247+
VtlSyntaxPreprocessor syntaxPreprocessor =
248+
new VtlSyntaxPreprocessor(
249+
start, context.getBindings(ScriptContext.ENGINE_SCOPE).keySet());
250+
251+
if (isUseDag()) {
252+
// Reorder Script code
253+
start = syntaxPreprocessor.checkForMultipleAssignmentsAndReorderScript();
254+
} else {
255+
syntaxPreprocessor.checkForMultipleAssignments();
256+
}
257+
264258
AssignmentVisitor assignmentVisitor = new AssignmentVisitor(this, getProcessingEngine());
265259
Object lastValue = null;
266260
for (VtlParser.StatementContext stmt : start.statement()) {
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package fr.insee.vtl.engine;
2+
3+
import fr.insee.vtl.engine.utils.dag.DAGBuilder;
4+
import fr.insee.vtl.engine.utils.dag.DAGStatement;
5+
import fr.insee.vtl.engine.visitors.DAGBuildingVisitor;
6+
import fr.insee.vtl.model.exceptions.VtlMultiErrorScriptException;
7+
import fr.insee.vtl.model.exceptions.VtlScriptException;
8+
import fr.insee.vtl.parser.VtlParser;
9+
import java.util.*;
10+
import java.util.stream.Collectors;
11+
import java.util.stream.Stream;
12+
import org.antlr.v4.runtime.ParserRuleContext;
13+
14+
/**
15+
* Preprocesses a parsed VTL script: detects variables that are assigned more than once (or that
16+
* clash with a name already present in the bindings) and can reorder the statements based on
* the variables dependency order.
17+
*/
18+
public class VtlSyntaxPreprocessor {
19+
20+
// Root of the parsed script, as handed to the constructor.
private final VtlParser.StartContext startContext;
21+
// Names already defined in the bindings; a script statement must not produce any of them.
private final Set<String> bindingVarIds;
22+
// Statements extracted from the script by DAGBuildingVisitor, before any reordering.
private final List<DAGStatement> unsortedStatements;
23+
24+
/**
 * Creates a preprocessor for a parsed script. The statement list is extracted immediately by
 * running a {@link DAGBuildingVisitor} over {@code startContext}.
 *
 * @param startContext root context of the parsed VTL script
 * @param bindingVarIds names of the variables that already exist in the bindings
 */
public VtlSyntaxPreprocessor(VtlParser.StartContext startContext, Set<String> bindingVarIds) {
25+
this.startContext = startContext;
26+
this.bindingVarIds = bindingVarIds;
27+
DAGBuildingVisitor visitor = new DAGBuildingVisitor();
28+
this.unsortedStatements = visitor.visit(startContext);
29+
}
30+
31+
/**
32+
* Checks for multiple assignments of variables, then reorders the VTL script according to
33+
* the variables dependency order as defined by the VTL standard.
*
* <p>The result is a new start context. Child positions that held a sorted statement are
* refilled with the statements in topological order; every other child keeps its position.
34+
*
35+
* @return a new start context whose children are in dependency order
36+
* @throws VtlScriptException when variables are assigned multiple times
37+
*/
38+
public VtlParser.StartContext checkForMultipleAssignmentsAndReorderScript()
39+
throws VtlScriptException {
40+
checkForMultipleAssignments();
41+
42+
// Create DAG & topological sort
43+
DAGBuilder dagBuilder = new DAGBuilder(unsortedStatements, startContext);
44+
List<DAGStatement> sortedStatements = dagBuilder.topologicalSortedStatements();
45+
46+
// NOTE(review): in ANTLR, RuleContext.getRuleContext() returns the context itself, so the new
// StartContext appears to get the original start context as its parent. Confirm this is
// intended rather than startContext.getParent().
VtlParser.StartContext startReordered =
47+
new VtlParser.StartContext(
48+
(ParserRuleContext) startContext.getRuleContext(), startContext.invokingState);
49+
50+
// Child indices of the original start context that are occupied by a sorted statement
51+
Set<Integer> unsortedIndices =
52+
sortedStatements.stream().map(DAGStatement::unsortedIndex).collect(Collectors.toSet());
53+
54+
// Walk the original children in order: each statement slot takes the next statement from the
// sorted list, all other children are copied over unchanged.
int sortedIndex = 0;
55+
for (int i = 0; i < startContext.getChildCount(); i++) {
56+
if (unsortedIndices.contains(i)) {
57+
DAGStatement stmt = sortedStatements.get(sortedIndex++);
58+
startReordered.addAnyChild(startContext.getChild(stmt.unsortedIndex()));
59+
} else {
60+
startReordered.addAnyChild(startContext.getChild(i));
61+
}
62+
}
63+
return startReordered;
64+
}
65+
66+
/**
67+
* Checks that no variable is produced by more than one statement. Each name in the bindings
* counts as one producing statement, so assigning a bound name is reported as well.
68+
*
69+
* @throws VtlScriptException when variables are assigned multiple times; all offending
* variables are reported together through {@link VtlMultiErrorScriptException}.
70+
*/
71+
public void checkForMultipleAssignments() throws VtlScriptException {
72+
// Represent every binding name as a pseudo statement at PSEUDO_BINDING_POSITION that produces
// that variable, so bindings and script statements can be grouped the same way.
List<DAGStatement> bindingPseudoStatements =
73+
bindingVarIds.stream()
74+
.map(
75+
bindingVarId ->
76+
new DAGStatement(
77+
DAGStatement.PSEUDO_BINDING_POSITION,
78+
new DAGStatement.Identifier(
79+
DAGStatement.Identifier.Type.VARIABLE, bindingVarId),
80+
Set.of()))
81+
.toList();
82+
// Group binding pseudo statements and script statements by the identifier they produce
Map<DAGStatement.Identifier, List<DAGStatement>> groupedByProducedIdentifier =
83+
Stream.concat(bindingPseudoStatements.stream(), unsortedStatements.stream())
84+
.collect(Collectors.groupingBy(DAGStatement::produces));
85+
86+
// Any identifier with more than one producer is an error
List<VtlScriptException> multiProducedExceptions =
87+
groupedByProducedIdentifier.entrySet().stream()
88+
.filter(produced -> produced.getValue().size() > 1)
89+
.map(
90+
multiProduced ->
91+
buildScriptExceptionFromMultipleAssignment(
92+
multiProduced.getKey(), multiProduced.getValue()))
93+
.toList();
94+
95+
if (!multiProducedExceptions.isEmpty()) {
96+
throw VtlMultiErrorScriptException.of(
97+
multiProducedExceptions.toArray(new VtlScriptException[] {}));
98+
}
99+
}
100+
101+
/**
 * Builds the exception reported for an identifier that has more than one producer.
 *
 * @param identifier the identifier produced several times
 * @param statements all producers of the identifier, possibly including a binding pseudo
 *     statement
 * @return a single-position exception when exactly one script statement is involved, otherwise
 *     a multi-statement exception covering all script statements involved
 */
private VtlScriptException buildScriptExceptionFromMultipleAssignment(
102+
DAGStatement.Identifier identifier, List<DAGStatement> statements) {
103+
// Binding pseudo statements are dropped here; only the remaining statements are used for
// positions.
final List<DAGStatement> statementsWithoutBinding =
104+
statements.stream()
105+
.filter(statement -> statement.unsortedIndex() != DAGStatement.PSEUDO_BINDING_POSITION)
106+
.toList();
107+
108+
// The only caller passes groups of more than one producer, so a single remaining script
// statement means the other producer was a binding.
if (statementsWithoutBinding.size() == 1) {
109+
return new VtlScriptException(
110+
"Dataset "
111+
+ identifier.name()
112+
+ " is part of the bindings and therefore cannot be assigned",
113+
statementsWithoutBinding.get(0).getPosition(startContext));
114+
}
115+
116+
// Several script statements produce the identifier; the binding suffix is appended only when
// a binding pseudo statement was filtered out above.
return DAGStatement.buildMultiStatementExceptionUsingTheLastDAGStatementAsMainPosition(
117+
"Dataset "
118+
+ identifier.name()
119+
+ " has already been assigned"
120+
+ (statements.size() == statementsWithoutBinding.size()
121+
? ""
122+
: " and is part of the bindings and therefore cannot be assigned"),
123+
statementsWithoutBinding,
124+
startContext);
125+
}
126+
}

0 commit comments

Comments
 (0)