Skip to content

Commit 15aadc3

Browse files
authored
Merge pull request #459 from InseeFr/fix/430-keep-drop
Fix keep/drop
2 parents 6e58b81 + 689b27d commit 15aadc3

File tree

25 files changed

+708
-467
lines changed

25 files changed

+708
-467
lines changed

vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66

77
import fr.insee.vtl.engine.VtlScriptEngine;
88
import fr.insee.vtl.engine.exceptions.InvalidArgumentException;
9+
import fr.insee.vtl.engine.exceptions.UndefinedVariableException;
910
import fr.insee.vtl.engine.exceptions.VtlRuntimeException;
1011
import fr.insee.vtl.engine.visitors.expression.ExpressionVisitor;
1112
import fr.insee.vtl.model.*;
1213
import fr.insee.vtl.model.exceptions.VtlScriptException;
1314
import fr.insee.vtl.parser.VtlBaseVisitor;
1415
import fr.insee.vtl.parser.VtlParser;
1516
import java.util.*;
17+
import java.util.function.Function;
1618
import java.util.stream.Collectors;
1719
import org.antlr.v4.runtime.ParserRuleContext;
1820
import org.antlr.v4.runtime.misc.Interval;
@@ -106,16 +108,59 @@ private static AggregationExpression convertToAggregation(
106108

107109
@Override
108110
public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext ctx) {
109-
// Normalize to keep operation.
110-
var keep = ctx.op.getType() == VtlParser.KEEP;
111-
var names = ctx.componentID().stream().map(ClauseVisitor::getName).collect(Collectors.toSet());
112-
List<String> columnNames =
113-
datasetExpression.getDataStructure().values().stream()
114-
.map(Dataset.Component::getName)
115-
.filter(name -> keep == names.contains(name))
116-
.collect(Collectors.toList());
117111

118-
return processingEngine.executeProject(datasetExpression, columnNames);
112+
// The type of the op can either be KEEP or DROP.
113+
boolean keep = ctx.op.getType() == VtlParser.KEEP;
114+
115+
// Dataset identifiers (role = IDENTIFIER)
116+
Map<String, Dataset.Component> identifiers =
117+
datasetExpression.getDataStructure().getIdentifiers().stream()
118+
.collect(Collectors.toMap(Structured.Component::getName, Function.identity()));
119+
120+
var columns =
121+
ctx.componentID().stream()
122+
.collect(Collectors.toMap(ClauseVisitor::getName, Function.identity()));
123+
124+
var structure = datasetExpression.getDataStructure();
125+
126+
// Evaluate that all requested columns must exist in the dataset or raise an error
127+
// TODO: Is that no handled already?
128+
for (String col : columns.keySet()) {
129+
if (!structure.containsKey(col)) {
130+
throw new VtlRuntimeException(
131+
new UndefinedVariableException(col, fromContext(columns.get(col))));
132+
}
133+
}
134+
135+
// VTL specification: identifiers must not appear explicitly in KEEP
136+
// TODO: Use multi errors that noah created?
137+
for (String col : columns.keySet()) {
138+
if (structure.get(col).isIdentifier()) {
139+
throw new VtlRuntimeException(
140+
new InvalidArgumentException(
141+
"cannot keep/drop identifiers", fromContext(columns.get(col))));
142+
}
143+
}
144+
145+
// Build result set:
146+
// + KEEP: identifiers + requested columns
147+
// + DROP: (all columns - requested) + identifiers
148+
final Set<String> resultSet = new LinkedHashSet<>();
149+
resultSet.addAll(identifiers.keySet());
150+
if (keep) {
151+
resultSet.addAll(columns.keySet());
152+
} else {
153+
for (String col : structure.keySet()) {
154+
if (!columns.keySet().contains(col)) {
155+
resultSet.add(col);
156+
}
157+
}
158+
}
159+
160+
// Retrieve the output column names (identifiers + requested)
161+
final List<String> outputColumns =
162+
structure.keySet().stream().filter(resultSet::contains).collect(Collectors.toList());
163+
return processingEngine.executeProject(datasetExpression, outputColumns);
119164
}
120165

121166
@Override

vtl-engine/src/test/java/fr/insee/vtl/engine/VtlScriptEngineTest.java

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,24 +33,37 @@ public static <T extends Throwable> Condition<T> atPosition(
3333

3434
public static <T extends Throwable> Condition<T> atPosition(
3535
Integer startLine, Integer endLine, Integer startColumn, Integer endColumn) {
36-
return new Condition<>(
37-
throwable -> {
38-
var scriptException = (VtlScriptException) throwable;
39-
var position = scriptException.getPosition();
40-
return position.startLine().equals(startLine)
41-
&& position.endLine().equals(endLine)
42-
&& position.startColumn().equals(startColumn)
43-
&& position.endColumn().equals(endColumn);
44-
},
45-
"at position <%d:%d-%d:%d>",
46-
startLine,
47-
endLine,
48-
startColumn,
49-
endColumn);
50-
}
51-
52-
public static <T extends Comparable<T>> Boolean test(T left, T right) {
53-
return true;
36+
return new Condition<T>() {
37+
@Override
38+
public boolean matches(T throwable) {
39+
if (!(throwable instanceof VtlScriptException scriptException)) {
40+
return false;
41+
}
42+
var position = scriptException.getPosition();
43+
boolean matches =
44+
position.startLine().equals(startLine)
45+
&& position.endLine().equals(endLine)
46+
&& position.startColumn().equals(startColumn)
47+
&& position.endColumn().equals(endColumn);
48+
49+
// Set description that includes actual position if it doesn't match
50+
if (matches) {
51+
describedAs("at position <%d:%d-%d:%d>", startLine, startColumn, endLine, endColumn);
52+
} else {
53+
describedAs(
54+
"at position <%d:%d-%d:%d> but was <%d:%d-%d:%d>",
55+
startLine,
56+
startColumn,
57+
endLine,
58+
endColumn,
59+
position.startLine(),
60+
position.startColumn(),
61+
position.endLine(),
62+
position.endColumn());
63+
}
64+
return matches;
65+
}
66+
};
5467
}
5568

5669
@BeforeEach

vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,11 @@ void testDagSimpleExampleWithReordering() throws ScriptException {
199199
void testDagCycle() {
200200
final String script =
201201
"""
202-
e := a;
203-
b := a;
204-
c := b;
205-
a := c;
206-
f := a;""";
202+
e := a;
203+
b := a;
204+
c := b;
205+
a := c;
206+
f := a;""";
207207

208208
final Positioned.Position mainPosition = getPositionOfStatementInScript("a := c", script);
209209
final List<Positioned.Position> otherPositions =
@@ -231,14 +231,14 @@ void testDagCycle() {
231231
void testMultipleCycles() {
232232
final String script =
233233
"""
234-
h := g;
235-
i := join(h, input_ds);
236-
g := i;
237-
e := a;
238-
b := a;
239-
c := b;
240-
a := c;
241-
f := a;""";
234+
h := g;
235+
i := join(h, input_ds);
236+
g := i;
237+
e := a;
238+
b := a;
239+
c := b;
240+
a := c;
241+
f := a;""";
242242

243243
final Positioned.Position mainExceptionMainPosition =
244244
getPositionOfStatementInScript("g := i", script);
@@ -446,8 +446,8 @@ void testDagIfExpr() throws ScriptException {
446446
engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
447447
engine.eval(
448448
"res := if ds1 > ds2 then ds1 else ds2; "
449-
+ "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; "
450-
+ "ds2 := ds_2[keep id, long1][rename long1 to bool_var];");
449+
+ "ds1 := ds_1[keep long1][rename long1 to bool_var]; "
450+
+ "ds2 := ds_2[keep long1][rename long1 to bool_var];");
451451
var res = engine.getContext().getAttribute("res");
452452
assertThat(((Dataset) res).getDataAsMap())
453453
.containsExactlyInAnyOrder(
@@ -463,7 +463,7 @@ void testDagCaseExpr() throws ScriptException {
463463
engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
464464
engine.eval(
465465
"res0 <- tmp0[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1]; "
466-
+ "tmp0 := ds_1[keep id, long1];");
466+
+ "tmp0 := ds_1[keep long1];");
467467
Object res0 = engine.getContext().getAttribute("res0");
468468
assertThat(((Dataset) res0).getDataAsMap())
469469
.containsExactlyInAnyOrder(
@@ -474,7 +474,7 @@ void testDagCaseExpr() throws ScriptException {
474474
assertThat(((Dataset) res0).getDataStructure().get("c").getType()).isEqualTo(String.class);
475475
engine.eval(
476476
"res1 <- tmp1[calc c := case when long1 > 30 then 1 else 0][drop long1]; "
477-
+ "tmp1 := ds_1[keep id, long1];");
477+
+ "tmp1 := ds_1[keep long1];");
478478
Object res1 = engine.getContext().getAttribute("res1");
479479
assertThat(((Dataset) res1).getDataAsMap())
480480
.containsExactlyInAnyOrder(
@@ -484,9 +484,9 @@ void testDagCaseExpr() throws ScriptException {
484484
Map.of("id", "Franck", "c", 1L));
485485
assertThat(((Dataset) res1).getDataStructure().get("c").getType()).isEqualTo(Long.class);
486486
engine.eval(
487-
"tmp2_alt_ds1 := ds_1[keep id, long1][rename long1 to bool_var]; "
487+
"tmp2_alt_ds1 := ds_1[keep long1][rename long1 to bool_var]; "
488488
+ "res2 <- case when tmp2_alt_ds1 < 30 then tmp2_alt_ds1 else tmp2_alt_ds2; "
489-
+ "tmp2_alt_ds2 := ds_2[keep id, long1][rename long1 to bool_var];");
489+
+ "tmp2_alt_ds2 := ds_2[keep long1][rename long1 to bool_var];");
490490
Object resDs = engine.getContext().getAttribute("res2");
491491
assertThat(((Dataset) resDs).getDataAsMap())
492492
.containsExactlyInAnyOrder(
@@ -498,7 +498,7 @@ void testDagCaseExpr() throws ScriptException {
498498
@Test
499499
void testDagNvlExpr() throws ScriptException {
500500
engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
501-
engine.eval("res <- nvl(tmp1[keep id, long1], 0); tmp1 := ds1;");
501+
engine.eval("res <- nvl(tmp1[keep long1], 0); tmp1 := ds1;");
502502
var res = engine.getContext().getAttribute("res");
503503
assertThat(((Dataset) res).getDataAsMap())
504504
.containsExactlyInAnyOrder(
@@ -512,7 +512,7 @@ void testDagNvlExpr() throws ScriptException {
512512
@Test
513513
void testDagNvlImplicitCast() throws ScriptException {
514514
engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
515-
engine.eval("res := nvl(tmp1[keep id, long1], 0.1); tmp1 <- ds1;");
515+
engine.eval("res := nvl(tmp1[keep long1], 0.1); tmp1 <- ds1;");
516516
var res = engine.getContext().getAttribute("res");
517517
assertThat(((Dataset) res).getDataAsMap())
518518
.containsExactlyInAnyOrder(
@@ -528,7 +528,7 @@ void testDagUnaryExpr() throws ScriptException {
528528
ScriptContext context = engine.getContext();
529529

530530
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
531-
Object res = engine.eval("res := + tmp1[keep id, long1, double1]; tmp1 <- ds2;");
531+
Object res = engine.eval("res := + tmp1[keep long1, double1]; tmp1 <- ds2;");
532532
assertThat(((Dataset) res).getDataAsMap())
533533
.containsExactlyInAnyOrder(
534534
Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D),
@@ -537,7 +537,7 @@ void testDagUnaryExpr() throws ScriptException {
537537
assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Long.class);
538538

539539
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
540-
Object res2 = engine.eval("res2 := - tmp2[keep id, long1, double1]; tmp2 := ds2;");
540+
Object res2 = engine.eval("res2 := - tmp2[keep long1, double1]; tmp2 := ds2;");
541541
assertThat(((Dataset) res2).getDataAsMap())
542542
.containsExactlyInAnyOrder(
543543
Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D),

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public void testManyCalc() throws ScriptException {
8787
}
8888

8989
@Test
90-
public void testCalcRoleModifier() throws ScriptException {
90+
public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptException {
9191
InMemoryDataset dataset =
9292
new InMemoryDataset(
9393
List.of(
@@ -194,7 +194,8 @@ public void testKeepDropClause() throws ScriptException {
194194
ScriptContext context = engine.getContext();
195195
context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);
196196

197-
engine.eval("ds2 := ds1[keep name, age];");
197+
// KEEP: identifiers must not be listed explicitly; they are implicitly preserved.
198+
engine.eval("ds2 := ds1[keep age];");
198199

199200
assertThat(engine.getContext().getAttribute("ds2")).isInstanceOf(Dataset.class);
200201
assertThat(((Dataset) engine.getContext().getAttribute("ds2")).getDataAsMap())
@@ -213,6 +214,27 @@ public void testKeepDropClause() throws ScriptException {
213214
Map.of("name", "Franck", "age", 12L));
214215
}
215216

217+
/** KEEP/DROP: listing identifiers explicitly must raise a script error. */
218+
@Test
219+
public void testKeepDropClause_identifierExplicitShouldFail() {
220+
InMemoryDataset dataset =
221+
new InMemoryDataset(
222+
List.of(
223+
Map.of("name", "Hadrien", "age", 10L, "weight", 11L),
224+
Map.of("name", "Nico", "age", 11L, "weight", 10L),
225+
Map.of("name", "Franck", "age", 12L, "weight", 9L)),
226+
Map.of("name", String.class, "age", Long.class, "weight", Long.class),
227+
Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));
228+
229+
ScriptContext context = engine.getContext();
230+
context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);
231+
232+
assertThatThrownBy(() -> engine.eval("ds := ds1[keep name, age];"))
233+
.isInstanceOf(VtlScriptException.class)
234+
.hasMessage("cannot keep/drop identifiers")
235+
.is(atPosition(0, 15, 19));
236+
}
237+
216238
@Test
217239
public void testAggregateType() {
218240
InMemoryDataset dataset =

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public void testPlus() throws ScriptException {
5252
assertThat(context.getAttribute("plus2")).isEqualTo(5.0);
5353

5454
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
55-
Object res = engine.eval("res := ds1[keep id, long1, long2] + ds1[keep id, long1, long2];");
55+
Object res = engine.eval("res := ds1[keep long1, long2] + ds1[keep long1, long2];");
5656
assertThat(((Dataset) res).getDataAsMap())
5757
.containsExactlyInAnyOrder(
5858
Map.of("id", "Toto", "long1", 60L, "long2", 600L),
@@ -75,7 +75,7 @@ public void testMinus() throws ScriptException {
7575

7676
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
7777
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
78-
Object res = engine.eval("res := ds2[keep id, long1] - ds1[keep id, long1] + 1;");
78+
Object res = engine.eval("res := ds2[keep long1] - ds1[keep long1] + 1;");
7979
assertThat(((Dataset) res).getDataAsMap())
8080
.containsExactlyInAnyOrder(
8181
Map.of("id", "Hadrien", "long1", 141L),
@@ -92,7 +92,7 @@ public void testConcat() throws ScriptException {
9292

9393
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
9494
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
95-
Object res = engine.eval("res := ds2[keep id, string1] || \" \" || ds1[keep id, string1];");
95+
Object res = engine.eval("res := ds2[keep string1] || \" \" || ds1[keep string1];");
9696
assertThat(((Dataset) res).getDataAsMap())
9797
.containsExactlyInAnyOrder(
9898
Map.of("id", "Hadrien", "string1", "hadrien hadrien"),

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ public void testArithmeticExpr() throws ScriptException {
6464

6565
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
6666
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
67-
Object res =
68-
engine.eval("res := round(ds1[keep id, long1, double1] * ds2[keep id, long1, double1]);");
67+
Object res = engine.eval("res := round(ds1[keep long1, double1] * ds2[keep long1, double1]);");
6968
assertThat(((Dataset) res).getDataAsMap())
7069
.containsExactlyInAnyOrder(
7170
Map.of("id", "Hadrien", "long1", 1500.0, "double1", 1.0),
@@ -83,8 +82,7 @@ public void testArithmeticExpr() throws ScriptException {
8382
engine.eval("div4 := 3.0 / 1.5;");
8483
assertThat(context.getAttribute("div4")).isEqualTo(2.0);
8584

86-
res =
87-
engine.eval("res2 := round(ds1[keep id, long1, double1] / ds2[keep id, long1, double1]);");
85+
res = engine.eval("res2 := round(ds1[keep long1, double1] / ds2[keep long1, double1]);");
8886
assertThat(((Dataset) res).getDataAsMap())
8987
.containsExactlyInAnyOrder(
9088
Map.of("id", "Hadrien", "long1", 0.0, "double1", 1.0),

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ public void testOnDatasets() throws ScriptException {
100100
context.setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
101101
context.setAttribute("ds_2", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
102102
engine.eval(
103-
"ds1 := ds_1[keep id, bool2][rename bool2 to bool1]; "
104-
+ "ds2 := ds_2[keep id, bool1]; "
103+
"ds1 := ds_1[keep bool2][rename bool2 to bool1]; "
104+
+ "ds2 := ds_2[keep bool1]; "
105105
+ "andDs := ds1 and ds2; "
106106
+ "orDs := ds1 or ds2; "
107107
+ "xorDs := ds1 xor ds2; ");

0 commit comments

Comments
 (0)