Skip to content

Commit b47035b

Browse files
committed
Keep / Drop changes
1 parent eae1be6 commit b47035b

File tree

24 files changed

+691
-450
lines changed

24 files changed

+691
-450
lines changed

vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java

Lines changed: 67 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import fr.insee.vtl.parser.VtlBaseVisitor;
1414
import fr.insee.vtl.parser.VtlParser;
1515
import java.util.*;
16+
import java.util.function.Function;
1617
import java.util.stream.Collectors;
1718
import org.antlr.v4.runtime.ParserRuleContext;
1819
import org.antlr.v4.runtime.misc.Interval;
@@ -106,16 +107,73 @@ private static AggregationExpression convertToAggregation(
106107

107108
@Override
108109
public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext ctx) {
109-
// Normalize to keep operation.
110-
var keep = ctx.op.getType() == VtlParser.KEEP;
111-
var names = ctx.componentID().stream().map(ClauseVisitor::getName).collect(Collectors.toSet());
112-
List<String> columnNames =
113-
datasetExpression.getDataStructure().values().stream()
114-
.map(Dataset.Component::getName)
115-
.filter(name -> keep == names.contains(name))
116-
.collect(Collectors.toList());
117110

118-
return processingEngine.executeProject(datasetExpression, columnNames);
111+
// The type of the op can either be KEEP or DROP.
112+
boolean keep = ctx.op.getType() == VtlParser.KEEP;
113+
114+
// Columns explicitly requested in the KEEP/DROP clause
115+
List<String> cleanColumnNames = ctx.componentID().stream().map(ClauseVisitor::getName).toList();
116+
117+
Collection<String> inputColumns = datasetExpression.getDataStructure().keySet();
118+
119+
// Dataset identifiers (role = IDENTIFIER)
120+
Map<String, Dataset.Component> identifiers =
121+
datasetExpression.getDataStructure().getIdentifiers().stream()
122+
.collect(Collectors.toMap(Structured.Component::getName, Function.identity()));
123+
124+
// Evaluate that all requested columns must exist in the dataset or raise an error
125+
for (String requested : cleanColumnNames) {
126+
if (!inputColumns.contains(requested)) {
127+
throw new VtlRuntimeException(
128+
new InvalidArgumentException(
129+
// TODO: use actual column context.
130+
String.format("'%s' not found in dataset.", requested), fromContext(ctx)));
131+
}
132+
}
133+
134+
// VTL specification: identifiers must not appear explicitly in KEEP
135+
Set<String> forbidden =
136+
cleanColumnNames.stream()
137+
.filter(identifiers::containsKey)
138+
.collect(Collectors.toCollection(LinkedHashSet::new));
139+
140+
if (!forbidden.isEmpty()) {
141+
StringBuilder details = new StringBuilder();
142+
for (String id : forbidden) {
143+
Dataset.Component comp = identifiers.get(id);
144+
details.append(
145+
String.format(
146+
"%s(role=%s, type=%s) ",
147+
id, comp.getRole(), comp.getType() != null ? comp.getType() : "n/a"));
148+
}
149+
throw new VtlRuntimeException(
150+
new InvalidArgumentException(
151+
String.format(
152+
"identifiers %s must not be explicitly listed in KEEP/DROP. Details: %s",
153+
forbidden, details.toString().trim()),
154+
// TODO: use actual column context.
155+
fromContext(ctx)));
156+
}
157+
158+
// Build result set:
159+
// + KEEP: identifiers + requested columns
160+
// + DROP: (all columns - requested) + identifiers
161+
final Set<String> resultSet = new LinkedHashSet<>();
162+
resultSet.addAll(identifiers.keySet());
163+
if (keep) {
164+
resultSet.addAll(cleanColumnNames);
165+
} else {
166+
for (String col : inputColumns) {
167+
if (!cleanColumnNames.contains(col)) {
168+
resultSet.add(col);
169+
}
170+
}
171+
}
172+
173+
// Retrieve the output column names (identifiers + requested)
174+
final List<String> outputColumns =
175+
inputColumns.stream().filter(resultSet::contains).collect(Collectors.toList());
176+
return processingEngine.executeProject(datasetExpression, outputColumns);
119177
}
120178

121179
@Override

vtl-engine/src/test/java/fr/insee/vtl/engine/utils/dag/DagTest.java

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,11 @@ void testDagSimpleExampleWithReordering() throws ScriptException {
199199
void testDagCycle() {
200200
final String script =
201201
"""
202-
e := a;
203-
b := a;
204-
c := b;
205-
a := c;
206-
f := a;""";
202+
e := a;
203+
b := a;
204+
c := b;
205+
a := c;
206+
f := a;""";
207207

208208
final Positioned.Position mainPosition = getPositionOfStatementInScript("a := c", script);
209209
final List<Positioned.Position> otherPositions =
@@ -231,14 +231,14 @@ void testDagCycle() {
231231
void testMultipleCycles() {
232232
final String script =
233233
"""
234-
h := g;
235-
i := join(h, input_ds);
236-
g := i;
237-
e := a;
238-
b := a;
239-
c := b;
240-
a := c;
241-
f := a;""";
234+
h := g;
235+
i := join(h, input_ds);
236+
g := i;
237+
e := a;
238+
b := a;
239+
c := b;
240+
a := c;
241+
f := a;""";
242242

243243
final Positioned.Position mainExceptionMainPosition =
244244
getPositionOfStatementInScript("g := i", script);
@@ -446,8 +446,8 @@ void testDagIfExpr() throws ScriptException {
446446
engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
447447
engine.eval(
448448
"res := if ds1 > ds2 then ds1 else ds2; "
449-
+ "ds1 := ds_1[keep id, long1][rename long1 to bool_var]; "
450-
+ "ds2 := ds_2[keep id, long1][rename long1 to bool_var];");
449+
+ "ds1 := ds_1[keep long1][rename long1 to bool_var]; "
450+
+ "ds2 := ds_2[keep long1][rename long1 to bool_var];");
451451
var res = engine.getContext().getAttribute("res");
452452
assertThat(((Dataset) res).getDataAsMap())
453453
.containsExactlyInAnyOrder(
@@ -463,7 +463,7 @@ void testDagCaseExpr() throws ScriptException {
463463
engine.getContext().setAttribute("ds_2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
464464
engine.eval(
465465
"res0 <- tmp0[calc c := case when long1 > 30 then \"ok\" else \"ko\"][drop long1]; "
466-
+ "tmp0 := ds_1[keep id, long1];");
466+
+ "tmp0 := ds_1[keep long1];");
467467
Object res0 = engine.getContext().getAttribute("res0");
468468
assertThat(((Dataset) res0).getDataAsMap())
469469
.containsExactlyInAnyOrder(
@@ -474,7 +474,7 @@ void testDagCaseExpr() throws ScriptException {
474474
assertThat(((Dataset) res0).getDataStructure().get("c").getType()).isEqualTo(String.class);
475475
engine.eval(
476476
"res1 <- tmp1[calc c := case when long1 > 30 then 1 else 0][drop long1]; "
477-
+ "tmp1 := ds_1[keep id, long1];");
477+
+ "tmp1 := ds_1[keep long1];");
478478
Object res1 = engine.getContext().getAttribute("res1");
479479
assertThat(((Dataset) res1).getDataAsMap())
480480
.containsExactlyInAnyOrder(
@@ -484,9 +484,9 @@ void testDagCaseExpr() throws ScriptException {
484484
Map.of("id", "Franck", "c", 1L));
485485
assertThat(((Dataset) res1).getDataStructure().get("c").getType()).isEqualTo(Long.class);
486486
engine.eval(
487-
"tmp2_alt_ds1 := ds_1[keep id, long1][rename long1 to bool_var]; "
487+
"tmp2_alt_ds1 := ds_1[keep long1][rename long1 to bool_var]; "
488488
+ "res2 <- case when tmp2_alt_ds1 < 30 then tmp2_alt_ds1 else tmp2_alt_ds2; "
489-
+ "tmp2_alt_ds2 := ds_2[keep id, long1][rename long1 to bool_var];");
489+
+ "tmp2_alt_ds2 := ds_2[keep long1][rename long1 to bool_var];");
490490
Object resDs = engine.getContext().getAttribute("res2");
491491
assertThat(((Dataset) resDs).getDataAsMap())
492492
.containsExactlyInAnyOrder(
@@ -498,7 +498,7 @@ void testDagCaseExpr() throws ScriptException {
498498
@Test
499499
void testDagNvlExpr() throws ScriptException {
500500
engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
501-
engine.eval("res <- nvl(tmp1[keep id, long1], 0); tmp1 := ds1;");
501+
engine.eval("res <- nvl(tmp1[keep long1], 0); tmp1 := ds1;");
502502
var res = engine.getContext().getAttribute("res");
503503
assertThat(((Dataset) res).getDataAsMap())
504504
.containsExactlyInAnyOrder(
@@ -512,7 +512,7 @@ void testDagNvlExpr() throws ScriptException {
512512
@Test
513513
void testDagNvlImplicitCast() throws ScriptException {
514514
engine.getContext().setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
515-
engine.eval("res := nvl(tmp1[keep id, long1], 0.1); tmp1 <- ds1;");
515+
engine.eval("res := nvl(tmp1[keep long1], 0.1); tmp1 <- ds1;");
516516
var res = engine.getContext().getAttribute("res");
517517
assertThat(((Dataset) res).getDataAsMap())
518518
.containsExactlyInAnyOrder(
@@ -528,7 +528,7 @@ void testDagUnaryExpr() throws ScriptException {
528528
ScriptContext context = engine.getContext();
529529

530530
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
531-
Object res = engine.eval("res := + tmp1[keep id, long1, double1]; tmp1 <- ds2;");
531+
Object res = engine.eval("res := + tmp1[keep long1, double1]; tmp1 <- ds2;");
532532
assertThat(((Dataset) res).getDataAsMap())
533533
.containsExactlyInAnyOrder(
534534
Map.of("id", "Hadrien", "long1", 150L, "double1", 1.1D),
@@ -537,7 +537,7 @@ void testDagUnaryExpr() throws ScriptException {
537537
assertThat(((Dataset) res).getDataStructure().get("long1").getType()).isEqualTo(Long.class);
538538

539539
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
540-
Object res2 = engine.eval("res2 := - tmp2[keep id, long1, double1]; tmp2 := ds2;");
540+
Object res2 = engine.eval("res2 := - tmp2[keep long1, double1]; tmp2 := ds2;");
541541
assertThat(((Dataset) res2).getDataAsMap())
542542
.containsExactlyInAnyOrder(
543543
Map.of("id", "Hadrien", "long1", -150L, "double1", -1.1D),

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public void testManyCalc() throws ScriptException {
8787
}
8888

8989
@Test
90-
public void testCalcRoleModifier() throws ScriptException {
90+
public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptException {
9191
InMemoryDataset dataset =
9292
new InMemoryDataset(
9393
List.of(
@@ -194,7 +194,8 @@ public void testKeepDropClause() throws ScriptException {
194194
ScriptContext context = engine.getContext();
195195
context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);
196196

197-
engine.eval("ds2 := ds1[keep name, age];");
197+
// KEEP: identifiers must not be listed explicitly; they are implicitly preserved.
198+
engine.eval("ds2 := ds1[keep age];");
198199

199200
assertThat(engine.getContext().getAttribute("ds2")).isInstanceOf(Dataset.class);
200201
assertThat(((Dataset) engine.getContext().getAttribute("ds2")).getDataAsMap())
@@ -213,6 +214,26 @@ public void testKeepDropClause() throws ScriptException {
213214
Map.of("name", "Franck", "age", 12L));
214215
}
215216

217+
/** KEEP/DROP: listing identifiers explicitly must raise a script error. */
218+
@Test
219+
public void testKeepDropClause_identifierExplicitShouldFail() {
220+
InMemoryDataset dataset =
221+
new InMemoryDataset(
222+
List.of(
223+
Map.of("name", "Hadrien", "age", 10L, "weight", 11L),
224+
Map.of("name", "Nico", "age", 11L, "weight", 10L),
225+
Map.of("name", "Franck", "age", 12L, "weight", 9L)),
226+
Map.of("name", String.class, "age", Long.class, "weight", Long.class),
227+
Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));
228+
229+
ScriptContext context = engine.getContext();
230+
context.setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);
231+
232+
assertThatThrownBy(() -> engine.eval("ds := ds1[keep name, age];"))
233+
.isInstanceOf(VtlScriptException.class)
234+
.hasMessageContaining("identifiers [name] must not be explicitly listed in KEEP/DROP");
235+
}
236+
216237
@Test
217238
public void testAggregateType() {
218239
InMemoryDataset dataset =

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprOrConcatTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public void testPlus() throws ScriptException {
5252
assertThat(context.getAttribute("plus2")).isEqualTo(5.0);
5353

5454
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
55-
Object res = engine.eval("res := ds1[keep id, long1, long2] + ds1[keep id, long1, long2];");
55+
Object res = engine.eval("res := ds1[keep long1, long2] + ds1[keep long1, long2];");
5656
assertThat(((Dataset) res).getDataAsMap())
5757
.containsExactlyInAnyOrder(
5858
Map.of("id", "Toto", "long1", 60L, "long2", 600L),
@@ -75,7 +75,7 @@ public void testMinus() throws ScriptException {
7575

7676
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
7777
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
78-
Object res = engine.eval("res := ds2[keep id, long1] - ds1[keep id, long1] + 1;");
78+
Object res = engine.eval("res := ds2[keep long1] - ds1[keep long1] + 1;");
7979
assertThat(((Dataset) res).getDataAsMap())
8080
.containsExactlyInAnyOrder(
8181
Map.of("id", "Hadrien", "long1", 141L),
@@ -92,7 +92,7 @@ public void testConcat() throws ScriptException {
9292

9393
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
9494
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
95-
Object res = engine.eval("res := ds2[keep id, string1] || \" \" || ds1[keep id, string1];");
95+
Object res = engine.eval("res := ds2[keep string1] || \" \" || ds1[keep string1];");
9696
assertThat(((Dataset) res).getDataAsMap())
9797
.containsExactlyInAnyOrder(
9898
Map.of("id", "Hadrien", "string1", "hadrien hadrien"),

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ArithmeticExprTest.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ public void testArithmeticExpr() throws ScriptException {
6464

6565
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
6666
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
67-
Object res =
68-
engine.eval("res := round(ds1[keep id, long1, double1] * ds2[keep id, long1, double1]);");
67+
Object res = engine.eval("res := round(ds1[keep long1, double1] * ds2[keep long1, double1]);");
6968
assertThat(((Dataset) res).getDataAsMap())
7069
.containsExactlyInAnyOrder(
7170
Map.of("id", "Hadrien", "long1", 1500.0, "double1", 1.0),
@@ -83,8 +82,7 @@ public void testArithmeticExpr() throws ScriptException {
8382
engine.eval("div4 := 3.0 / 1.5;");
8483
assertThat(context.getAttribute("div4")).isEqualTo(2.0);
8584

86-
res =
87-
engine.eval("res2 := round(ds1[keep id, long1, double1] / ds2[keep id, long1, double1]);");
85+
res = engine.eval("res2 := round(ds1[keep long1, double1] / ds2[keep long1, double1]);");
8886
assertThat(((Dataset) res).getDataAsMap())
8987
.containsExactlyInAnyOrder(
9088
Map.of("id", "Hadrien", "long1", 0.0, "double1", 1.0),

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/BooleanExprTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ public void testOnDatasets() throws ScriptException {
100100
context.setAttribute("ds_1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
101101
context.setAttribute("ds_2", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
102102
engine.eval(
103-
"ds1 := ds_1[keep id, bool2][rename bool2 to bool1]; "
104-
+ "ds2 := ds_2[keep id, bool1]; "
103+
"ds1 := ds_1[keep bool2][rename bool2 to bool1]; "
104+
+ "ds2 := ds_2[keep bool1]; "
105105
+ "andDs := ds1 and ds2; "
106106
+ "orDs := ds1 or ds2; "
107107
+ "xorDs := ds1 xor ds2; ");

vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ComparisonExprTest.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public void testComparisonExpr() throws ScriptException {
7373

7474
context.setAttribute("ds1", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
7575
context.setAttribute("ds2", DatasetSamples.ds2, ScriptContext.ENGINE_SCOPE);
76-
engine.eval("equal := ds1[keep id, long1] = ds2[keep id, long1];");
76+
engine.eval("equal := ds1[keep long1] = ds2[keep long1];");
7777
var equal = engine.getContext().getAttribute("equal");
7878
assertThat(((Dataset) equal).getDataAsMap())
7979
.containsExactlyInAnyOrder(
@@ -90,7 +90,7 @@ public void testComparisonExpr() throws ScriptException {
9090
assertThat((Boolean) context.getAttribute("long1")).isTrue();
9191
engine.eval("mix1 := 6 <> (3*20.0);");
9292
assertThat((Boolean) context.getAttribute("mix1")).isTrue();
93-
engine.eval("notEqual := ds1[keep id, long1] <> ds2[keep id, long1];");
93+
engine.eval("notEqual := ds1[keep long1] <> ds2[keep long1];");
9494
var notEqual = engine.getContext().getAttribute("notEqual");
9595
assertThat(((Dataset) notEqual).getDataAsMap())
9696
.containsExactlyInAnyOrder(
@@ -106,7 +106,7 @@ public void testComparisonExpr() throws ScriptException {
106106
assertThat((Boolean) context.getAttribute("lt1")).isFalse();
107107
engine.eval("mix2 := 6 < 6.1;");
108108
assertThat((Boolean) context.getAttribute("mix2")).isTrue();
109-
engine.eval("lt2 := ds1[keep id, long1] < ds2[keep id, long1];");
109+
engine.eval("lt2 := ds1[keep long1] < ds2[keep long1];");
110110
var lt = engine.getContext().getAttribute("lt2");
111111
assertThat(((Dataset) lt).getDataAsMap())
112112
.containsExactlyInAnyOrder(
@@ -122,7 +122,7 @@ public void testComparisonExpr() throws ScriptException {
122122
assertThat((Boolean) context.getAttribute("mt1")).isTrue();
123123
engine.eval("mix4 := 6 > 6.1;");
124124
assertThat((Boolean) context.getAttribute("mix4")).isFalse();
125-
engine.eval("mt2 := ds1[keep id, long1] > ds2[keep id, long1];");
125+
engine.eval("mt2 := ds1[keep long1] > ds2[keep long1];");
126126
var mt = engine.getContext().getAttribute("mt2");
127127
assertThat(((Dataset) mt).getDataAsMap())
128128
.containsExactlyInAnyOrder(
@@ -139,7 +139,7 @@ public void testComparisonExpr() throws ScriptException {
139139
engine.eval("mix5 := 6 <= 6.1;");
140140
assertThat((Boolean) context.getAttribute("mix5")).isTrue();
141141

142-
engine.eval("le2 := ds1[keep id, long1] <= ds2[keep id, long1];");
142+
engine.eval("le2 := ds1[keep long1] <= ds2[keep long1];");
143143
var le = engine.getContext().getAttribute("le2");
144144
assertThat(((Dataset) le).getDataAsMap())
145145
.containsExactlyInAnyOrder(
@@ -156,7 +156,7 @@ public void testComparisonExpr() throws ScriptException {
156156
engine.eval("mix6 := 6 >= 6.1;");
157157
assertThat((Boolean) context.getAttribute("mix6")).isFalse();
158158

159-
engine.eval("me2 := ds1[keep id, long1] >= ds2[keep id, long1];");
159+
engine.eval("me2 := ds1[keep long1] >= ds2[keep long1];");
160160
var me = engine.getContext().getAttribute("me2");
161161
assertThat(((Dataset) me).getDataAsMap())
162162
.containsExactlyInAnyOrder(
@@ -198,7 +198,7 @@ public void testInNotIn() throws ScriptException {
198198
assertThat((Boolean) engine.getContext().getAttribute("res4")).isTrue();
199199

200200
engine.getContext().setAttribute("ds", DatasetSamples.ds1, ScriptContext.ENGINE_SCOPE);
201-
engine.eval("me := ds[keep id, long1, string1] in {\"toto\", \"franck\"};");
201+
engine.eval("me := ds[keep long1, string1] in {\"toto\", \"franck\"};");
202202
var in = engine.getContext().getAttribute("me");
203203
assertThat(((Dataset) in).getDataAsMap())
204204
.containsExactlyInAnyOrder(

0 commit comments

Comments
 (0)