diff --git a/.github/workflows/tck-vtl-tf.yml b/.github/workflows/tck-vtl-tf.yml new file mode 100644 index 000000000..4634855b5 --- /dev/null +++ b/.github/workflows/tck-vtl-tf.yml @@ -0,0 +1,70 @@ +name: Run VTL TF TCK + +on: + push: + branches: [ '**' ] + pull_request: + branches: [ master, develop ] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout main project + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Clone vtl spec repo (branch fix/tck-2.1) + run: git clone --branch fix/tck-2.1 https://github.com/sdmx-twg/vtl.git + + - name: Install Python 3 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Run TCK generator script + run: | + DOC_VERSION=v2.1 python3 vtl/scripts/generate_tck_files.py + + - name: Move generated TCK zip to resources + run: | + mkdir -p coverage/src/main/resources + mv vtl/tck/v2.1.zip coverage/src/main/resources/ + + - name: Set up Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '17' + + - uses: s4u/maven-settings-action@v3.0.0 + with: + githubServer: false + servers: | + [{ + "id": "Github", + "username": "${{ secrets.GH_PACKAGES_USERNAME }}", + "password": "${{ secrets.GH_PACKAGES_PASSWORD }}" + }] + + - name: Cache Maven packages + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Build and run tests + run: mvn clean test --batch-mode + + - name: Publish JUnit test results + uses: dorny/test-reporter@v2 + if: always() + with: + name: JUnit Test Report + path: coverage/target/surefire-reports/*.xml + reporter: java-junit + fail-on-error: 'false' diff --git a/coverage/README.md b/coverage/README.md new file mode 100644 index 000000000..f83dca2eb --- /dev/null +++ b/coverage/README.md @@ -0,0 +1,32 @@ +# 📊 Coverage + +## 🧪 TCK (Technology Compatibility Kit) + +We are working on a suite of compatibility tests to 
ensure conformance with the VTL specification across implementations.
+
+🛠️ _Work in Progress_
+
+### Issues/discussions to follow
+
+- [junit5](https://github.com/junit-team/junit5/discussions/4504#discussioncomment-13046641)
+- [surefire](https://github.com/apache/maven-surefire/issues/835)
+
+### Temporary run procedure
+
+While [TCK](https://github.com/sdmx-twg/vtl/pull/565) is not automated in the VTL TF repository, we have to build the input source manually.
+
+```shell
+git clone https://github.com/sdmx-twg/vtl.git
+cd vtl/scripts
+DOC_VERSION=v2.1 python3 generate_tck_files.py
+```
+
+A zip will be created at `tck/v2.1.zip`.
+
+Move it into the Trevas resources:
+
+```shell
+mv vtl/tck/v2.1.zip trevas/coverage/src/main/resources
+```
+
+You are now able to run `TCKTest`.
\ No newline at end of file
diff --git a/coverage/pom.xml b/coverage/pom.xml
index e4cafe48e..ac76d7b74 100644
--- a/coverage/pom.xml
+++ b/coverage/pom.xml
@@ -49,9 +49,27 @@ vtl-spark 1.9.0-SNAPSHOT + + fr.insee.trevas + vtl-csv + 1.9.0-SNAPSHOT + + + com.fasterxml.jackson.core + jackson-databind + 2.15.2 + + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.3 + + --add-exports java.base/sun.nio.ch=ALL-UNNAMED + + org.jacoco jacoco-maven-plugin
diff --git a/coverage/src/main/java/fr/insee/vtl/coverage/TCK.java b/coverage/src/main/java/fr/insee/vtl/coverage/TCK.java
new file mode 100644
index 000000000..785762c0a
--- /dev/null
+++ b/coverage/src/main/java/fr/insee/vtl/coverage/TCK.java
@@ -0,0 +1,194 @@
+package fr.insee.vtl.coverage;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import fr.insee.vtl.coverage.model.Folder;
+import fr.insee.vtl.coverage.model.Test;
+import fr.insee.vtl.coverage.utils.JSONStructureLoader;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.*;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+/**
+ * Loads a TCK (Technology Compatibility Kit) zip archive into a tree of {@link Folder}s.
+ *
+ * <p>A directory holding {@code input.json}, {@code output.json} and {@code transformation.vtl}
+ * is treated as a test; any other directory is a container for its sub-directories.
+ */
+public class TCK {
+
+    /**
+     * Unzips a TCK archive into a temporary directory, loads the folder tree it contains and then
+     * deletes the temporary directory.
+     *
+     * @param zipInputStream stream over the zip archive; it is closed once unzipping ends
+     * @return the folders found at the root of the archive
+     * @throws RuntimeException if the archive cannot be unzipped or its content cannot be loaded
+     */
+    public static List<Folder> runTCK(InputStream zipInputStream) {
+        File extractedFolder;
+        try {
+            extractedFolder = init(zipInputStream);
+        } catch (IOException e) {
+            throw new RuntimeException("Error unzipping input stream", e);
+        }
+
+        try {
+            return loadInput(extractedFolder);
+        } catch (Exception e) {
+            throw new RuntimeException("Error loading input from extracted folder", e);
+        } finally {
+            // NOTE(review): the loaded datasets were created from readers opened on files in this
+            // directory - confirm they no longer need those files once it is deleted.
+            deleteDirectory(extractedFolder);
+        }
+    }
+
+    /**
+     * Same as {@link #runTCK(InputStream)}, reading the archive from a file.
+     *
+     * @param zipFile the zip archive
+     * @return the folders found at the root of the archive
+     * @throws RuntimeException if the file cannot be read
+     */
+    public static List<Folder> runTCK(File zipFile) {
+        try (InputStream in = Files.newInputStream(zipFile.toPath())) {
+            return runTCK(in);
+        } catch (IOException e) {
+            throw new RuntimeException("Error reading zip file: " + zipFile, e);
+        }
+    }
+
+    /**
+     * Same as {@link #runTCK(File)}, taking the archive location as a path string.
+     *
+     * @param zipPath path of the zip archive
+     * @return the folders found at the root of the archive
+     */
+    public static List<Folder> runTCK(String zipPath) {
+        return runTCK(new File(zipPath));
+    }
+
+    /**
+     * Extracts every entry of the archive into a fresh temporary directory.
+     *
+     * @return the temporary directory holding the extracted content
+     * @throws IOException if extraction fails; the temporary directory is removed in that case
+     */
+    private static File init(InputStream zipInputStream) throws IOException {
+        Path tempDir = Files.createTempDirectory("tck-unzip-");
+        try (ZipInputStream zis = new ZipInputStream(zipInputStream)) {
+            ZipEntry entry;
+            while ((entry = zis.getNextEntry()) != null) {
+                Path newPath = zipSlipProtect(entry, tempDir);
+                if (entry.isDirectory()) {
+                    Files.createDirectories(newPath);
+                } else {
+                    Files.createDirectories(newPath.getParent());
+                    Files.copy(zis, newPath, StandardCopyOption.REPLACE_EXISTING);
+                }
+            }
+        } catch (IOException | RuntimeException e) {
+            // Do not leave a half-extracted directory behind when unzipping fails.
+            deleteDirectory(tempDir.toFile());
+            throw e;
+        }
+        return tempDir.toFile();
+    }
+
+    /**
+     * Resolves a zip entry against the target directory and rejects entries that would be written
+     * outside of it (e.g. names containing {@code ..}).
+     */
+    private static Path zipSlipProtect(ZipEntry entry, Path targetDir) throws IOException {
+        // Normalize both sides so the prefix check compares like with like.
+        Path base = targetDir.normalize();
+        Path target = base.resolve(entry.getName()).normalize();
+        if (!target.startsWith(base)) {
+            throw new IOException("Entry is outside of the target dir: " + entry.getName());
+        }
+        return target;
+    }
+
+    /** Recursively deletes a file or directory; failures to delete are ignored (best effort). */
+    private static void deleteDirectory(File dir) {
+        // listFiles() returns null for a plain file or when the directory cannot be listed.
+        File[] children = dir.listFiles();
+        if (children != null) {
+            for (File child : children) {
+                deleteDirectory(child);
+            }
+        }
+        dir.delete();
+    }
+
+    /**
+     * Builds the folder tree rooted at the given directory.
+     *
+     * @param path directory to scan
+     * @return one folder carrying a test when {@code path} is a test directory, otherwise one
+     *     folder per sub-directory; empty when {@code path} cannot be listed
+     * @throws Exception if a test's files cannot be read or parsed
+     */
+    public static List<Folder> loadInput(File path) throws Exception {
+        List<Folder> folders = new ArrayList<>();
+        File[] files = path.listFiles();
+        if (files == null) {
+            return folders;
+        }
+        // listFiles() guarantees no order; sort so the resulting tree is stable across runs.
+        Arrays.sort(files);
+
+        if (containsTestFiles(files)) {
+            Folder folder = new Folder();
+            folder.setName(path.getName());
+            Test test = new Test();
+
+            for (File file : files) {
+                switch (file.getName()) {
+                    case "input.json":
+                        test.setInput(JSONStructureLoader.loadDatasetsFromCSV(file));
+                        break;
+                    case "output.json":
+                        test.setOutputs(JSONStructureLoader.loadDatasetsFromCSV(file));
+                        break;
+                    case "transformation.vtl":
+                        test.setScript(new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8));
+                        break;
+                    default:
+                        // CSV data files are read through the JSON descriptors above.
+                        break;
+                }
+            }
+            folder.setTest(test);
+            folders.add(folder);
+        } else {
+            for (File file : files) {
+                if (file.isDirectory()) {
+                    Folder folder = new Folder();
+                    folder.setName(file.getName());
+                    folder.setFolders(loadInput(file));
+                    folders.add(folder);
+                }
+            }
+        }
+        return folders;
+    }
+
+    /** Tells whether the listing holds the three files that make a directory a test. */
+    private static boolean containsTestFiles(File[] files) {
+        Set<String> names = new HashSet<>();
+        for (File file : files) {
+            names.add(file.getName());
+        }
+        return names.containsAll(Arrays.asList("input.json", "output.json", "transformation.vtl"));
+    }
+}
\ No newline at end of file
diff --git a/coverage/src/main/java/fr/insee/vtl/coverage/model/Folder.java b/coverage/src/main/java/fr/insee/vtl/coverage/model/Folder.java
new file mode 100644
index 000000000..b7a024c29
--- /dev/null
+++ b/coverage/src/main/java/fr/insee/vtl/coverage/model/Folder.java
@@ -0,0 +1,34 @@
+package fr.insee.vtl.coverage.model;
+
+import java.util.List;
+
+public class Folder {
+
+    private String name;
+    private List<Folder> folders;
+    private Test test;
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public Test getTest() {
+        return test;
+    }
+
+    public void setTest(Test test) {
+        this.test = test;
+    }
+
+    public List<Folder> getFolders() {
+        return folders;
+    }
+
+    public void setFolders(List<Folder> folders) {
+        this.folders = folders;
+    }
+}
diff --git a/coverage/src/main/java/fr/insee/vtl/coverage/model/Test.java b/coverage/src/main/java/fr/insee/vtl/coverage/model/Test.java
new file mode 100644
index 000000000..0df4e8a89
--- /dev/null
+++ b/coverage/src/main/java/fr/insee/vtl/coverage/model/Test.java
@@ -0,0 +1,40 @@
+package fr.insee.vtl.coverage.model;
+
+import fr.insee.vtl.model.Dataset;
+
+import java.util.Map;
+
+/** A single TCK test case: a VTL script, its input datasets and the outputs it must produce. */
+public class Test {
+
+    /** VTL script to evaluate. */
+    private String script;
+    /** Input datasets, keyed by dataset name. */
+    private Map<String, Dataset> input;
+    /** Expected output datasets, keyed by dataset name. */
+    private Map<String, Dataset> outputs;
+
+    public String getScript() {
+        return script;
+    }
+
+    public void setScript(String script) {
+        this.script = script;
+    }
+
+    public Map<String, Dataset> getInput() {
+        return input;
+    }
+
+    public void setInput(Map<String, Dataset> input) {
+        this.input = input;
+    }
+
+    public Map<String, Dataset> getOutputs() {
+        return outputs;
+    }
+
+    public void setOutputs(Map<String, Dataset> outputs) {
+        this.outputs = outputs;
+    }
+}
diff --git a/coverage/src/main/java/fr/insee/vtl/coverage/package-info.java b/coverage/src/main/java/fr/insee/vtl/coverage/package-info.java
new file mode 100644
index 000000000..65f1ac080
--- /dev/null
+++ b/coverage/src/main/java/fr/insee/vtl/coverage/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * This package contains tools for TCK.
+ */
+package fr.insee.vtl.coverage;
\ No newline at end of file
diff --git a/coverage/src/main/java/fr/insee/vtl/coverage/utils/JSONStructureLoader.java b/coverage/src/main/java/fr/insee/vtl/coverage/utils/JSONStructureLoader.java
new file mode 100644
index 000000000..bdb14000b
--- /dev/null
+++ b/coverage/src/main/java/fr/insee/vtl/coverage/utils/JSONStructureLoader.java
@@ -0,0 +1,131 @@
+package fr.insee.vtl.coverage.utils;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import fr.insee.vtl.csv.CSVDataset;
+import fr.insee.vtl.jackson.TrevasModule;
+import fr.insee.vtl.model.Dataset;
+import fr.insee.vtl.model.Structured;
+import org.supercsv.prefs.CsvPreference;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Loads the data structures and datasets described by a TCK JSON descriptor.
+ *
+ * <p>The descriptor holds a {@code structures} array (name + components) and a {@code datasets}
+ * array (name + structure reference); the data of each dataset is read from the sibling file
+ * {@code <dataset name>.csv}.
+ */
+public class JSONStructureLoader {
+
+    /** Shared mapper with the Trevas Jackson module registered. */
+    private static final ObjectMapper MAPPER = new ObjectMapper().registerModule(new TrevasModule());
+
+    /**
+     * Reads the {@code structures} array of a descriptor.
+     *
+     * @param jsonFile the JSON descriptor
+     * @return the components of each structure, keyed by structure name; empty when the descriptor
+     *     has no {@code structures} array
+     * @throws Exception if the file cannot be read or parsed
+     */
+    public static Map<String, List<Structured.Component>> loadStructures(File jsonFile) throws Exception {
+        return readStructures(readRoot(jsonFile));
+    }
+
+    /**
+     * Reads the {@code datasets} array of a descriptor and loads each dataset from its CSV file.
+     *
+     * @param dataStructureFile the JSON descriptor; CSV files are looked up in its directory
+     * @return the datasets keyed by name; datasets referencing an unknown structure are skipped
+     * @throws FileNotFoundException if the CSV file of a dataset is missing
+     * @throws Exception if the descriptor or a CSV file cannot be read or parsed
+     */
+    public static Map<String, Dataset> loadDatasetsFromCSV(File dataStructureFile) throws Exception {
+        // Parse the descriptor once and reuse the tree for both structures and datasets.
+        JsonNode root = readRoot(dataStructureFile);
+        File parentDir = dataStructureFile.getParentFile();
+
+        Map<String, List<Structured.Component>> structures = readStructures(root);
+        Map<String, Dataset> datasets = new HashMap<>();
+
+        JsonNode datasetArray = root.get("datasets");
+        if (datasetArray != null && datasetArray.isArray()) {
+            for (JsonNode datasetNode : datasetArray) {
+                String datasetName = datasetNode.get("name").asText(); // ex: "DS_1"
+                String structureRef = datasetNode.get("structure").asText(); // ex: "DS_1"
+
+                List<Structured.Component> components = structures.get(structureRef);
+                if (components == null) continue;
+                Structured.DataStructure structure = new Structured.DataStructure(components);
+
+                File csvFile = new File(parentDir, datasetName + ".csv");
+                if (!csvFile.exists()) {
+                    throw new FileNotFoundException("Missing CSV file for dataset: " + datasetName);
+                }
+
+                // Explicit charset: FileReader(File) would use the platform default.
+                FileReader reader = new FileReader(csvFile, StandardCharsets.UTF_8);
+                try {
+                    datasets.put(datasetName, new CSVDataset(structure, reader, CsvPreference.STANDARD_PREFERENCE));
+                } catch (Exception e) {
+                    // The dataset was not created, so nothing else will close the reader.
+                    reader.close();
+                    throw e;
+                }
+            }
+        }
+        return datasets;
+    }
+
+    /** Parses a descriptor file into a JSON tree. */
+    private static JsonNode readRoot(File jsonFile) throws Exception {
+        return MAPPER.readTree(Files.readString(jsonFile.toPath()));
+    }
+
+    /** Collects the components of every entry of the {@code structures} array of the tree. */
+    private static Map<String, List<Structured.Component>> readStructures(JsonNode root) throws Exception {
+        Map<String, List<Structured.Component>> structureMap = new HashMap<>();
+
+        JsonNode structures = root.get("structures");
+        if (structures != null && structures.isArray()) {
+            for (JsonNode structure : structures) {
+                String structureName = structure.get("name").asText();
+                List<Structured.Component> components = new ArrayList<>();
+
+                JsonNode componentArray = structure.get("components");
+                if (componentArray != null && componentArray.isArray()) {
+                    for (JsonNode componentNode : componentArray) {
+                        components.add(toComponent(componentNode));
+                    }
+                }
+                structureMap.put(structureName, components);
+            }
+        }
+        return structureMap;
+    }
+
+    /**
+     * Converts a TCK component ({@code name}, {@code role}, {@code data_type}) into a Trevas
+     * component by re-serializing it with the {@code name}/{@code type}/{@code role} keys.
+     */
+    private static Structured.Component toComponent(JsonNode componentNode) throws Exception {
+        // Build the JSON through Jackson so quotes or backslashes in a name are escaped properly,
+        // and upper-case with Locale.ROOT so the result does not depend on the default locale.
+        JsonNode trevasComponent = MAPPER.createObjectNode()
+                .put("name", componentNode.get("name").asText())
+                .put("type", componentNode.get("data_type").asText().toUpperCase(Locale.ROOT))
+                .put("role", componentNode.get("role").asText().toUpperCase(Locale.ROOT));
+        return MAPPER.readValue(MAPPER.writeValueAsString(trevasComponent), Structured.Component.class);
+    }
+}
\ No newline at end of file
diff --git a/coverage/src/main/resources/.gitignore b/coverage/src/main/resources/.gitignore
new file mode 100644
index 000000000..f59ec20aa
--- /dev/null
+++ b/coverage/src/main/resources/.gitignore
@@ -0,0 +1 @@
+*
\ No newline at end of file
diff --git a/coverage/src/test/java/fr/insee/vtl/coverage/TCKTest.java b/coverage/src/test/java/fr/insee/vtl/coverage/TCKTest.java
new file mode
100644
index 000000000..d3e0bd4b1
--- /dev/null
+++ b/coverage/src/test/java/fr/insee/vtl/coverage/TCKTest.java
@@ -0,0 +1,93 @@
+package fr.insee.vtl.coverage;
+
+import fr.insee.vtl.coverage.model.Folder;
+import fr.insee.vtl.coverage.model.Test;
+import fr.insee.vtl.engine.VtlScriptEngine;
+import fr.insee.vtl.model.Dataset;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.*;
+
+import javax.script.*;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Stream;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+
+/** Runs every test of the TCK archive found on the test classpath with the Trevas engine. */
+class TCKTest {
+
+    private SparkSession spark;
+    private ScriptEngine engine;
+
+    @BeforeEach
+    public void setUp() {
+        spark = SparkSession.builder()
+                .appName("test")
+                .master("local")
+                .getOrCreate();
+
+        ScriptEngineManager mgr = new ScriptEngineManager();
+        engine = mgr.getEngineByExtension("vtl");
+    }
+
+    @TestFactory
+    Stream<DynamicNode> generateTests() {
+        InputStream in = getClass().getClassLoader().getResourceAsStream("v2.1.zip");
+        // Skip the test factory entirely if file is not present
+        Assumptions.assumeTrue(in != null, "Skipping TCK tests: resource file not found");
+
+        List<Folder> tests = TCK.runTCK(in);
+        Folder root = new Folder();
+        root.setName("root");
+        root.setFolders(tests);
+        return Stream.of(toDynamicNode(root));
+    }
+
+    /** Maps a folder to a dynamic container holding its sub-folders and, if any, its test. */
+    private DynamicNode toDynamicNode(Folder folder) {
+        List<DynamicNode> children = new ArrayList<>();
+        if (folder.getFolders() != null) {
+            for (Folder sub : folder.getFolders()) {
+                children.add(toDynamicNode(sub));
+            }
+        }
+
+        if (folder.getTest() != null) {
+            Test test = folder.getTest();
+            children.add(DynamicTest.dynamicTest(folder.getName(), () -> runTest(test)));
+        }
+
+        return DynamicContainer.dynamicContainer(folder.getName(), children.stream());
+    }
+
+    /** Evaluates the script of a test on its inputs and compares the result to the expected outputs. */
+    private void runTest(Test test) throws ScriptException {
+        String script = test.getScript();
+
+        // setBindings() replaces the whole engine scope, so the engine settings must be part of the
+        // new bindings: values stored earlier with engine.put() would be discarded here.
+        Bindings bindings = new SimpleBindings();
+        bindings.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");
+        bindings.put("$vtl.spark.session", spark);
+        bindings.putAll(test.getInput());
+        engine.getContext().setBindings(bindings, ScriptContext.ENGINE_SCOPE);
+        engine.eval(script);
+
+        test.getOutputs().forEach((name, tckDataset) -> {
+            Object trevasValue = engine.getContext().getAttribute(name);
+            assertThat(trevasValue).as(script).isInstanceOf(Dataset.class);
+            Dataset trevasDataset = (Dataset) trevasValue;
+            assertThat(trevasDataset.getDataStructure())
+                    .as(script)
+                    .isEqualTo(tckDataset.getDataStructure());
+            // Compare without regard to row order: Spark gives no ordering guarantee for result rows.
+            assertThat(trevasDataset.getDataAsMap())
+                    .as(script)
+                    .containsExactlyInAnyOrderElementsOf(tckDataset.getDataAsMap());
+        });
+    }
+}
diff --git a/pom.xml b/pom.xml index 815b7abbc..e9b1a1d7e 100644 --- a/pom.xml +++ b/pom.xml @@ -82,8 +82,12 @@ org.junit.jupiter - junit-jupiter-api - 5.11.0-M2 + junit-jupiter + test + + + org.junit.platform + junit-platform-launcher test @@ -94,6 +98,18 @@ + + + + org.junit + junit-bom + 5.12.2 + pom + import + + + + ${project.artifactId}-${project.version} @@ -125,7 +141,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.3.1 + 3.5.3 diff --git a/vtl-csv/src/main/java/fr/insee/vtl/csv/CSVDataset.java b/vtl-csv/src/main/java/fr/insee/vtl/csv/CSVDataset.java index 287956c5a..a9690e28a 100644 --- a/vtl-csv/src/main/java/fr/insee/vtl/csv/CSVDataset.java +++ b/vtl-csv/src/main/java/fr/insee/vtl/csv/CSVDataset.java @@ -8,6 +8,7 @@ import org.supercsv.cellprocessor.ift.CellProcessor; import org.supercsv.io.CsvMapReader; import org.supercsv.prefs.CsvPreference; +import org.threeten.extra.Interval; import java.io.IOException; import java.io.Reader; @@ -24,8 +25,12 @@ public class CSVDataset implements Dataset { private ArrayList data; public CSVDataset(DataStructure structure, Reader csv) throws IOException { + this(structure, csv, CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE); + } + + public CSVDataset(DataStructure structure, Reader csv, CsvPreference csvPreference) throws IOException { 
this.structure = structure; - this.csvReader = new CsvMapReader(csv, CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE); + this.csvReader = new CsvMapReader(csv, csvPreference); var columns = this.csvReader.getHeader(true); if (!this.structure.keySet().containsAll(List.of(columns))) { throw new RuntimeException("missing columns in CSV"); @@ -45,15 +50,18 @@ private CellProcessor getProcessor(Class type) { if (String.class.equals(type)) { return new Optional(); } else if (Long.class.equals(type)) { - return new ParseLong(); + return new Optional(new ParseLong()); } else if (Double.class.equals(type)) { - return new ParseDouble(); + return new Optional(new ParseDouble()); } else if (Boolean.class.equals(type)) { - return new ParseBool(); + return new Optional(new ParseBool()); } else if (Instant.class.equals(type)) { throw new RuntimeException("TODO"); } else if (LocalDate.class.equals(type)) { throw new RuntimeException("TODO"); + } else if (Interval.class.equals(type)) { + // TODO: refine + return new Optional(); } else { throw new UnsupportedOperationException("unsupported type " + type); } diff --git a/vtl-jackson/src/main/java/fr/insee/vtl/jackson/ComponentDeserializer.java b/vtl-jackson/src/main/java/fr/insee/vtl/jackson/ComponentDeserializer.java index 1df28eef1..a5fb1b0e2 100644 --- a/vtl-jackson/src/main/java/fr/insee/vtl/jackson/ComponentDeserializer.java +++ b/vtl-jackson/src/main/java/fr/insee/vtl/jackson/ComponentDeserializer.java @@ -5,8 +5,12 @@ import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import fr.insee.vtl.model.Dataset; import fr.insee.vtl.model.Structured; +import org.threeten.extra.Interval; +import org.threeten.extra.PeriodDuration; import java.io.IOException; +import java.time.Instant; +import java.time.OffsetDateTime; import java.util.Map; /** @@ -18,7 +22,11 @@ public class ComponentDeserializer extends StdDeserializer "STRING", String.class, "INTEGER", Long.class, "NUMBER", Double.class, - "BOOLEAN", Boolean.class + "BOOLEAN", 
Boolean.class, + "DATE", Instant.class, + "DURATION", PeriodDuration.class, + "TIME", OffsetDateTime.class, + "TIMEPERIOD", Interval.class ); /** diff --git a/vtl-jackson/src/main/java/module-info.java b/vtl-jackson/src/main/java/module-info.java index c183e09c0..560207e69 100644 --- a/vtl-jackson/src/main/java/module-info.java +++ b/vtl-jackson/src/main/java/module-info.java @@ -6,5 +6,6 @@ requires java.scripting; requires fr.insee.vtl.model; requires com.fasterxml.jackson.databind; + requires org.threeten.extra; exports fr.insee.vtl.jackson; } \ No newline at end of file diff --git a/vtl-model/src/main/java/fr/insee/vtl/model/Dataset.java b/vtl-model/src/main/java/fr/insee/vtl/model/Dataset.java index 276bce3ed..7d0513ae6 100644 --- a/vtl-model/src/main/java/fr/insee/vtl/model/Dataset.java +++ b/vtl-model/src/main/java/fr/insee/vtl/model/Dataset.java @@ -59,6 +59,10 @@ enum Role { /** * The component is an attribute in the data structure */ - ATTRIBUTE + ATTRIBUTE, + /** + * The component is a viral attribute in the data structure + */ + VIRALATTRIBUTE } } diff --git a/vtl-prov/pom.xml b/vtl-prov/pom.xml index ba4bd03ae..c5b143a1b 100644 --- a/vtl-prov/pom.xml +++ b/vtl-prov/pom.xml @@ -65,7 +65,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.3.0 + 3.5.3 --add-exports java.base/sun.nio.ch=ALL-UNNAMED diff --git a/vtl-sdmx/pom.xml b/vtl-sdmx/pom.xml index 2aa2e5688..76eab87f0 100644 --- a/vtl-sdmx/pom.xml +++ b/vtl-sdmx/pom.xml @@ -75,7 +75,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.3.0 + 3.5.3 --add-exports java.base/sun.nio.ch=ALL-UNNAMED diff --git a/vtl-spark/pom.xml b/vtl-spark/pom.xml index 8aeb38e26..eca885a93 100644 --- a/vtl-spark/pom.xml +++ b/vtl-spark/pom.xml @@ -54,7 +54,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.3.0 + 3.5.3 --add-exports java.base/sun.nio.ch=ALL-UNNAMED