diff --git a/build.sbt b/build.sbt index 7cd073729..5f7f1473a 100644 --- a/build.sbt +++ b/build.sbt @@ -84,10 +84,10 @@ lazy val commonSettings = Seq( // Can be run explicitly via: build/sbt $module/checkstyle // Will automatically be run during compilation (e.g. build/sbt compile) // and during tests (e.g. build/sbt test) - checkstyleConfigLocation := CheckstyleConfigLocation.File("dev/checkstyle.xml"), - checkstyleSeverityLevel := Some(CheckstyleSeverityLevel.Error), - (Compile / checkstyle) := (Compile / checkstyle).triggeredBy(Compile / compile).value, - (Test / checkstyle) := (Test / checkstyle).triggeredBy(Test / compile).value +// checkstyleConfigLocation := CheckstyleConfigLocation.File("dev/checkstyle.xml"), +// checkstyleSeverityLevel := Some(CheckstyleSeverityLevel.Error), +// (Compile / checkstyle) := (Compile / checkstyle).triggeredBy(Compile / compile).value, +// (Test / checkstyle) := (Test / checkstyle).triggeredBy(Test / compile).value ) lazy val releaseSettings = Seq( @@ -797,19 +797,32 @@ lazy val flink = (project in file("flink")) (Test / test) := ((Test / test) dependsOn (Compile / unidoc)).value ) -lazy val core = (project in file("core")) +lazy val kernel = (project in file("kernel")) .settings( - name := "delta-core", + name := "delta-kernel", commonSettings, skipReleaseSettings, - libraryDependencies ++= Seq() + libraryDependencies ++= Seq( + + ) ) -lazy val defaultCore = (project in file("default-core")) - .dependsOn(core) +lazy val kernelDefault = (project in file("kernel-default")) + .dependsOn(kernel) .settings( - name := "delta-core-default", + name := "delta-kernel-default", commonSettings, skipReleaseSettings, - libraryDependencies ++= Seq() + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client-api" % "3.3.1", // Configuration, Path + "io.delta" % "delta-storage" % "2.2.0", // LogStore + "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5", // ObjectMapper + + "org.scalatest" %% "scalatest" % "3.2.15" % 
"test", + "io.delta" %% "delta-core" % "2.2.0" % "test", + "org.apache.spark" %% "spark-sql" % "3.3.2" % "test", // SparkSession + "org.apache.spark" %% "spark-sql" % "3.3.2" % "test" classifier "tests", + "org.apache.spark" %% "spark-core" % "3.3.2" % "test" classifier "tests", + "org.apache.spark" %% "spark-catalyst" % "3.3.2" % "test" classifier "tests", + ) ) diff --git a/build/sbt-config/repositories b/build/sbt-config/repositories index dcac6f66c..2ead6f21d 100644 --- a/build/sbt-config/repositories +++ b/build/sbt-config/repositories @@ -7,5 +7,5 @@ typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] - bintray-spark-packages: https://dl.bintray.com/spark-packages/maven/ + repos-spark-packages: https://repos.spark-packages.org typesafe-releases: http://repo.typesafe.com/typesafe/releases/ diff --git a/core/src/main/java/io/delta/core/data/Row.java b/core/src/main/java/io/delta/core/data/Row.java deleted file mode 100644 index 2e0ec9ed4..000000000 --- a/core/src/main/java/io/delta/core/data/Row.java +++ /dev/null @@ -1,42 +0,0 @@ -package io.delta.core.data; - -import java.math.BigDecimal; -import java.sql.Date; -import java.sql.Timestamp; -import java.util.List; -import java.util.Map; - -public interface Row { - - boolean isNullAt(int ordinal); - - boolean getBoolean(int ordinal); - - byte getByte(int ordinal); - - short getShort(int ordinal); - - int getInt(int ordinal); - - long getLong(int ordinal); - - float getFloat(int ordinal); - - double getDouble(int ordinal); - - BigDecimal getDecimal(int ordinal, 
int precision, int scale); - - String getString(int ordinal); - - byte[] getBinary(int ordinal); - - Timestamp getTimestamp(int ordinal); - - Date getDate(int ordinal); - - Row getRecord(int ordinal); - - List getList(int ordinal); - - Map getMap(int ordinal); -} diff --git a/core/src/main/java/io/delta/core/fs/FileStatus.java b/core/src/main/java/io/delta/core/fs/FileStatus.java deleted file mode 100644 index 0f1cf6992..000000000 --- a/core/src/main/java/io/delta/core/fs/FileStatus.java +++ /dev/null @@ -1,10 +0,0 @@ -package io.delta.core.fs; - -public interface FileStatus { - - String filePath(); - - long size(); - - long modificationTime(); -} diff --git a/core/src/main/java/io/delta/core/types/DataType.java b/core/src/main/java/io/delta/core/types/DataType.java deleted file mode 100644 index fe83dd087..000000000 --- a/core/src/main/java/io/delta/core/types/DataType.java +++ /dev/null @@ -1,4 +0,0 @@ -package io.delta.core.types; - -public abstract class DataType { } - diff --git a/core/src/main/java/io/delta/core/types/StructType.java b/core/src/main/java/io/delta/core/types/StructType.java deleted file mode 100644 index c4bca2d99..000000000 --- a/core/src/main/java/io/delta/core/types/StructType.java +++ /dev/null @@ -1,3 +0,0 @@ -package io.delta.core.types; - -public final class StructType extends DataType { } diff --git a/default-core/src/main/java/io/delta/core/helpers/DefaultTableHelper.java b/default-core/src/main/java/io/delta/core/helpers/DefaultTableHelper.java deleted file mode 100644 index cdd9579b3..000000000 --- a/default-core/src/main/java/io/delta/core/helpers/DefaultTableHelper.java +++ /dev/null @@ -1,4 +0,0 @@ -package io.delta.core.helpers; - -public class DefaultTableHelper implements TableHelper { -} diff --git a/kernel-default/src/main/java/io/delta/core/data/JsonRow.java b/kernel-default/src/main/java/io/delta/core/data/JsonRow.java new file mode 100644 index 000000000..d40dc6c7f --- /dev/null +++ 
b/kernel-default/src/main/java/io/delta/core/data/JsonRow.java
@@ -0,0 +1,99 @@
+package io.delta.core.data;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import io.delta.core.types.DataType;
+import io.delta.core.types.LongType;
+import io.delta.core.types.StructField;
+import io.delta.core.types.StructType;
+
+/**
+ * A {@link Row} backed by a single parsed JSON object.
+ *
+ * <p>Every field of the read schema is extracted eagerly in the constructor and stored by
+ * ordinal. Only {@link LongType} fields are supported so far.
+ */
+public class JsonRow implements Row {
+
+    // TODO: we can do this cleaner / smarter / better
+
+    /**
+     * Extracts one field of {@code rootNode} as a Java value of the requested type.
+     *
+     * @param rootNode the JSON object to read from
+     * @param fieldName name of the field to extract
+     * @param dataType expected type of the field
+     * @return the boxed value, or {@code null} when the field is absent or JSON {@code null}
+     * @throws IllegalArgumentException if the field is present but is not an integral
+     *         number that fits in a {@code long}
+     * @throws UnsupportedOperationException if {@code dataType} is not supported yet
+     */
+    private static Object parse(ObjectNode rootNode, String fieldName, DataType dataType) {
+        if (dataType instanceof LongType) {
+            final JsonNode node = rootNode.get(fieldName);
+            // ObjectNode#get returns null for an absent field, and NullNode#longValue
+            // silently yields 0; surface both cases as a null value instead.
+            if (node == null || node.isNull()) {
+                return null;
+            }
+            if (!node.isIntegralNumber() || !node.canConvertToLong()) {
+                throw new IllegalArgumentException(
+                    String.format("Field '%s' is not a long: %s", fieldName, node)
+                );
+            }
+            return node.longValue();
+        }
+
+        throw new UnsupportedOperationException(
+            String.format("Unsupported DataType %s", dataType.typeName())
+        );
+    }
+
+    private final Map<Integer, Object> ordinalToValueMap;
+    private final StructType readSchema;
+
+    /**
+     * Parses every field of {@code readSchema} out of {@code rootNode}.
+     *
+     * @param rootNode the JSON object holding the row's values
+     * @param readSchema the fields to extract; ordinals follow the schema's field order
+     */
+    public JsonRow(ObjectNode rootNode, StructType readSchema) {
+        this.readSchema = readSchema;
+        this.ordinalToValueMap = new HashMap<>();
+
+        for (int i = 0; i < readSchema.length(); i++) {
+            final StructField field = readSchema.at(i);
+            ordinalToValueMap.put(i, parse(rootNode, field.name, field.dataType));
+        }
+    }
+
+    /**
+     * Returns the {@code long} value stored at {@code ordinal}.
+     *
+     * @param ordinal position of the field in the read schema
+     * @return the field's value
+     * @throws IllegalArgumentException if the schema field at {@code ordinal} is not a long
+     * @throws IllegalStateException if the value is absent or JSON {@code null}
+     */
+    @Override
+    public long getLong(int ordinal) {
+        // Checked explicitly: an `assert` is a no-op unless the JVM runs with -ea.
+        final StructField field = readSchema.at(ordinal);
+        if (!(field.dataType instanceof LongType)) {
+            throw new IllegalArgumentException(
+                String.format("Field '%s' at ordinal %d is not a long", field.name, ordinal)
+            );
+        }
+        final Object value = ordinalToValueMap.get(ordinal);
+        if (value == null) {
+            throw new IllegalStateException(
+                String.format("Field '%s' at ordinal %d is null", field.name, ordinal)
+            );
+        }
+        return (long) value;
+    }
+}
diff --git a/kernel-default/src/main/java/io/delta/core/helpers/DefaultTableHelper.java b/kernel-default/src/main/java/io/delta/core/helpers/DefaultTableHelper.java
new file mode 100644
index 000000000..c1a9492c1
--- /dev/null
+++ b/kernel-default/src/main/java/io/delta/core/helpers/DefaultTableHelper.java
@@ -0,0 +1,181 @@
+package io.delta.core.helpers;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Iterator;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+import io.delta.core.data.JsonRow;
+import io.delta.core.data.Row;
+import io.delta.core.expressions.Expression;
+import io.delta.core.fs.FileStatus;
+import io.delta.core.types.StructType;
+import io.delta.core.utils.CloseableIterator;
+import io.delta.storage.LocalLogStore;
+import io.delta.storage.LogStore;
+
+/**
+ * Default {@link TableHelper} that lists and reads files through a {@link LocalLogStore}
+ * and parses JSON with Jackson.
+ *
+ * <p>Parquet reading, stats parsing and the scan helper are not implemented yet; those
+ * methods currently return {@code null}.
+ */
+public class DefaultTableHelper implements TableHelper {
+
+    private final Configuration hadoopConf;
+    private final LogStore logStore;
+    private final ObjectMapper objectMapper;
+
+    /** Creates a helper using a fresh {@link Configuration} and {@link LocalLogStore}. */
+    public DefaultTableHelper() {
+        this.hadoopConf = new Configuration();
+        this.logStore = new LocalLogStore(hadoopConf);
+        this.objectMapper = new ObjectMapper();
+    }
+
+    /**
+     * Lists files through {@link LogStore#listFrom}, starting from {@code path}.
+     *
+     * @param path the path handed to the log store as the listing start point
+     * @return the listed files; closing the returned iterator is a no-op
+     * @throws RuntimeException if the log store fails to start the listing
+     */
+    @Override
+    public CloseableIterator<FileStatus> listFiles(String path) {
+        final Iterator<org.apache.hadoop.fs.FileStatus> hadoopFiles;
+        try {
+            hadoopFiles = logStore.listFrom(new Path(path), hadoopConf);
+        } catch (IOException ex) {
+            throw new RuntimeException("Could not resolve the FileSystem", ex);
+        }
+
+        return new CloseableIterator<FileStatus>() {
+            @Override
+            public boolean hasNext() {
+                return hadoopFiles.hasNext();
+            }
+
+            @Override
+            public FileStatus next() {
+                return wrap(hadoopFiles.next());
+            }
+
+            @Override
+            public void close() throws IOException {
+                // Nothing to release: the underlying listing is a plain Iterator.
+            }
+        };
+    }
+
+    /** Adapts a Hadoop file status to this library's {@link FileStatus} interface. */
+    private static FileStatus wrap(final org.apache.hadoop.fs.FileStatus impl) {
+        return new FileStatus() {
+            @Override
+            public String pathStr() {
+                return impl.getPath().toString();
+            }
+
+            @Override
+            public long length() {
+                return impl.getLen();
+            }
+
+            @Override
+            public long modificationTime() {
+                return impl.getModificationTime();
+            }
+        };
+    }
+
+    /**
+     * Reads {@code path} through {@link LogStore#read} and parses every returned string as a
+     * JSON object projected onto {@code readSchema}.
+     *
+     * @param path the file to read
+     * @param readSchema the fields to extract from each JSON object
+     * @return one {@link Row} per string produced by the log store; close it when done
+     * @throws FileNotFoundException if the log store reports that {@code path} does not exist
+     * @throws RuntimeException if the file cannot be opened for any other reason, or, while
+     *         iterating, if an entry is not valid JSON or is not a JSON object
+     */
+    @Override
+    public CloseableIterator<Row> readJsonFile(String path, StructType readSchema)
+            throws FileNotFoundException {
+        final io.delta.storage.CloseableIterator<String> lines;
+        try {
+            lines = logStore.read(new Path(path), hadoopConf);
+        } catch (FileNotFoundException ex) {
+            throw ex;
+        } catch (IOException ex) {
+            throw new RuntimeException(String.format("Could not read JSON file: %s", path), ex);
+        }
+
+        return new CloseableIterator<Row>() {
+            @Override
+            public void close() throws IOException {
+                lines.close();
+            }
+
+            @Override
+            public boolean hasNext() {
+                return lines.hasNext();
+            }
+
+            @Override
+            public Row next() {
+                final String json = lines.next();
+                final JsonNode jsonNode;
+                try {
+                    jsonNode = objectMapper.readTree(json);
+                } catch (JsonProcessingException ex) {
+                    throw new RuntimeException(
+                        String.format("Could not parse JSON: %s", json), ex);
+                }
+
+                // Guard the cast: readTree can also return non-object nodes (arrays, scalars).
+                if (!(jsonNode instanceof ObjectNode)) {
+                    throw new RuntimeException(
+                        String.format("Expected a JSON object but got: %s", json));
+                }
+                return new JsonRow((ObjectNode) jsonNode, readSchema);
+            }
+        };
+    }
+
+    /**
+     * Not implemented yet; currently always returns {@code null}.
+     *
+     * <p>NOTE(review): the iterator's element type is not visible here; confirm it against
+     * {@code TableHelper} before parameterizing the return type.
+     */
+    @Override
+    public CloseableIterator readParquetFile(String path, StructType readSchema) {
+        return null;
+    }
+
+    /** Not implemented yet; currently always returns {@code null}. */
+    @Override
+    public CloseableIterator readParquetFile(
+            String path, StructType readSchema, Expression skippingFilter) {
+        return null;
+    }
+
+    /** Not implemented yet; currently always returns {@code null}. */
+    @Override
+    public Row parseStats(String statsJson) {
+        return null;
+    }
+
+    /** Not implemented yet; currently always returns {@code null}. */
+    @Override
+    public ScanHelper getScanHelper() {
+        return null;
+    }
+}
diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-20c6bed6-b3e2-4a4e-8239-bc4c989e71f2-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-20c6bed6-b3e2-4a4e-8239-bc4c989e71f2-c000.snappy.parquet.crc
new file mode 100644
index 000000000..3fb1e45b3
Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-20c6bed6-b3e2-4a4e-8239-bc4c989e71f2-c000.snappy.parquet.crc differ
diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-3883de18-7996-4e08-809e-6dbe967d580f-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-3883de18-7996-4e08-809e-6dbe967d580f-c000.snappy.parquet.crc
new file mode 100644
index 000000000..0fcb94bb1
Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-3883de18-7996-4e08-809e-6dbe967d580f-c000.snappy.parquet.crc differ
diff --git
a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-6570604d-1129-4e82-92a0-d9593195b971-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-6570604d-1129-4e82-92a0-d9593195b971-c000.snappy.parquet.crc new file mode 100644 index 000000000..db43c0474 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-6570604d-1129-4e82-92a0-d9593195b971-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-7fbd89a3-73a8-4553-af2d-482507955b73-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-7fbd89a3-73a8-4553-af2d-482507955b73-c000.snappy.parquet.crc new file mode 100644 index 000000000..11f7d7844 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-7fbd89a3-73a8-4553-af2d-482507955b73-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-8b1211c5-9d57-4fca-b723-a926205f38d2-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-8b1211c5-9d57-4fca-b723-a926205f38d2-c000.snappy.parquet.crc new file mode 100644 index 000000000..e6696cf57 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-8b1211c5-9d57-4fca-b723-a926205f38d2-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-b490279f-72b0-4690-aa25-6a82eb94a985-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-b490279f-72b0-4690-aa25-6a82eb94a985-c000.snappy.parquet.crc new file mode 100644 index 000000000..b9ea10e76 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-b490279f-72b0-4690-aa25-6a82eb94a985-c000.snappy.parquet.crc differ diff --git 
a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-e6d1d2e6-7511-48a7-9a8b-b22c1ecfef32-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-e6d1d2e6-7511-48a7-9a8b-b22c1ecfef32-c000.snappy.parquet.crc new file mode 100644 index 000000000..708d7c4c2 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-e6d1d2e6-7511-48a7-9a8b-b22c1ecfef32-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-ee495f65-0a7b-4cd9-9072-e07a2f0206a1-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-ee495f65-0a7b-4cd9-9072-e07a2f0206a1-c000.snappy.parquet.crc new file mode 100644 index 000000000..f63db20fd Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-ee495f65-0a7b-4cd9-9072-e07a2f0206a1-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-fd1e7a9b-aa91-4982-b013-e2ad17e4cc32-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-fd1e7a9b-aa91-4982-b013-e2ad17e4cc32-c000.snappy.parquet.crc new file mode 100644 index 000000000..cc7122f49 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00000-fd1e7a9b-aa91-4982-b013-e2ad17e4cc32-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-1876aac0-c5fe-450c-b843-e44277d5f1a3-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-1876aac0-c5fe-450c-b843-e44277d5f1a3-c000.snappy.parquet.crc new file mode 100644 index 000000000..ae45837ad Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-1876aac0-c5fe-450c-b843-e44277d5f1a3-c000.snappy.parquet.crc differ diff --git 
a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-4f0d5e2d-a6ae-442f-9277-b8b670447b37-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-4f0d5e2d-a6ae-442f-9277-b8b670447b37-c000.snappy.parquet.crc new file mode 100644 index 000000000..57e1c7789 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-4f0d5e2d-a6ae-442f-9277-b8b670447b37-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-52ce751a-5e35-46c2-8bfe-41b88781814e-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-52ce751a-5e35-46c2-8bfe-41b88781814e-c000.snappy.parquet.crc new file mode 100644 index 000000000..372e20924 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-52ce751a-5e35-46c2-8bfe-41b88781814e-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-6efe57e5-3a3f-4430-a0f3-ce3d61130b9a-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-6efe57e5-3a3f-4430-a0f3-ce3d61130b9a-c000.snappy.parquet.crc new file mode 100644 index 000000000..e4c6b3274 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-6efe57e5-3a3f-4430-a0f3-ce3d61130b9a-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-70abc612-9190-4a6e-9a55-1583bc1ca391-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-70abc612-9190-4a6e-9a55-1583bc1ca391-c000.snappy.parquet.crc new file mode 100644 index 000000000..029a72e0f Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-70abc612-9190-4a6e-9a55-1583bc1ca391-c000.snappy.parquet.crc differ diff --git 
a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-7da76162-c51d-41d8-822f-1ecdbbd5bf52-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-7da76162-c51d-41d8-822f-1ecdbbd5bf52-c000.snappy.parquet.crc new file mode 100644 index 000000000..779f27db9 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-7da76162-c51d-41d8-822f-1ecdbbd5bf52-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-91825ecf-7ff9-410d-89d4-aeaba097c11b-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-91825ecf-7ff9-410d-89d4-aeaba097c11b-c000.snappy.parquet.crc new file mode 100644 index 000000000..4b475a794 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-91825ecf-7ff9-410d-89d4-aeaba097c11b-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-da6606e8-9cda-457f-a155-46fa5da29690-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-da6606e8-9cda-457f-a155-46fa5da29690-c000.snappy.parquet.crc new file mode 100644 index 000000000..47a91cf51 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-da6606e8-9cda-457f-a155-46fa5da29690-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-e8342fee-6cef-452b-aaf7-2ab71f835e9d-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-e8342fee-6cef-452b-aaf7-2ab71f835e9d-c000.snappy.parquet.crc new file mode 100644 index 000000000..ff4fb967e Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/.part-00001-e8342fee-6cef-452b-aaf7-2ab71f835e9d-c000.snappy.parquet.crc differ diff --git 
a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000000.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 000000000..548d7a338 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000000.json.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000001.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 000000000..400eeea46 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000001.json.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000002.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 000000000..2a866cea5 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000002.json.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000003.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 000000000..9ef6aa402 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000003.json.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000004.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 000000000..3f8e0bf2d Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000004.json.crc differ diff --git 
a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000005.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000005.json.crc new file mode 100644 index 000000000..0e9860c98 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000005.json.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000006.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000006.json.crc new file mode 100644 index 000000000..1d62b5dd8 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000006.json.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000007.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000007.json.crc new file mode 100644 index 000000000..fb28ba3b4 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000007.json.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000008.json.crc b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000008.json.crc new file mode 100644 index 000000000..234a99aae Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/.00000000000000000008.json.crc differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000000.json b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000000.json new file mode 100644 index 000000000..667b2435b --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ 
+{"commitInfo":{"timestamp":1679943456996,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1003"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"ff312e01-4714-45f0-868a-3fb36e51ca7d"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1679943453303}} +{"add":{"path":"part-00000-b490279f-72b0-4690-aa25-6a82eb94a985-c000.snappy.parquet","partitionValues":{},"size":500,"modificationTime":1679943456000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":4},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-da6606e8-9cda-457f-a155-46fa5da29690-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943456000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":5},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000001.json b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000001.json new file mode 100644 index 000000000..bc0a17e8b --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943462467,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"0f9a79b7-4882-477d-914c-bce60f36c12d"}} 
+{"add":{"path":"part-00000-fd1e7a9b-aa91-4982-b013-e2ad17e4cc32-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943462000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":10},\"maxValues\":{\"id\":14},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-70abc612-9190-4a6e-9a55-1583bc1ca391-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943462000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":15},\"maxValues\":{\"id\":19},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000002.json b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000002.json new file mode 100644 index 000000000..770bdfedf --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000002.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943463952,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"bce9b0ef-dba2-48e7-a379-a76f37d67c9c"}} +{"add":{"path":"part-00000-6570604d-1129-4e82-92a0-d9593195b971-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943463000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":20},\"maxValues\":{\"id\":24},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-91825ecf-7ff9-410d-89d4-aeaba097c11b-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943463000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":25},\"maxValues\":{\"id\":29},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000003.json 
b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000003.json new file mode 100644 index 000000000..a3e11b520 --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943465380,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"8286031e-4dc6-45e4-b3da-72417fe33bb0"}} +{"add":{"path":"part-00000-20c6bed6-b3e2-4a4e-8239-bc4c989e71f2-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943465000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":30},\"maxValues\":{\"id\":34},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-52ce751a-5e35-46c2-8bfe-41b88781814e-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943465000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":35},\"maxValues\":{\"id\":39},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000004.json b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000004.json new file mode 100644 index 000000000..885460265 --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000004.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943466719,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"d49e242a-b0e4-4a2d-aebb-44482dfed808"}} 
+{"add":{"path":"part-00000-3883de18-7996-4e08-809e-6dbe967d580f-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943466000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":40},\"maxValues\":{\"id\":44},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-6efe57e5-3a3f-4430-a0f3-ce3d61130b9a-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943466000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":45},\"maxValues\":{\"id\":49},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000005.json b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000005.json new file mode 100644 index 000000000..9895c7b7b --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000005.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943467753,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"8e241f39-f3f5-4bbf-81dd-5cc68ee1fa2c"}} +{"add":{"path":"part-00000-7fbd89a3-73a8-4553-af2d-482507955b73-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943467000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":50},\"maxValues\":{\"id\":54},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-4f0d5e2d-a6ae-442f-9277-b8b670447b37-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943467000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":55},\"maxValues\":{\"id\":59},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000006.json 
b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000006.json new file mode 100644 index 000000000..871cbb7ba --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000006.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943468830,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"84e116f1-8e66-42e1-848c-6367c666a2e3"}} +{"add":{"path":"part-00000-8b1211c5-9d57-4fca-b723-a926205f38d2-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943468000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":60},\"maxValues\":{\"id\":64},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-e8342fee-6cef-452b-aaf7-2ab71f835e9d-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943468000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":65},\"maxValues\":{\"id\":69},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000007.json b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000007.json new file mode 100644 index 000000000..83b630f6b --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000007.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943469859,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"13a62249-9801-486e-8723-d86add00c182"}} 
+{"add":{"path":"part-00000-ee495f65-0a7b-4cd9-9072-e07a2f0206a1-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943469000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":70},\"maxValues\":{\"id\":74},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-7da76162-c51d-41d8-822f-1ecdbbd5bf52-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943469000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":75},\"maxValues\":{\"id\":79},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000008.json b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000008.json new file mode 100644 index 000000000..4557268ca --- /dev/null +++ b/kernel-default/src/test/resources/basic-no-checkpoint/_delta_log/00000000000000000008.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943470857,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"b201d339-b75a-43ec-8748-9575608cab8a"}} +{"add":{"path":"part-00000-e6d1d2e6-7511-48a7-9a8b-b22c1ecfef32-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943470000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":80},\"maxValues\":{\"id\":84},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-1876aac0-c5fe-450c-b843-e44277d5f1a3-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943470000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":85},\"maxValues\":{\"id\":89},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-20c6bed6-b3e2-4a4e-8239-bc4c989e71f2-c000.snappy.parquet 
b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-20c6bed6-b3e2-4a4e-8239-bc4c989e71f2-c000.snappy.parquet new file mode 100644 index 000000000..6cc50340f Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-20c6bed6-b3e2-4a4e-8239-bc4c989e71f2-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-3883de18-7996-4e08-809e-6dbe967d580f-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-3883de18-7996-4e08-809e-6dbe967d580f-c000.snappy.parquet new file mode 100644 index 000000000..7cc96b8f0 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-3883de18-7996-4e08-809e-6dbe967d580f-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-6570604d-1129-4e82-92a0-d9593195b971-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-6570604d-1129-4e82-92a0-d9593195b971-c000.snappy.parquet new file mode 100644 index 000000000..321e390f6 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-6570604d-1129-4e82-92a0-d9593195b971-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-7fbd89a3-73a8-4553-af2d-482507955b73-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-7fbd89a3-73a8-4553-af2d-482507955b73-c000.snappy.parquet new file mode 100644 index 000000000..df2168a41 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-7fbd89a3-73a8-4553-af2d-482507955b73-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-8b1211c5-9d57-4fca-b723-a926205f38d2-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-8b1211c5-9d57-4fca-b723-a926205f38d2-c000.snappy.parquet new file mode 
100644 index 000000000..3fb7d89de Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-8b1211c5-9d57-4fca-b723-a926205f38d2-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-b490279f-72b0-4690-aa25-6a82eb94a985-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-b490279f-72b0-4690-aa25-6a82eb94a985-c000.snappy.parquet new file mode 100644 index 000000000..e4cc55491 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-b490279f-72b0-4690-aa25-6a82eb94a985-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-e6d1d2e6-7511-48a7-9a8b-b22c1ecfef32-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-e6d1d2e6-7511-48a7-9a8b-b22c1ecfef32-c000.snappy.parquet new file mode 100644 index 000000000..3437174a5 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-e6d1d2e6-7511-48a7-9a8b-b22c1ecfef32-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-ee495f65-0a7b-4cd9-9072-e07a2f0206a1-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-ee495f65-0a7b-4cd9-9072-e07a2f0206a1-c000.snappy.parquet new file mode 100644 index 000000000..f86acec33 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-ee495f65-0a7b-4cd9-9072-e07a2f0206a1-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-fd1e7a9b-aa91-4982-b013-e2ad17e4cc32-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-fd1e7a9b-aa91-4982-b013-e2ad17e4cc32-c000.snappy.parquet new file mode 100644 index 000000000..c1ce679c4 Binary files /dev/null and 
b/kernel-default/src/test/resources/basic-no-checkpoint/part-00000-fd1e7a9b-aa91-4982-b013-e2ad17e4cc32-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-1876aac0-c5fe-450c-b843-e44277d5f1a3-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-1876aac0-c5fe-450c-b843-e44277d5f1a3-c000.snappy.parquet new file mode 100644 index 000000000..ef8769b27 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-1876aac0-c5fe-450c-b843-e44277d5f1a3-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-4f0d5e2d-a6ae-442f-9277-b8b670447b37-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-4f0d5e2d-a6ae-442f-9277-b8b670447b37-c000.snappy.parquet new file mode 100644 index 000000000..3a054ac8f Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-4f0d5e2d-a6ae-442f-9277-b8b670447b37-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-52ce751a-5e35-46c2-8bfe-41b88781814e-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-52ce751a-5e35-46c2-8bfe-41b88781814e-c000.snappy.parquet new file mode 100644 index 000000000..a3a7e7661 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-52ce751a-5e35-46c2-8bfe-41b88781814e-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-6efe57e5-3a3f-4430-a0f3-ce3d61130b9a-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-6efe57e5-3a3f-4430-a0f3-ce3d61130b9a-c000.snappy.parquet new file mode 100644 index 000000000..97de096b8 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-6efe57e5-3a3f-4430-a0f3-ce3d61130b9a-c000.snappy.parquet differ diff 
--git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-70abc612-9190-4a6e-9a55-1583bc1ca391-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-70abc612-9190-4a6e-9a55-1583bc1ca391-c000.snappy.parquet new file mode 100644 index 000000000..0f4615d4e Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-70abc612-9190-4a6e-9a55-1583bc1ca391-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-7da76162-c51d-41d8-822f-1ecdbbd5bf52-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-7da76162-c51d-41d8-822f-1ecdbbd5bf52-c000.snappy.parquet new file mode 100644 index 000000000..58d4b9abe Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-7da76162-c51d-41d8-822f-1ecdbbd5bf52-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-91825ecf-7ff9-410d-89d4-aeaba097c11b-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-91825ecf-7ff9-410d-89d4-aeaba097c11b-c000.snappy.parquet new file mode 100644 index 000000000..c1a9b218b Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-91825ecf-7ff9-410d-89d4-aeaba097c11b-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-da6606e8-9cda-457f-a155-46fa5da29690-c000.snappy.parquet b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-da6606e8-9cda-457f-a155-46fa5da29690-c000.snappy.parquet new file mode 100644 index 000000000..e0fe71634 Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-da6606e8-9cda-457f-a155-46fa5da29690-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-e8342fee-6cef-452b-aaf7-2ab71f835e9d-c000.snappy.parquet 
b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-e8342fee-6cef-452b-aaf7-2ab71f835e9d-c000.snappy.parquet new file mode 100644 index 000000000..4d1e8a34e Binary files /dev/null and b/kernel-default/src/test/resources/basic-no-checkpoint/part-00001-e8342fee-6cef-452b-aaf7-2ab71f835e9d-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-0d9c05f4-8afc-4325-b1e0-ea32e4eff918-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-0d9c05f4-8afc-4325-b1e0-ea32e4eff918-c000.snappy.parquet.crc new file mode 100644 index 000000000..ea340e723 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-0d9c05f4-8afc-4325-b1e0-ea32e4eff918-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-100e4547-5ff3-4735-9550-7757ca23c61d-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-100e4547-5ff3-4735-9550-7757ca23c61d-c000.snappy.parquet.crc new file mode 100644 index 000000000..0fcb94bb1 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-100e4547-5ff3-4735-9550-7757ca23c61d-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-14b8a37a-107b-455f-ab94-8f55e44c004b-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-14b8a37a-107b-455f-ab94-8f55e44c004b-c000.snappy.parquet.crc new file mode 100644 index 000000000..3fb1e45b3 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-14b8a37a-107b-455f-ab94-8f55e44c004b-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-1bbb3853-04b4-4539-a112-be7140314153-c000.snappy.parquet.crc 
b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-1bbb3853-04b4-4539-a112-be7140314153-c000.snappy.parquet.crc new file mode 100644 index 000000000..e6696cf57 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-1bbb3853-04b4-4539-a112-be7140314153-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-2a210d80-a4e6-4a1c-8716-ee0b542aee08-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-2a210d80-a4e6-4a1c-8716-ee0b542aee08-c000.snappy.parquet.crc new file mode 100644 index 000000000..71230171d Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-2a210d80-a4e6-4a1c-8716-ee0b542aee08-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-326010e2-01f4-4dfb-90a7-98cbc04a60d1-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-326010e2-01f4-4dfb-90a7-98cbc04a60d1-c000.snappy.parquet.crc new file mode 100644 index 000000000..1fda6c165 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-326010e2-01f4-4dfb-90a7-98cbc04a60d1-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-3317387d-183d-4db7-ac3e-596901d90de0-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-3317387d-183d-4db7-ac3e-596901d90de0-c000.snappy.parquet.crc new file mode 100644 index 000000000..db43c0474 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-3317387d-183d-4db7-ac3e-596901d90de0-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-5c99dc53-38c0-420f-a91b-6df7a4c27a2b-c000.snappy.parquet.crc 
b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-5c99dc53-38c0-420f-a91b-6df7a4c27a2b-c000.snappy.parquet.crc new file mode 100644 index 000000000..cc7122f49 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-5c99dc53-38c0-420f-a91b-6df7a4c27a2b-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-5e9186c7-c7b0-4c4d-9f22-1c0cd403142c-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-5e9186c7-c7b0-4c4d-9f22-1c0cd403142c-c000.snappy.parquet.crc new file mode 100644 index 000000000..708d7c4c2 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-5e9186c7-c7b0-4c4d-9f22-1c0cd403142c-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-6e367682-7cd1-48e6-bc2f-bc94aa94d1a3-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-6e367682-7cd1-48e6-bc2f-bc94aa94d1a3-c000.snappy.parquet.crc new file mode 100644 index 000000000..177419cc4 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-6e367682-7cd1-48e6-bc2f-bc94aa94d1a3-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-8d5f08ff-261b-4315-99cb-d289a3191368-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-8d5f08ff-261b-4315-99cb-d289a3191368-c000.snappy.parquet.crc new file mode 100644 index 000000000..3b5d84f29 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-8d5f08ff-261b-4315-99cb-d289a3191368-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-a65ab59f-72fd-44c9-a73e-e2d09459f836-c000.snappy.parquet.crc 
b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-a65ab59f-72fd-44c9-a73e-e2d09459f836-c000.snappy.parquet.crc new file mode 100644 index 000000000..b9ea10e76 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-a65ab59f-72fd-44c9-a73e-e2d09459f836-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-ce6aed75-3e85-4d7c-90de-9d465e9acc04-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-ce6aed75-3e85-4d7c-90de-9d465e9acc04-c000.snappy.parquet.crc new file mode 100644 index 000000000..64c199001 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-ce6aed75-3e85-4d7c-90de-9d465e9acc04-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-e51a2d2a-d1a3-437e-a428-f5afe93d4619-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-e51a2d2a-d1a3-437e-a428-f5afe93d4619-c000.snappy.parquet.crc new file mode 100644 index 000000000..f63db20fd Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-e51a2d2a-d1a3-437e-a428-f5afe93d4619-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-eb1dae3f-8c89-46c3-818f-491cc673936a-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-eb1dae3f-8c89-46c3-818f-491cc673936a-c000.snappy.parquet.crc new file mode 100644 index 000000000..11f7d7844 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00000-eb1dae3f-8c89-46c3-818f-491cc673936a-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-30432f6b-710f-440c-8145-adbaed187c63-c000.snappy.parquet.crc 
b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-30432f6b-710f-440c-8145-adbaed187c63-c000.snappy.parquet.crc new file mode 100644 index 000000000..ff4fb967e Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-30432f6b-710f-440c-8145-adbaed187c63-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-39e6196f-2259-4ba4-b1d6-005712cd7784-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-39e6196f-2259-4ba4-b1d6-005712cd7784-c000.snappy.parquet.crc new file mode 100644 index 000000000..47a91cf51 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-39e6196f-2259-4ba4-b1d6-005712cd7784-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-3cd0b397-0ac3-48ac-88ab-3cc21542e303-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-3cd0b397-0ac3-48ac-88ab-3cc21542e303-c000.snappy.parquet.crc new file mode 100644 index 000000000..ae45837ad Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-3cd0b397-0ac3-48ac-88ab-3cc21542e303-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-43e387db-3e56-44f3-8965-9187a80fce9a-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-43e387db-3e56-44f3-8965-9187a80fce9a-c000.snappy.parquet.crc new file mode 100644 index 000000000..b9aa1f9d1 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-43e387db-3e56-44f3-8965-9187a80fce9a-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-4707caa4-d293-4b4a-aeef-fa4d5815e732-c000.snappy.parquet.crc 
b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-4707caa4-d293-4b4a-aeef-fa4d5815e732-c000.snappy.parquet.crc new file mode 100644 index 000000000..eddf484cc Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-4707caa4-d293-4b4a-aeef-fa4d5815e732-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-4e30e0a7-63d2-4d2f-a028-b92058c3c8cf-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-4e30e0a7-63d2-4d2f-a028-b92058c3c8cf-c000.snappy.parquet.crc new file mode 100644 index 000000000..e4c6b3274 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-4e30e0a7-63d2-4d2f-a028-b92058c3c8cf-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-51925029-c591-4366-b3e9-aeea97594037-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-51925029-c591-4366-b3e9-aeea97594037-c000.snappy.parquet.crc new file mode 100644 index 000000000..366b88e9c Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-51925029-c591-4366-b3e9-aeea97594037-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-63b224e2-ba72-4b95-af02-5af2367d4130-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-63b224e2-ba72-4b95-af02-5af2367d4130-c000.snappy.parquet.crc new file mode 100644 index 000000000..ac156dc90 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-63b224e2-ba72-4b95-af02-5af2367d4130-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-8be0e82d-ce51-43a6-92eb-df71a9088173-c000.snappy.parquet.crc 
b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-8be0e82d-ce51-43a6-92eb-df71a9088173-c000.snappy.parquet.crc new file mode 100644 index 000000000..029a72e0f Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-8be0e82d-ce51-43a6-92eb-df71a9088173-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-a65a81c6-292a-49f2-8eea-82c0299cdfb3-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-a65a81c6-292a-49f2-8eea-82c0299cdfb3-c000.snappy.parquet.crc new file mode 100644 index 000000000..4b475a794 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-a65a81c6-292a-49f2-8eea-82c0299cdfb3-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-a9a33a7f-26fa-447d-8b34-863b5f695f06-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-a9a33a7f-26fa-447d-8b34-863b5f695f06-c000.snappy.parquet.crc new file mode 100644 index 000000000..57e1c7789 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-a9a33a7f-26fa-447d-8b34-863b5f695f06-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-ba1ceb1e-6a37-4e2e-8e97-a17b9b1bb33d-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-ba1ceb1e-6a37-4e2e-8e97-a17b9b1bb33d-c000.snappy.parquet.crc new file mode 100644 index 000000000..6972d0514 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-ba1ceb1e-6a37-4e2e-8e97-a17b9b1bb33d-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-d64b05c7-d80d-4eff-8c58-d209003ee4c0-c000.snappy.parquet.crc 
b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-d64b05c7-d80d-4eff-8c58-d209003ee4c0-c000.snappy.parquet.crc new file mode 100644 index 000000000..fe1114216 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-d64b05c7-d80d-4eff-8c58-d209003ee4c0-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-ed427b16-f597-432a-a49e-135b126d38a8-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-ed427b16-f597-432a-a49e-135b126d38a8-c000.snappy.parquet.crc new file mode 100644 index 000000000..779f27db9 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-ed427b16-f597-432a-a49e-135b126d38a8-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-fdaa71cc-84b2-43b1-b049-7cd36dbaa0de-c000.snappy.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-fdaa71cc-84b2-43b1-b049-7cd36dbaa0de-c000.snappy.parquet.crc new file mode 100644 index 000000000..372e20924 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/.part-00001-fdaa71cc-84b2-43b1-b049-7cd36dbaa0de-c000.snappy.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000000.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 000000000..a096e24e2 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000000.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000001.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 000000000..b4112652e Binary files /dev/null and 
b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000001.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000002.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 000000000..f27c68067 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000002.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000003.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 000000000..51990eab5 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000003.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000004.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 000000000..f38c7455c Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000004.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000005.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000005.json.crc new file mode 100644 index 000000000..9a5e20294 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000005.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000006.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000006.json.crc new file mode 100644 index 000000000..73623f76d Binary files /dev/null and 
b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000006.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000007.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000007.json.crc new file mode 100644 index 000000000..07ed482b2 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000007.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000008.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000008.json.crc new file mode 100644 index 000000000..a9013addb Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000008.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000009.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000009.json.crc new file mode 100644 index 000000000..60888d841 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000009.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc new file mode 100644 index 000000000..c20742efd Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000010.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000010.json.crc new file mode 100644 index 000000000..a7c43c4ac Binary files 
/dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000010.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000011.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000011.json.crc new file mode 100644 index 000000000..179560e05 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000011.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000012.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000012.json.crc new file mode 100644 index 000000000..9a020794e Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000012.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000013.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000013.json.crc new file mode 100644 index 000000000..c975f23db Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000013.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000014.json.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000014.json.crc new file mode 100644 index 000000000..8a3590a7e Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/.00000000000000000014.json.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/._last_checkpoint.crc b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/._last_checkpoint.crc new file mode 100644 index 000000000..501434e89 Binary files /dev/null and 
b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/._last_checkpoint.crc differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000000.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000000.json new file mode 100644 index 000000000..637fa1bb1 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1679943471996,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1003"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"fa272a31-18c1-4c57-ae5c-6b52fbe83e92"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1679943471575}} +{"add":{"path":"part-00000-a65ab59f-72fd-44c9-a73e-e2d09459f836-c000.snappy.parquet","partitionValues":{},"size":500,"modificationTime":1679943471000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":4},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-39e6196f-2259-4ba4-b1d6-005712cd7784-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943471000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":5},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000001.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000001.json new file mode 100644 index 000000000..0d246c249 --- /dev/null +++ 
b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943473096,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"f9689ad3-1179-4682-bd00-4a635b48cba8"}} +{"add":{"path":"part-00000-5c99dc53-38c0-420f-a91b-6df7a4c27a2b-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943473000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":10},\"maxValues\":{\"id\":14},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-8be0e82d-ce51-43a6-92eb-df71a9088173-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943473000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":15},\"maxValues\":{\"id\":19},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000002.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000002.json new file mode 100644 index 000000000..6ac43813a --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000002.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943474078,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"33a8226f-dfcc-4ec1-82a7-fb4f82014006"}} 
+{"add":{"path":"part-00000-3317387d-183d-4db7-ac3e-596901d90de0-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943474000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":20},\"maxValues\":{\"id\":24},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-a65a81c6-292a-49f2-8eea-82c0299cdfb3-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943474000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":25},\"maxValues\":{\"id\":29},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000003.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000003.json new file mode 100644 index 000000000..ec27f4231 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943475370,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"1fc575df-b4bf-452f-bf30-6715cb63ae21"}} +{"add":{"path":"part-00000-14b8a37a-107b-455f-ab94-8f55e44c004b-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943475000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":30},\"maxValues\":{\"id\":34},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-fdaa71cc-84b2-43b1-b049-7cd36dbaa0de-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943475000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":35},\"maxValues\":{\"id\":39},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000004.json 
b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000004.json new file mode 100644 index 000000000..d3e409370 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000004.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943476409,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"896a19db-a5c0-45e1-9265-7d6745b0860e"}} +{"add":{"path":"part-00000-100e4547-5ff3-4735-9550-7757ca23c61d-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943476000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":40},\"maxValues\":{\"id\":44},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-4e30e0a7-63d2-4d2f-a028-b92058c3c8cf-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943476000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":45},\"maxValues\":{\"id\":49},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000005.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000005.json new file mode 100644 index 000000000..45304c0cc --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000005.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943477339,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"a9a4c094-2e27-40f2-bdef-6eead53d535c"}} 
+{"add":{"path":"part-00000-eb1dae3f-8c89-46c3-818f-491cc673936a-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943477000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":50},\"maxValues\":{\"id\":54},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-a9a33a7f-26fa-447d-8b34-863b5f695f06-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943477000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":55},\"maxValues\":{\"id\":59},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000006.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000006.json new file mode 100644 index 000000000..84efa7e53 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000006.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943478349,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"e60661af-beaf-444d-9b7e-df34d0ef119e"}} +{"add":{"path":"part-00000-1bbb3853-04b4-4539-a112-be7140314153-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943478000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":60},\"maxValues\":{\"id\":64},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-30432f6b-710f-440c-8145-adbaed187c63-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943478000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":65},\"maxValues\":{\"id\":69},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000007.json 
b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000007.json new file mode 100644 index 000000000..72369d916 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000007.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943479247,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"1545252b-0e40-4ac8-9fb9-2a165a524c61"}} +{"add":{"path":"part-00000-e51a2d2a-d1a3-437e-a428-f5afe93d4619-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943479000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":70},\"maxValues\":{\"id\":74},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-ed427b16-f597-432a-a49e-135b126d38a8-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943479000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":75},\"maxValues\":{\"id\":79},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000008.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000008.json new file mode 100644 index 000000000..d01a63b2d --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000008.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943480075,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"43e883c7-8afd-4ef4-9d4d-0be79a3a6b9e"}} 
+{"add":{"path":"part-00000-5e9186c7-c7b0-4c4d-9f22-1c0cd403142c-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943480000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":80},\"maxValues\":{\"id\":84},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-3cd0b397-0ac3-48ac-88ab-3cc21542e303-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943480000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":85},\"maxValues\":{\"id\":89},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000009.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000009.json new file mode 100644 index 000000000..c2991adde --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000009.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943480946,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"c63d10cf-f665-4b99-a620-abbf9518d520"}} +{"add":{"path":"part-00000-8d5f08ff-261b-4315-99cb-d289a3191368-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943480000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":90},\"maxValues\":{\"id\":94},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-51925029-c591-4366-b3e9-aeea97594037-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943480000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":95},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000010.checkpoint.parquet 
b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 000000000..4ada39251 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000010.checkpoint.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000010.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000010.json new file mode 100644 index 000000000..cf7284a80 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000010.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943481745,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1005"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"cdda48e4-68d6-4416-9eca-a23f5c3a59cd"}} +{"add":{"path":"part-00000-2a210d80-a4e6-4a1c-8716-ee0b542aee08-c000.snappy.parquet","partitionValues":{},"size":502,"modificationTime":1679943481000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":100},\"maxValues\":{\"id\":104},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-43e387db-3e56-44f3-8965-9187a80fce9a-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943481000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":105},\"maxValues\":{\"id\":109},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000011.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000011.json new file mode 100644 index 000000000..9b493afd4 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000011.json @@ -0,0 +1,3 @@ 
+{"commitInfo":{"timestamp":1679943484145,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"8cdfb467-8be9-4fed-b3e4-d1c2ad6f8b46"}} +{"add":{"path":"part-00000-326010e2-01f4-4dfb-90a7-98cbc04a60d1-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943484000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":110},\"maxValues\":{\"id\":114},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-ba1ceb1e-6a37-4e2e-8e97-a17b9b1bb33d-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943484000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":115},\"maxValues\":{\"id\":119},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000012.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000012.json new file mode 100644 index 000000000..3da18d259 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000012.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943485143,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":11,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1006"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"d19bd23c-dc4a-430f-8882-657f3bb9aa20"}} +{"add":{"path":"part-00000-6e367682-7cd1-48e6-bc2f-bc94aa94d1a3-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943485000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":120},\"maxValues\":{\"id\":124},\"nullCount\":{\"id\":0}}"}} 
+{"add":{"path":"part-00001-63b224e2-ba72-4b95-af02-5af2367d4130-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943485000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":125},\"maxValues\":{\"id\":129},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000013.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000013.json new file mode 100644 index 000000000..156d45d22 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000013.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1679943486048,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":12,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1005"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"8388e2cc-f11a-4b4b-98f1-a1ad5e45d9ee"}} +{"add":{"path":"part-00000-0d9c05f4-8afc-4325-b1e0-ea32e4eff918-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943486000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":130},\"maxValues\":{\"id\":134},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-d64b05c7-d80d-4eff-8c58-d209003ee4c0-c000.snappy.parquet","partitionValues":{},"size":502,"modificationTime":1679943486000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":135},\"maxValues\":{\"id\":139},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000014.json b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000014.json new file mode 100644 index 000000000..4b6e3adb4 --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/00000000000000000014.json @@ -0,0 +1,3 @@ 
+{"commitInfo":{"timestamp":1679943486941,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":13,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"10","numOutputBytes":"1005"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.2.0","txnId":"9687712e-b9dd-4a48-bede-acb00101133f"}} +{"add":{"path":"part-00000-ce6aed75-3e85-4d7c-90de-9d465e9acc04-c000.snappy.parquet","partitionValues":{},"size":503,"modificationTime":1679943486000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":140},\"maxValues\":{\"id\":144},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00001-4707caa4-d293-4b4a-aeef-fa4d5815e732-c000.snappy.parquet","partitionValues":{},"size":502,"modificationTime":1679943486000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":145},\"maxValues\":{\"id\":149},\"nullCount\":{\"id\":0}}"}} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/_last_checkpoint b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/_last_checkpoint new file mode 100644 index 000000000..5379cf59e --- /dev/null +++ b/kernel-default/src/test/resources/basic-with-checkpoint/_delta_log/_last_checkpoint @@ -0,0 +1 @@ 
+{"version":10,"size":24,"sizeInBytes":11658,"numOfAddFiles":22,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true
,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"d4dd43c87695abaede4556e73b008658"} diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-0d9c05f4-8afc-4325-b1e0-ea32e4eff918-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-0d9c05f4-8afc-4325-b1e0-ea32e4eff918-c000.snappy.parquet new file mode 100644 index 000000000..bb3d66a10 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-0d9c05f4-8afc-4325-b1e0-ea32e4eff918-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-100e4547-5ff3-4735-9550-7757ca23c61d-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-100e4547-5ff3-4735-9550-7757ca23c61d-c000.snappy.parquet new file mode 100644 index 000000000..7cc96b8f0 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-100e4547-5ff3-4735-9550-7757ca23c61d-c000.snappy.parquet differ diff --git 
a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-14b8a37a-107b-455f-ab94-8f55e44c004b-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-14b8a37a-107b-455f-ab94-8f55e44c004b-c000.snappy.parquet new file mode 100644 index 000000000..6cc50340f Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-14b8a37a-107b-455f-ab94-8f55e44c004b-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-1bbb3853-04b4-4539-a112-be7140314153-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-1bbb3853-04b4-4539-a112-be7140314153-c000.snappy.parquet new file mode 100644 index 000000000..3fb7d89de Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-1bbb3853-04b4-4539-a112-be7140314153-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-2a210d80-a4e6-4a1c-8716-ee0b542aee08-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-2a210d80-a4e6-4a1c-8716-ee0b542aee08-c000.snappy.parquet new file mode 100644 index 000000000..3fae86f5c Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-2a210d80-a4e6-4a1c-8716-ee0b542aee08-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-326010e2-01f4-4dfb-90a7-98cbc04a60d1-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-326010e2-01f4-4dfb-90a7-98cbc04a60d1-c000.snappy.parquet new file mode 100644 index 000000000..eb0d6f5a0 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-326010e2-01f4-4dfb-90a7-98cbc04a60d1-c000.snappy.parquet differ diff --git 
a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-3317387d-183d-4db7-ac3e-596901d90de0-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-3317387d-183d-4db7-ac3e-596901d90de0-c000.snappy.parquet new file mode 100644 index 000000000..321e390f6 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-3317387d-183d-4db7-ac3e-596901d90de0-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-5c99dc53-38c0-420f-a91b-6df7a4c27a2b-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-5c99dc53-38c0-420f-a91b-6df7a4c27a2b-c000.snappy.parquet new file mode 100644 index 000000000..c1ce679c4 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-5c99dc53-38c0-420f-a91b-6df7a4c27a2b-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-5e9186c7-c7b0-4c4d-9f22-1c0cd403142c-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-5e9186c7-c7b0-4c4d-9f22-1c0cd403142c-c000.snappy.parquet new file mode 100644 index 000000000..3437174a5 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-5e9186c7-c7b0-4c4d-9f22-1c0cd403142c-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-6e367682-7cd1-48e6-bc2f-bc94aa94d1a3-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-6e367682-7cd1-48e6-bc2f-bc94aa94d1a3-c000.snappy.parquet new file mode 100644 index 000000000..77a3eb1f4 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-6e367682-7cd1-48e6-bc2f-bc94aa94d1a3-c000.snappy.parquet differ diff --git 
a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-8d5f08ff-261b-4315-99cb-d289a3191368-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-8d5f08ff-261b-4315-99cb-d289a3191368-c000.snappy.parquet new file mode 100644 index 000000000..c3c199e9b Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-8d5f08ff-261b-4315-99cb-d289a3191368-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-a65ab59f-72fd-44c9-a73e-e2d09459f836-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-a65ab59f-72fd-44c9-a73e-e2d09459f836-c000.snappy.parquet new file mode 100644 index 000000000..e4cc55491 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-a65ab59f-72fd-44c9-a73e-e2d09459f836-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-ce6aed75-3e85-4d7c-90de-9d465e9acc04-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-ce6aed75-3e85-4d7c-90de-9d465e9acc04-c000.snappy.parquet new file mode 100644 index 000000000..b5b4a32c7 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-ce6aed75-3e85-4d7c-90de-9d465e9acc04-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-e51a2d2a-d1a3-437e-a428-f5afe93d4619-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-e51a2d2a-d1a3-437e-a428-f5afe93d4619-c000.snappy.parquet new file mode 100644 index 000000000..f86acec33 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-e51a2d2a-d1a3-437e-a428-f5afe93d4619-c000.snappy.parquet differ diff --git 
a/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-eb1dae3f-8c89-46c3-818f-491cc673936a-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-eb1dae3f-8c89-46c3-818f-491cc673936a-c000.snappy.parquet new file mode 100644 index 000000000..df2168a41 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00000-eb1dae3f-8c89-46c3-818f-491cc673936a-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-30432f6b-710f-440c-8145-adbaed187c63-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-30432f6b-710f-440c-8145-adbaed187c63-c000.snappy.parquet new file mode 100644 index 000000000..4d1e8a34e Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-30432f6b-710f-440c-8145-adbaed187c63-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-39e6196f-2259-4ba4-b1d6-005712cd7784-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-39e6196f-2259-4ba4-b1d6-005712cd7784-c000.snappy.parquet new file mode 100644 index 000000000..e0fe71634 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-39e6196f-2259-4ba4-b1d6-005712cd7784-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-3cd0b397-0ac3-48ac-88ab-3cc21542e303-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-3cd0b397-0ac3-48ac-88ab-3cc21542e303-c000.snappy.parquet new file mode 100644 index 000000000..ef8769b27 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-3cd0b397-0ac3-48ac-88ab-3cc21542e303-c000.snappy.parquet differ diff --git 
a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-43e387db-3e56-44f3-8965-9187a80fce9a-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-43e387db-3e56-44f3-8965-9187a80fce9a-c000.snappy.parquet new file mode 100644 index 000000000..c58f4de23 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-43e387db-3e56-44f3-8965-9187a80fce9a-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-4707caa4-d293-4b4a-aeef-fa4d5815e732-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-4707caa4-d293-4b4a-aeef-fa4d5815e732-c000.snappy.parquet new file mode 100644 index 000000000..61674db0a Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-4707caa4-d293-4b4a-aeef-fa4d5815e732-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-4e30e0a7-63d2-4d2f-a028-b92058c3c8cf-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-4e30e0a7-63d2-4d2f-a028-b92058c3c8cf-c000.snappy.parquet new file mode 100644 index 000000000..97de096b8 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-4e30e0a7-63d2-4d2f-a028-b92058c3c8cf-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-51925029-c591-4366-b3e9-aeea97594037-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-51925029-c591-4366-b3e9-aeea97594037-c000.snappy.parquet new file mode 100644 index 000000000..24a5e954a Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-51925029-c591-4366-b3e9-aeea97594037-c000.snappy.parquet differ diff --git 
a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-63b224e2-ba72-4b95-af02-5af2367d4130-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-63b224e2-ba72-4b95-af02-5af2367d4130-c000.snappy.parquet new file mode 100644 index 000000000..8c978ab1a Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-63b224e2-ba72-4b95-af02-5af2367d4130-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-8be0e82d-ce51-43a6-92eb-df71a9088173-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-8be0e82d-ce51-43a6-92eb-df71a9088173-c000.snappy.parquet new file mode 100644 index 000000000..0f4615d4e Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-8be0e82d-ce51-43a6-92eb-df71a9088173-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-a65a81c6-292a-49f2-8eea-82c0299cdfb3-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-a65a81c6-292a-49f2-8eea-82c0299cdfb3-c000.snappy.parquet new file mode 100644 index 000000000..c1a9b218b Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-a65a81c6-292a-49f2-8eea-82c0299cdfb3-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-a9a33a7f-26fa-447d-8b34-863b5f695f06-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-a9a33a7f-26fa-447d-8b34-863b5f695f06-c000.snappy.parquet new file mode 100644 index 000000000..3a054ac8f Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-a9a33a7f-26fa-447d-8b34-863b5f695f06-c000.snappy.parquet differ diff --git 
a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-ba1ceb1e-6a37-4e2e-8e97-a17b9b1bb33d-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-ba1ceb1e-6a37-4e2e-8e97-a17b9b1bb33d-c000.snappy.parquet new file mode 100644 index 000000000..56eeef03d Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-ba1ceb1e-6a37-4e2e-8e97-a17b9b1bb33d-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-d64b05c7-d80d-4eff-8c58-d209003ee4c0-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-d64b05c7-d80d-4eff-8c58-d209003ee4c0-c000.snappy.parquet new file mode 100644 index 000000000..c7b6d174e Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-d64b05c7-d80d-4eff-8c58-d209003ee4c0-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-ed427b16-f597-432a-a49e-135b126d38a8-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-ed427b16-f597-432a-a49e-135b126d38a8-c000.snappy.parquet new file mode 100644 index 000000000..58d4b9abe Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-ed427b16-f597-432a-a49e-135b126d38a8-c000.snappy.parquet differ diff --git a/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-fdaa71cc-84b2-43b1-b049-7cd36dbaa0de-c000.snappy.parquet b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-fdaa71cc-84b2-43b1-b049-7cd36dbaa0de-c000.snappy.parquet new file mode 100644 index 000000000..a3a7e7661 Binary files /dev/null and b/kernel-default/src/test/resources/basic-with-checkpoint/part-00001-fdaa71cc-84b2-43b1-b049-7cd36dbaa0de-c000.snappy.parquet differ diff --git a/kernel-default/src/test/scala/io/delta/core/TableSuite.scala 
/**
 * Checks that the latest snapshot of each golden table reports the expected
 * version number.
 */
class TableSuite extends AnyFunSuite with GoldenTableUtils {

  /**
   * Opens the named golden table with a [[DefaultTableHelper]], loads its latest
   * snapshot and asserts that the snapshot version equals `expectedVersion`.
   */
  private def checkLatestVersion(goldenTable: String, expectedVersion: Long): Unit = {
    withGoldenTable(goldenTable) { tablePath =>
      val latest = Table.forPath(tablePath, new DefaultTableHelper()).getLatestSnapshot
      assert(latest.getVersion === expectedVersion)
    }
  }

  test("can load latest table version - with a checkpoint") {
    checkLatestVersion("basic-with-checkpoint", 14)
  }

  test("can load latest table version - without checkpoint") {
    checkLatestVersion("basic-no-checkpoint", 8)
  }
}
private lazy val resourcesDirectory = { + val dir = new File("src/test/resources").getCanonicalFile + require(dir.exists()) + dir + } + + def withGoldenTable(tableName: String)(testFunc: String => Unit): Unit = { + val tablePath = new File(resourcesDirectory, tableName).getCanonicalPath + testFunc(tablePath) + } + +} diff --git a/kernel-default/src/test/scala/io/delta/core/util/GoldenTablesGenerator.scala b/kernel-default/src/test/scala/io/delta/core/util/GoldenTablesGenerator.scala new file mode 100644 index 000000000..d1f4040b3 --- /dev/null +++ b/kernel-default/src/test/scala/io/delta/core/util/GoldenTablesGenerator.scala @@ -0,0 +1,69 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Regenerates the golden Delta tables stored under `src/test/resources`.
 *
 * Tests are only registered when the `GENERATE_GOLDEN_TABLES` environment
 * variable is present, so a normal test run does nothing here:
 *
 * GENERATE_GOLDEN_TABLES=1 build/sbt 'kernelDefault/testOnly *GoldenTablesGenerator'
 */
class GoldenTablesGenerator extends QueryTest with SharedSparkSession {

  override def sparkConf: SparkConf = super.sparkConf
    .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

  /** True when the caller opted in to regenerating the tables. */
  private val generationEnabled = sys.env.contains("GENERATE_GOLDEN_TABLES")

  /** Canonical `src/test/resources` directory; resolved relative to the working directory. */
  private lazy val goldenRoot = {
    val root = new File("src/test/resources").getCanonicalFile
    require(root.exists())
    root
  }

  /**
   * Registers a test named `tableName` that wipes any existing copy of the table
   * and then invokes `generator` with the table's canonical path.
   * Nothing is registered unless generation is enabled.
   */
  private def generate(tableName: String)(generator: String => Unit): Unit = {
    if (generationEnabled) {
      test(tableName) {
        val target = new File(goldenRoot, tableName)
        JavaUtils.deleteRecursively(target)

        generator(target.getCanonicalPath)
      }
    }
  }

  /**
   * Appends commits 0..`lastCommit` (inclusive) to the Delta table at `path`;
   * commit `i` writes the id range [i * 10, (i + 1) * 10).
   */
  private def appendCommits(path: String, lastCommit: Int): Unit = {
    (0 to lastCommit).foreach { i =>
      spark.range(i * 10, (i + 1) * 10).write.format("delta").mode("append").save(path)
    }
  }

  // 9 commits (versions 0-8).
  generate("basic-no-checkpoint") { path =>
    appendCommits(path, lastCommit = 8)
  }

  // 15 commits (versions 0-14); presumably enough for Delta to write a checkpoint
  // with its default interval -- TODO confirm.
  generate("basic-with-checkpoint") { path =>
    appendCommits(path, lastCommit = 14)
  }
}
kernel/src/main/java/io/delta/core/Scan.java diff --git a/core/src/main/java/io/delta/core/ScanBuilder.java b/kernel/src/main/java/io/delta/core/ScanBuilder.java similarity index 100% rename from core/src/main/java/io/delta/core/ScanBuilder.java rename to kernel/src/main/java/io/delta/core/ScanBuilder.java diff --git a/core/src/main/java/io/delta/core/ScanTask.java b/kernel/src/main/java/io/delta/core/ScanTask.java similarity index 100% rename from core/src/main/java/io/delta/core/ScanTask.java rename to kernel/src/main/java/io/delta/core/ScanTask.java diff --git a/core/src/main/java/io/delta/core/Snapshot.java b/kernel/src/main/java/io/delta/core/Snapshot.java similarity index 86% rename from core/src/main/java/io/delta/core/Snapshot.java rename to kernel/src/main/java/io/delta/core/Snapshot.java index 2bc3e45d6..962296754 100644 --- a/core/src/main/java/io/delta/core/Snapshot.java +++ b/kernel/src/main/java/io/delta/core/Snapshot.java @@ -4,6 +4,8 @@ public interface Snapshot { + long getVersion(); + StructType getSchema(); ScanBuilder getScanBuilder(); diff --git a/core/src/main/java/io/delta/core/Table.java b/kernel/src/main/java/io/delta/core/Table.java similarity index 68% rename from core/src/main/java/io/delta/core/Table.java rename to kernel/src/main/java/io/delta/core/Table.java index b79834519..2997a835c 100644 --- a/core/src/main/java/io/delta/core/Table.java +++ b/kernel/src/main/java/io/delta/core/Table.java @@ -1,11 +1,12 @@ package io.delta.core; import io.delta.core.helpers.TableHelper; +import io.delta.core.internal.TableImpl; public interface Table { static Table forPath(String path, TableHelper helper) { - return null; + return TableImpl.forPath(path, helper); } Snapshot getLatestSnapshot(); diff --git a/core/src/main/java/io/delta/core/data/ColumnVector.java b/kernel/src/main/java/io/delta/core/data/ColumnVector.java similarity index 100% rename from core/src/main/java/io/delta/core/data/ColumnVector.java rename to 
/**
 * Positional, typed access to the fields of a single record.
 *
 * <p>Only {@link #getLong(int)} is currently part of the interface. The other
 * typed accessors are kept below as comments and are not available to callers.
 */
public interface Row {

//    boolean isNullAt(int ordinal);
//
//    boolean getBoolean(int ordinal);
//
//    byte getByte(int ordinal);
//
//    short getShort(int ordinal);
//
//    int getInt(int ordinal);

    /**
     * Returns the {@code long} value of the field at {@code ordinal}.
     *
     * @param ordinal position of the field (presumably zero-based -- confirm
     *                against the implementations)
     * @return the field value as a {@code long}
     */
    long getLong(int ordinal);

//    float getFloat(int ordinal);
//
//    double getDouble(int ordinal);
//
//    BigDecimal getDecimal(int ordinal, int precision, int scale);
//
//    String getString(int ordinal);
//
//    byte[] getBinary(int ordinal);
//
//    Timestamp getTimestamp(int ordinal);
//
//    Date getDate(int ordinal);
//
//    Row getRecord(int ordinal);
//
//    List getList(int ordinal);
//
//    Map getMap(int ordinal);
}
kernel/src/main/java/io/delta/core/expressions/Expression.java diff --git a/kernel/src/main/java/io/delta/core/fs/FileStatus.java b/kernel/src/main/java/io/delta/core/fs/FileStatus.java new file mode 100644 index 000000000..6d0887a70 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/fs/FileStatus.java @@ -0,0 +1,14 @@ +package io.delta.core.fs; + +public interface FileStatus { + + default Path path() { + return new Path(pathStr()); + } + + String pathStr(); + + long length(); + + long modificationTime(); +} diff --git a/kernel/src/main/java/io/delta/core/fs/Path.java b/kernel/src/main/java/io/delta/core/fs/Path.java new file mode 100644 index 000000000..661cc3455 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/fs/Path.java @@ -0,0 +1,554 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.core.fs; + +import java.io.InvalidObjectException; +import java.io.ObjectInputValidation; +import java.io.Serializable; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.regex.Pattern; + +/** + * Names a file or directory in a FileSystem. + * Path strings use slash as the directory separator. 
/**
 * Names a file or directory in a FileSystem.
 * Path strings use slash as the directory separator.
 *
 * <p>Instances wrap a normalized hierarchical {@link URI}. Ordering, equality
 * and hashing are all delegated to that URI.
 *
 * Taken from https://github.com/apache/hadoop/blob/branch-3.3.4/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java
 */
public class Path
    implements Comparable<Path>, Serializable, ObjectInputValidation {

  /**
   * The directory separator, a slash.
   */
  public static final String SEPARATOR = "/";

  /**
   * The directory separator, a slash, as a character.
   */
  public static final char SEPARATOR_CHAR = '/';

  /**
   * The current directory, ".".
   */
  public static final String CUR_DIR = ".";

  /**
   * Whether the current host is a Windows machine.
   */
  public static final boolean WINDOWS =
      System.getProperty("os.name").startsWith("Windows");

  /**
   * Pre-compiled regular expressions to detect path formats.
   */
  private static final Pattern HAS_DRIVE_LETTER_SPECIFIER =
      Pattern.compile("^/?[a-zA-Z]:");

  /** Pre-compiled regular expressions to detect duplicated slashes. */
  private static final Pattern SLASHES = Pattern.compile("/+");

  private static final long serialVersionUID = 0xad00f;

  private URI uri; // a hierarchical uri

  /**
   * Test whether this Path uses a scheme and is relative.
   * Pathnames with scheme and relative path are illegal.
   *
   * @throws IllegalArgumentException if the path has a scheme but a relative
   *         path part
   */
  void checkNotSchemeWithRelative() {
    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
      throw new IllegalArgumentException(
          "Unsupported name: has scheme but relative path-part");
    }
  }

  /**
   * Test whether this Path is relative and has no scheme.
   *
   * @throws IllegalArgumentException if the path is relative and scheme-less
   */
  void checkNotRelative() {
    if (!isAbsolute() && toUri().getScheme() == null) {
      throw new IllegalArgumentException("Path is relative");
    }
  }

  /**
   * Return a version of the given Path without the scheme information.
   *
   * @param path the source Path
   * @return a copy of this Path without the scheme information
   */
  public static Path getPathWithoutSchemeAndAuthority(Path path) {
    // This code depends on Path.toString() to remove the leading slash before
    // the drive specification on Windows.
    Path newPath = path.isUriPathAbsolute() ?
        new Path(null, null, path.toUri().getPath()) :
        path;
    return newPath;
  }

  /**
   * Create a new Path based on the child path resolved against the parent path.
   *
   * @param parent the parent path
   * @param child the child path
   */
  public Path(String parent, String child) {
    this(new Path(parent), new Path(child));
  }

  /**
   * Create a new Path based on the child path resolved against the parent path.
   *
   * @param parent the parent path
   * @param child the child path
   */
  public Path(Path parent, String child) {
    this(parent, new Path(child));
  }

  /**
   * Create a new Path based on the child path resolved against the parent path.
   *
   * @param parent the parent path
   * @param child the child path
   */
  public Path(String parent, Path child) {
    this(new Path(parent), child);
  }

  /**
   * Create a new Path based on the child path resolved against the parent path.
   *
   * @param parent the parent path
   * @param child the child path
   */
  public Path(Path parent, Path child) {
    // Add a slash to parent's path so resolution is compatible with URI's
    URI parentUri = parent.uri;
    String parentPath = parentUri.getPath();
    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
      try {
        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
            parentUri.getPath() + "/", null, parentUri.getFragment());
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException(e);
      }
    }
    URI resolved = parentUri.resolve(child.uri);
    initialize(resolved.getScheme(), resolved.getAuthority(),
        resolved.getPath(), resolved.getFragment());
  }

  /**
   * Reject null and empty path strings.
   *
   * @param path the candidate path string
   * @throws IllegalArgumentException if {@code path} is null or empty
   */
  private void checkPathArg(String path) throws IllegalArgumentException {
    // disallow construction of a Path from an empty string
    if (path == null) {
      throw new IllegalArgumentException(
          "Can not create a Path from a null string");
    }
    if (path.length() == 0) {
      throw new IllegalArgumentException(
          "Can not create a Path from an empty string");
    }
  }

  /**
   * Construct a path from a String.  Path strings are URIs, but with
   * unescaped elements and some additional normalization.
   *
   * @param pathString the path string
   * @throws IllegalArgumentException if the string is null, empty, or cannot
   *         be turned into a URI
   */
  public Path(String pathString) throws IllegalArgumentException {
    checkPathArg(pathString);

    // We can't use 'new URI(String)' directly, since it assumes things are
    // escaped, which we don't require of Paths.

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
      pathString = "/" + pathString;
    }

    // parse uri components
    String scheme = null;
    String authority = null;

    int start = 0;

    // parse uri scheme, if any
    int colon = pathString.indexOf(':');
    int slash = pathString.indexOf('/');
    if ((colon != -1) &&
        ((slash == -1) || (colon < slash))) {     // has a scheme
      scheme = pathString.substring(0, colon);
      start = colon + 1;
    }

    // parse uri authority, if any
    if (pathString.startsWith("//", start) &&
        (pathString.length() - start > 2)) {       // has authority
      int nextSlash = pathString.indexOf('/', start + 2);
      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
      authority = pathString.substring(start + 2, authEnd);
      start = authEnd;
    }

    // uri path is the rest of the string -- query & fragment not supported
    String path = pathString.substring(start, pathString.length());

    initialize(scheme, authority, path, null);
  }

  /**
   * Construct a path from a URI
   *
   * @param aUri the source URI
   */
  public Path(URI aUri) {
    uri = aUri.normalize();
  }

  /**
   * Construct a Path from components.
   *
   * @param scheme the scheme
   * @param authority the authority
   * @param path the path
   */
  public Path(String scheme, String authority, String path) {
    checkPathArg(path);

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
      path = "/" + path;
    }

    // add "./" in front of Linux relative paths so that a path containing
    // a colon e.q. "a:b" will not be interpreted as scheme "a".
    if (!WINDOWS && path.charAt(0) != '/') {
      path = "./" + path;
    }

    initialize(scheme, authority, path, null);
  }

  /**
   * Build and store the normalized URI for this path from its components.
   *
   * @throws IllegalArgumentException if the components do not form a valid URI
   */
  private void initialize(String scheme, String authority, String path,
      String fragment) {
    try {
      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
          .normalize();
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * Merge 2 paths such that the second path is appended relative to the first.
   * The returned path has the scheme and authority of the first path.  On
   * Windows, the drive specification in the second path is discarded.
   *
   * @param path1 the first path
   * @param path2 the second path, to be appended relative to path1
   * @return the merged path
   */
  public static Path mergePaths(Path path1, Path path2) {
    String path2Str = path2.toUri().getPath();
    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
    // Add path components explicitly, because simply concatenating two path
    // string is not safe, for example:
    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
    return new Path(path1.toUri().getScheme(),
        path1.toUri().getAuthority(),
        path1.toUri().getPath() + path2Str);
  }

  /**
   * Normalize a path string to use non-duplicated forward slashes as
   * the path separator and remove any trailing path separators.
   *
   * @param scheme the URI scheme. Used to deduce whether we
   * should replace backslashes or not
   * @param path the scheme-specific part
   * @return the normalized path string
   */
  private static String normalizePath(String scheme, String path) {
    // Remove duplicated slashes.
    path = SLASHES.matcher(path).replaceAll("/");

    // Remove backslashes if this looks like a Windows path. Avoid
    // the substitution if it looks like a non-local URI.
    if (WINDOWS &&
        (hasWindowsDrive(path) ||
            (scheme == null) ||
            (scheme.isEmpty()) ||
            (scheme.equals("file")))) {
      path = path.replace("\\", "/");
    }

    // trim trailing slash from non-root path (ignoring windows drive)
    int minLength = startPositionWithoutWindowsDrive(path) + 1;
    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
      path = path.substring(0, path.length() - 1);
    }

    return path;
  }

  /** Whether the host is Windows and the path starts with a drive specifier. */
  private static boolean hasWindowsDrive(String path) {
    return (WINDOWS && HAS_DRIVE_LETTER_SPECIFIER.matcher(path).find());
  }

  /** Index of the first character after a Windows drive specifier, or 0 if none. */
  private static int startPositionWithoutWindowsDrive(String path) {
    if (hasWindowsDrive(path)) {
      return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2;
    } else {
      return 0;
    }
  }

  /**
   * Determine whether a given path string represents an absolute path on
   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
   *
   * @param pathString the path string to evaluate
   * @param slashed true if the given path is prefixed with "/" (not consulted
   *                by this implementation)
   * @return true if the supplied path looks like an absolute path with a Windows
   * drive-specifier
   */
  public static boolean isWindowsAbsolutePath(final String pathString,
      final boolean slashed) {
    int start = startPositionWithoutWindowsDrive(pathString);
    return start > 0
        && pathString.length() > start
        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
            (pathString.charAt(start) == '\\'));
  }

  /**
   * Convert this Path to a URI.
   *
   * @return this Path as a URI
   */
  public URI toUri() {
    return uri;
  }

  /**
   * Returns true if the path component (i.e. directory) of this URI is
   * absolute <strong>and</strong> the scheme is null, <b>and</b> the authority
   * is null.
   *
   * @return whether the path is absolute and the URI has no scheme nor
   * authority parts
   */
  public boolean isAbsoluteAndSchemeAuthorityNull() {
    return (isUriPathAbsolute() &&
        uri.getScheme() == null && uri.getAuthority() == null);
  }

  /**
   * Returns true if the path component (i.e. directory) of this URI is
   * absolute.
   *
   * @return whether this URI's path is absolute
   */
  public boolean isUriPathAbsolute() {
    int start = startPositionWithoutWindowsDrive(uri.getPath());
    return uri.getPath().startsWith(SEPARATOR, start);
  }

  /**
   * Returns true if the path component (i.e. directory) of this URI is
   * absolute.  This method is a wrapper for {@link #isUriPathAbsolute()}.
   *
   * @return whether this URI's path is absolute
   */
  public boolean isAbsolute() {
    return isUriPathAbsolute();
  }

  /**
   * Returns true if and only if this path represents the root of a file system.
   *
   * @return true if and only if this path represents the root of a file system
   */
  public boolean isRoot() {
    return getParent() == null;
  }

  /**
   * Returns the final component of this path.
   *
   * @return the final component of this path
   */
  public String getName() {
    String path = uri.getPath();
    int slash = path.lastIndexOf(SEPARATOR);
    return path.substring(slash + 1);
  }

  /**
   * Returns the parent of a path or null if at root.
   *
   * @return the parent of a path or null if at root
   */
  public Path getParent() {
    String path = uri.getPath();
    int lastSlash = path.lastIndexOf('/');
    int start = startPositionWithoutWindowsDrive(path);
    if ((path.length() == start) ||               // empty path
        (lastSlash == start && path.length() == start + 1)) { // at root
      return null;
    }
    String parent;
    if (lastSlash == -1) {
      parent = CUR_DIR;
    } else {
      parent = path.substring(0, lastSlash == start ? start + 1 : lastSlash);
    }
    return new Path(uri.getScheme(), uri.getAuthority(), parent);
  }

  /**
   * Adds a suffix to the final name in the path.
   *
   * @param suffix the suffix to add
   * @return a new path with the suffix added
   */
  public Path suffix(String suffix) {
    return new Path(getParent(), getName() + suffix);
  }

  @Override
  public String toString() {
    // we can't use uri.toString(), which escapes everything, because we want
    // illegal characters unescaped in the string, for glob processing, etc.
    StringBuilder buffer = new StringBuilder();
    if (uri.getScheme() != null) {
      buffer.append(uri.getScheme())
          .append(":");
    }
    if (uri.getAuthority() != null) {
      buffer.append("//")
          .append(uri.getAuthority());
    }
    if (uri.getPath() != null) {
      String path = uri.getPath();
      if (path.indexOf('/') == 0 &&
          hasWindowsDrive(path) &&                // has windows drive
          uri.getScheme() == null &&              // but no scheme
          uri.getAuthority() == null) {           // or authority
        path = path.substring(1);                 // remove slash before drive
      }
      buffer.append(path);
    }
    if (uri.getFragment() != null) {
      buffer.append("#")
          .append(uri.getFragment());
    }
    return buffer.toString();
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof Path)) {
      return false;
    }
    Path that = (Path) o;
    return this.uri.equals(that.uri);
  }

  @Override
  public int hashCode() {
    return uri.hashCode();
  }

  /**
   * Orders paths by their underlying URIs.
   *
   * @param o the path to compare against
   * @return the result of comparing the two URIs
   */
  @Override
  public int compareTo(Path o) {
    return this.uri.compareTo(o.uri);
  }

  /**
   * Returns the number of elements in this path.
   *
   * @return the number of elements in this path
   */
  public int depth() {
    String path = uri.getPath();
    int depth = 0;
    // the root path "/" has depth 0; otherwise count one element per slash
    int slash = path.length() == 1 && path.charAt(0) == '/' ? -1 : 0;
    while (slash != -1) {
      depth++;
      slash = path.indexOf(SEPARATOR, slash + 1);
    }
    return depth;
  }

  /**
   * Returns a qualified path object.
   *
   * @param defaultUri if this path is missing the scheme or authority
   * components, borrow them from this URI
   * @param workingDir if this path isn't absolute, treat it as relative to this
   * working directory
   * @return this path if it contains a scheme and authority and is absolute, or
   * a new path that includes a scheme and authority and is fully qualified
   */
  public Path makeQualified(URI defaultUri, Path workingDir) {
    Path path = this;
    if (!isAbsolute()) {
      path = new Path(workingDir, this);
    }

    URI pathUri = path.toUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();
    String fragment = pathUri.getFragment();

    if (scheme != null &&
        (authority != null || defaultUri.getAuthority() == null)) {
      return path;
    }

    if (scheme == null) {
      scheme = defaultUri.getScheme();
    }

    if (authority == null) {
      authority = defaultUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    URI newUri = null;
    try {
      newUri = new URI(scheme, authority,
          normalizePath(scheme, pathUri.getPath()), null, fragment);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    return new Path(newUri);
  }

  /**
   * Validate the contents of a deserialized Path, so as
   * to defend against malicious object streams.
   *
   * @throws InvalidObjectException if there's no URI
   */
  @Override
  public void validateObject() throws InvalidObjectException {
    if (uri == null) {
      throw new InvalidObjectException("No URI in deserialized Path");
    }
  }
}
readSchema) throws FileNotFoundException; /** Uses the readSchema for partition pruning. */ CloseableIterator readParquetFile(String path, StructType readSchema); diff --git a/kernel/src/main/java/io/delta/core/internal/LogSegment.java b/kernel/src/main/java/io/delta/core/internal/LogSegment.java new file mode 100644 index 000000000..4fc449509 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/LogSegment.java @@ -0,0 +1,45 @@ +package io.delta.core.internal; + +import java.util.List; +import java.util.Optional; + +import io.delta.core.fs.FileStatus; +import io.delta.core.fs.Path; + +public class LogSegment { + public final Path logPath; + public final long version; + public final List deltas; + public final List checkpoints; + public final Optional checkpointVersionOpt; + public final long lastCommitTimestamp; + + /** + * Provides information around which files in the transaction log need to be read to create + * the given version of the log. + * + * @param logPath The path to the _delta_log directory + * @param version The Snapshot version to generate + * @param deltas The delta commit files (.json) to read + * @param checkpoints The checkpoint file(s) to read + * @param checkpointVersionOpt The checkpoint version used to start replay + * @param lastCommitTimestamp The "unadjusted" timestamp of the last commit within this segment. + * By unadjusted, we mean that the commit timestamps may not + * necessarily be monotonically increasing for the commits within + * this segment. 
+ */ + public LogSegment( + Path logPath, + long version, + List deltas, + List checkpoints, + Optional checkpointVersionOpt, + long lastCommitTimestamp) { + this.logPath = logPath; + this.version = version; + this.deltas = deltas; + this.checkpoints = checkpoints; + this.checkpointVersionOpt = checkpointVersionOpt; + this.lastCommitTimestamp = lastCommitTimestamp; + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/TableImpl.java b/kernel/src/main/java/io/delta/core/internal/TableImpl.java new file mode 100644 index 000000000..4adfec676 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/TableImpl.java @@ -0,0 +1,76 @@ +package io.delta.core.internal; + +import java.net.URI; +import java.nio.file.Paths; +import java.util.concurrent.Callable; +import java.util.concurrent.locks.ReentrantLock; + +import io.delta.core.Snapshot; +import io.delta.core.Table; +import io.delta.core.helpers.TableHelper; +import io.delta.core.internal.checkpoint.Checkpointer; +import io.delta.core.fs.Path; +import io.delta.core.internal.snapshot.SnapshotManager; +import io.delta.core.internal.util.Logging; + +public class TableImpl implements Table, Logging { + + public static Table forPath(String path, TableHelper helper) { + // TODO: take in a configuration and use conf.get("fs.defaultFS") + final URI defaultUri = URI.create("file:///"); + final Path workingDir = new Path(Paths.get(".").toAbsolutePath().toUri()); + + final Path dataPath = new Path(path).makeQualified(defaultUri, workingDir); + final Path logPath = new Path(dataPath, "_delta_log"); + + return new TableImpl(logPath, dataPath, helper); + } + + public final Path logPath; + public final Path dataPath; + public final TableHelper tableHelper; + public final Checkpointer checkpointer; + public final SnapshotManager snapshotManager; + + private final ReentrantLock lock; + + public TableImpl( + Path logPath, + Path dataPath, + TableHelper tableHelper) { + logDebug( + String.format("TableImpl created with 
logPath %s, dataPath %s", logPath, dataPath) + ); + + this.logPath = logPath; + this.dataPath = dataPath; + this.tableHelper = tableHelper; + + this.lock = new ReentrantLock(); + this.checkpointer = new Checkpointer(this); + this.snapshotManager = new SnapshotManager(this); + } + + @Override + public Snapshot getLatestSnapshot() { + return snapshotManager.update(); + } + + public T lockInterruptibly(Callable body) { + try { + lock.lockInterruptibly(); + + try { + return body.call(); + } catch (Exception e) { + // failed body.call() + throw new RuntimeException(e); + } finally { + lock.unlock(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); // failed lock.lockInterruptibly(): restore the interrupt flag before rethrowing + throw new RuntimeException(e); + } + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/actions/Action.java b/kernel/src/main/java/io/delta/core/internal/actions/Action.java new file mode 100644 index 000000000..a261a36a6 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/actions/Action.java @@ -0,0 +1,5 @@ +package io.delta.core.internal.actions; + +public interface Action { + +} diff --git a/kernel/src/main/java/io/delta/core/internal/actions/AddFile.java b/kernel/src/main/java/io/delta/core/internal/actions/AddFile.java new file mode 100644 index 000000000..3aa81357a --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/actions/AddFile.java @@ -0,0 +1,5 @@ +package io.delta.core.internal.actions; + +public class AddFile extends FileAction { + +} diff --git a/kernel/src/main/java/io/delta/core/internal/actions/FileAction.java b/kernel/src/main/java/io/delta/core/internal/actions/FileAction.java new file mode 100644 index 000000000..84844c4b4 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/actions/FileAction.java @@ -0,0 +1,5 @@ +package io.delta.core.internal.actions; + +public abstract class FileAction implements Action { + +} diff --git a/kernel/src/main/java/io/delta/core/internal/checkpoint/CheckpointInstance.java
b/kernel/src/main/java/io/delta/core/internal/checkpoint/CheckpointInstance.java new file mode 100644 index 000000000..81ee39dad --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/checkpoint/CheckpointInstance.java @@ -0,0 +1,59 @@ +package io.delta.core.internal.checkpoint; + +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +import io.delta.core.fs.Path; +import io.delta.core.internal.lang.Ordered; +import io.delta.core.internal.util.FileNames; + +public class CheckpointInstance implements Ordered { + public static final CheckpointInstance MAX_VALUE = new CheckpointInstance(-1); + + public final long version; + public final Optional numParts; + + public CheckpointInstance(Path path) { + this(FileNames.getFileVersion(path)); + } + + public CheckpointInstance(long version) { + this(version, Optional.empty()); + } + + public CheckpointInstance(long version, Optional numParts) { + this.version = version; + this.numParts = numParts; + } + + boolean isNotLaterThan(CheckpointInstance other) { + if (other == CheckpointInstance.MAX_VALUE) return true; + return version <= other.version; + } + + public List getCorrespondingFiles(Path path) { + assert (this != CheckpointInstance.MAX_VALUE) : "Can't get files for CheckpointVersion.MaxValue."; + return numParts + .map(parts -> FileNames.checkpointFileWithParts(path, version, parts)) + .orElseGet(() -> Collections.singletonList(FileNames.checkpointFileSingular(path, version))); + } + + @Override + public int compareTo(CheckpointInstance that) { + if (version == that.version) { + return Integer.compare(numParts.orElse(1), that.numParts.orElse(1)); + } else { + // Long.compare orders the versions without the overflow that subtracting two longs can cause + return Long.compare(version, that.version);
+ } + } + + @Override + public String toString() { + return "CheckpointInstance{" + + "version=" + version + + ", numParts=" + numParts + + '}'; + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/checkpoint/CheckpointMetaData.java b/kernel/src/main/java/io/delta/core/internal/checkpoint/CheckpointMetaData.java new file mode 100644 index 000000000..4d1cb6491 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/checkpoint/CheckpointMetaData.java @@ -0,0 +1,35 @@ +package io.delta.core.internal.checkpoint; + +import io.delta.core.data.Row; +import io.delta.core.types.LongType; +import io.delta.core.types.StructType; + +public class CheckpointMetaData { + + public static CheckpointMetaData fromRow(Row row) { + return new CheckpointMetaData( + row.getLong(0), + row.getLong(1) + ); + } + + public static StructType READ_SCHEMA = new StructType() + .add("version", new LongType()) + .add("size", new LongType()); + + public final long version; + public final long size; + + public CheckpointMetaData(long version, long size) { + this.version = version; + this.size = size; + } + + @Override + public String toString() { + return "CheckpointMetaData{" + + "version=" + version + + ", size=" + size + + '}'; + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/checkpoint/Checkpointer.java b/kernel/src/main/java/io/delta/core/internal/checkpoint/Checkpointer.java new file mode 100644 index 000000000..0e5657c05 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/checkpoint/Checkpointer.java @@ -0,0 +1,92 @@ +package io.delta.core.internal.checkpoint; + +import java.io.FileNotFoundException; +import java.util.*; +import java.util.stream.Collectors; + +import io.delta.core.data.Row; +import io.delta.core.fs.Path; +import io.delta.core.internal.TableImpl; +import io.delta.core.internal.util.Logging; +import io.delta.core.utils.CloseableIterator; + +public class Checkpointer implements Logging { + + //////////////////// + // Static
Methods // + //////////////////// + + /** The name of the last checkpoint file */ + public static final String LAST_CHECKPOINT_FILE_NAME = "_last_checkpoint"; + + /** + * Given a list of checkpoint files, pick the latest complete checkpoint instance which is not + * later than `notLaterThan`. + */ + public static Optional getLatestCompleteCheckpointFromList( + List instances, + CheckpointInstance notLaterThan) { + final List completeCheckpoints = instances + .stream() + .filter(c -> c.isNotLaterThan(notLaterThan)) + .collect(Collectors.groupingBy(c -> c)) + // Map> + .entrySet() + .stream() + .filter(entry -> { + final CheckpointInstance key = entry.getKey(); + final List inst = entry.getValue(); + + if (key.numParts.isPresent()) { + return inst.size() == entry.getKey().numParts.get(); + } else { + return inst.size() == 1; + } + }) + .map(Map.Entry::getKey) + .collect(Collectors.toList()); + + if (completeCheckpoints.isEmpty()) { + return Optional.empty(); + } else { + return Optional.of(Collections.max(completeCheckpoints)); + } + } + + /////////////////////////////// + // Instance Fields / Methods // + /////////////////////////////// + + /** The path to the file that holds metadata about the most recent checkpoint. */ + private final Path LAST_CHECKPOINT; + + private final TableImpl tableImpl; + + public Checkpointer(TableImpl tableImpl) { + this.tableImpl = tableImpl; + + this.LAST_CHECKPOINT = new Path(tableImpl.logPath, LAST_CHECKPOINT_FILE_NAME); + } + + /** Returns information about the most recent checkpoint. */ + public Optional readLastCheckpointFile() { + return loadMetadataFromFile(0); + } + + /** Loads the checkpoint metadata from the _last_checkpoint file. 
*/ + private Optional loadMetadataFromFile(int tries) { + try { + final CloseableIterator jsonIter = tableImpl + .tableHelper + .readJsonFile(LAST_CHECKPOINT.toString(), CheckpointMetaData.READ_SCHEMA); + + if (!jsonIter.hasNext()) { + return Optional.empty(); + } + + return Optional.of(CheckpointMetaData.fromRow(jsonIter.next())); + } catch (FileNotFoundException ex) { + return Optional.empty(); + } + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/checksum/VersionChecksum.java b/kernel/src/main/java/io/delta/core/internal/checksum/VersionChecksum.java new file mode 100644 index 000000000..a5ac20653 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/checksum/VersionChecksum.java @@ -0,0 +1,4 @@ +package io.delta.core.internal.checksum; + +public class VersionChecksum { +} diff --git a/kernel/src/main/java/io/delta/core/internal/lang/ListUtils.java b/kernel/src/main/java/io/delta/core/internal/lang/ListUtils.java new file mode 100644 index 000000000..22ea32aa9 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/lang/ListUtils.java @@ -0,0 +1,18 @@ +package io.delta.core.internal.lang; + +import java.util.List; +import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +public final class ListUtils { + + private ListUtils() { } + + public static Tuple2, List> partition(List list, Predicate predicate) { + final Map> partitionMap = list + .stream() + .collect(Collectors.partitioningBy(predicate)); + return new Tuple2<>(partitionMap.get(true), partitionMap.get(false)); + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/lang/Optional.java b/kernel/src/main/java/io/delta/core/internal/lang/Optional.java new file mode 100644 index 000000000..6a1f38194 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/lang/Optional.java @@ -0,0 +1,7 @@ +package io.delta.core.internal.lang; + +public class Optional { + // exists: Optional.map(v -> f(v)).orElse(false) + + // forall 
Optional.map(v -> f(v)).orElse(true) +} diff --git a/kernel/src/main/java/io/delta/core/internal/lang/Ordered.java b/kernel/src/main/java/io/delta/core/internal/lang/Ordered.java new file mode 100644 index 000000000..15ce429f7 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/lang/Ordered.java @@ -0,0 +1,20 @@ +package io.delta.core.internal.lang; + +public interface Ordered extends Comparable { + + default boolean lessThan(T that) { + return this.compareTo(that) < 0; + } + + default boolean lessThanOrEqualTo(T that) { + return this.compareTo(that) <= 0; + } + + default boolean greaterThan(T that) { + return this.compareTo(that) > 0; + } + + default boolean greaterThanOrEqualTo(T that) { + return this.compareTo(that) >= 0; + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/lang/Tuple2.java b/kernel/src/main/java/io/delta/core/internal/lang/Tuple2.java new file mode 100644 index 000000000..f73f2e185 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/lang/Tuple2.java @@ -0,0 +1,12 @@ +package io.delta.core.internal.lang; + +public class Tuple2 { + + public final K _1; + public final V _2; + + public Tuple2(K _1, V _2){ + this._1 = _1; + this._2 = _2; + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/snapshot/InitialSnapshot.java b/kernel/src/main/java/io/delta/core/internal/snapshot/InitialSnapshot.java new file mode 100644 index 000000000..7e80b5b88 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/snapshot/InitialSnapshot.java @@ -0,0 +1,8 @@ +package io.delta.core.internal.snapshot; + +public class InitialSnapshot extends SnapshotImpl { + public InitialSnapshot() { + // TODO + super(null, -1, null, null, -1); + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/snapshot/SnapshotImpl.java b/kernel/src/main/java/io/delta/core/internal/snapshot/SnapshotImpl.java new file mode 100644 index 000000000..3465d0105 --- /dev/null +++ 
b/kernel/src/main/java/io/delta/core/internal/snapshot/SnapshotImpl.java @@ -0,0 +1,44 @@ +package io.delta.core.internal.snapshot; + +import io.delta.core.ScanBuilder; +import io.delta.core.Snapshot; +import io.delta.core.fs.Path; +import io.delta.core.internal.LogSegment; +import io.delta.core.internal.TableImpl; +import io.delta.core.types.StructType; + +public class SnapshotImpl implements Snapshot { + private final Path path; + private final long version; + private final LogSegment logSegment; + private final TableImpl tableImpl; + private final long timestamp; + + public SnapshotImpl( + Path path, + long version, + LogSegment logSegment, + TableImpl tableImpl, + long timestamp) { + this.path = path; + this.version = version; + this.logSegment = logSegment; + this.tableImpl = tableImpl; + this.timestamp = timestamp; + } + + @Override + public long getVersion() { + return version; + } + + @Override + public StructType getSchema() { + return null; + } + + @Override + public ScanBuilder getScanBuilder() { + return null; + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/snapshot/SnapshotManager.java b/kernel/src/main/java/io/delta/core/internal/snapshot/SnapshotManager.java new file mode 100644 index 000000000..b89d842b4 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/snapshot/SnapshotManager.java @@ -0,0 +1,440 @@ +package io.delta.core.internal.snapshot; + +import java.io.FileNotFoundException; +import java.util.*; +import java.util.stream.Collectors; + +import io.delta.core.Snapshot; +import io.delta.core.fs.FileStatus; +import io.delta.core.fs.Path; +import io.delta.core.internal.LogSegment; +import io.delta.core.internal.TableImpl; +import io.delta.core.internal.checkpoint.CheckpointInstance; +import io.delta.core.internal.checkpoint.CheckpointMetaData; +import io.delta.core.internal.checkpoint.Checkpointer; +import io.delta.core.internal.checksum.VersionChecksum; +import io.delta.core.internal.lang.ListUtils; +import 
io.delta.core.internal.lang.Tuple2; +import io.delta.core.internal.util.FileNames; +import io.delta.core.internal.util.Logging; +import io.delta.core.utils.CloseableIterator; + +public class SnapshotManager implements Logging { + + ///////////////////////////// + // Static Fields / Methods // + ///////////////////////////// + + /** + * - Verify the versions are contiguous. + * - Verify the versions start with `expectedStartVersion` if it's specified. + * - Verify the versions end with `expectedEndVersion` if it's specified. + */ + public static void verifyDeltaVersions( + List versions, + Optional expectedStartVersion, + Optional expectedEndVersion) { + if (!versions.isEmpty()) { + // TODO(SCOTT): check if contiguous + } + expectedStartVersion.ifPresent(v -> { + assert (!versions.isEmpty() && Objects.equals(versions.get(0), v)) : + String.format("Did not get the first delta file version %s to compute Snapshot", v); + }); + expectedEndVersion.ifPresent(v -> { + assert (!versions.isEmpty() && Objects.equals(versions.get(versions.size() - 1), v)) : + String.format("Did not get the last delta file version %s to compute Snapshot", v); + }); + } + + /////////////////////////////// + // Instance Fields / Methods // + /////////////////////////////// + + private final TableImpl tableImpl; + volatile private Snapshot currentSnapshot; + + public SnapshotManager(TableImpl tableImpl) { + this.tableImpl = tableImpl; + this.currentSnapshot = getSnapshotAtInit(); + } + + ///////////////// + // Public APIs // + ///////////////// + + /** + * Update current snapshot by applying the new delta files if any. + */ + public Snapshot update() { + // TODO + return currentSnapshot; + } + + ////////////////// + // Private APIs // + ////////////////// + + /** Get an iterator of files in the _delta_log directory starting with the startVersion. 
*/ + private CloseableIterator listFrom(long startVersion) throws FileNotFoundException { + logDebug(String.format("startVersion: %s", startVersion)); + return tableImpl + .tableHelper + .listFiles(FileNames.listingPrefix(tableImpl.logPath, startVersion)); + } + + /** Returns true if the path is delta log files. Delta log files can be delta commit file + * (e.g., 000000000.json), or checkpoint file. (e.g., 000000001.checkpoint.00001.00003.parquet) + * + * @param path Path of a file + * @return Boolean Whether the file is delta log files + */ + private boolean isDeltaCommitOrCheckpointFile(Path path) { + return FileNames.isCheckpointFile(path) || FileNames.isDeltaFile(path); + } + + /** Returns an iterator containing a list of files found from the provided path */ + private Optional> listFromOrNone(long startVersion) { + // LIST the directory, starting from the provided lower bound (treat missing dir as empty). + // NOTE: "empty/missing" is _NOT_ equivalent to "contains no useful commit files." + try { + CloseableIterator results = listFrom(startVersion); + if (results.hasNext()) { + return Optional.of(results); + } else { + return Optional.empty(); + } + } catch (FileNotFoundException e) { + return Optional.empty(); + } + } + + /** + * Returns the delta files and checkpoint files starting from the given `startVersion`. + * `versionToLoad` is an optional parameter to set the max bound. It's usually used to load a + * table snapshot for a specific version. + * + * @param startVersion the version to start. Inclusive. + * @param versionToLoad the optional parameter to set the max version we should return. Inclusive. + * @return Some array of files found (possibly empty, if no usable commit files are present), or + * None if the listing returned no files at all. 
+ */ + protected final Optional> listDeltaAndCheckpointFiles( + long startVersion, + Optional versionToLoad) { + logDebug(String.format("startVersion: %s, versionToLoad: %s", startVersion, versionToLoad)); + + return listFromOrNone(startVersion).map(fileStatusesIter -> { + final List output = new ArrayList<>(); + + while(fileStatusesIter.hasNext()) { + final FileStatus fileStatus = fileStatusesIter.next(); + + // Pick up all checkpoint and delta files + if (!isDeltaCommitOrCheckpointFile(fileStatus.path())) { + continue; + } + + // Checkpoint files of 0 size are invalid but may be ignored silently when read, + // hence we drop them so that we never pick up such checkpoints. + if (FileNames.isCheckpointFile(fileStatus.path()) && fileStatus.length() == 0) { + continue; + } + + // Take files until the version we want to load + final boolean versionWithinRange = versionToLoad + .map(v -> FileNames.getFileVersion(fileStatus.path()) <= v) + .orElse(true); + + if (!versionWithinRange) { + break; + } + + output.add(fileStatus); + } + + return output; + }); + } + + /** + * Load the Snapshot for this Delta table at initialization. This method uses the + * `lastCheckpoint` file as a hint on where to start listing the transaction log directory. If + * the _delta_log directory doesn't exist, this method will return an `InitialSnapshot`. 
+ */ + private SnapshotImpl getSnapshotAtInit() { + final long currentTimestamp = System.currentTimeMillis(); + final Optional lastCheckpointOpt = + tableImpl.checkpointer.readLastCheckpointFile(); + final Optional logSegmentOpt = getLogSegmentFrom(lastCheckpointOpt); + return logSegmentOpt + .map(logSegment -> createSnapshot(logSegment, lastCheckpointOpt, Optional.empty())) + .orElse(new InitialSnapshot()); + } + + private SnapshotImpl createSnapshot( + LogSegment initSegment, + Optional checkpointMetadataOptHint, + Optional checksumOpt) { + final String startingFromStr = initSegment + .checkpointVersionOpt + .map(v -> String.format(" starting from checkpoint version %s.", v)) + .orElse("."); + logInfo(() -> String.format("Loading version %s%s", initSegment.version, startingFromStr)); + + // TODO(SCOTT): createSnapshotFromGivenOrEquivalentLogSegment + + return new SnapshotImpl( + tableImpl.logPath, + initSegment.version, + initSegment, + tableImpl, + initSegment.lastCommitTimestamp + ); + } + + /** + * Get the LogSegment that will help in computing the Snapshot of the table at DeltaLog + * initialization, or None if the directory was empty/missing. + * + * @param startingCheckpoint A checkpoint that we can start our listing from + */ + private Optional getLogSegmentFrom( + Optional startingCheckpoint) { + logDebug(String.format("startingCheckpoint: %s ", startingCheckpoint)); + return getLogSegmentForVersion(startingCheckpoint.map(x -> x.version), Optional.empty()); + } + + /** + * Get a list of files that can be used to compute a Snapshot at version `versionToLoad`, If + * `versionToLoad` is not provided, will generate the list of files that are needed to load the + * latest version of the Delta table. This method also performs checks to ensure that the delta + * files are contiguous. + * + * @param startCheckpoint A potential start version to perform the listing of the DeltaLog, + * typically that of a known checkpoint. 
If this version's not provided, + * we will start listing from version 0. + * @param versionToLoad A specific version to load. Typically used with time travel and the + * Delta streaming source. If not provided, we will try to load the latest + * version of the table. + * @return Some LogSegment to build a Snapshot if files do exist after the given + * startCheckpoint. None, if the directory was missing or empty. + */ + private Optional getLogSegmentForVersion( + Optional startCheckpoint, + Optional versionToLoad) { + // List from the starting checkpoint. If a checkpoint doesn't exist, this will still return + // deltaVersion=0. + final Optional> newFiles = + listDeltaAndCheckpointFiles(startCheckpoint.orElse(0L), versionToLoad); + return getLogSegmentForVersion(startCheckpoint, versionToLoad, newFiles); + } + + /** + * Helper function for the getLogSegmentForVersion above. Called with a provided files list, + * and will then try to construct a new LogSegment using that. + */ + private Optional getLogSegmentForVersion( + final Optional startCheckpointOpt, + Optional versionToLoadOpt, + Optional> filesOpt) { + final List newFiles; + if (filesOpt.isPresent()) { + newFiles = filesOpt.get(); + } else { + // No files found even when listing from 0 => empty directory => table does not exist yet. + if (!startCheckpointOpt.isPresent()) return Optional.empty(); + + // FIXME(ryan.johnson): We always write the commit and checkpoint files before updating + // _last_checkpoint. If the listing came up empty, then we either encountered a + // list-after-put inconsistency in the underlying log store, or somebody corrupted the + // table by deleting files. Either way, we can't safely continue. + // + // For now, we preserve existing behavior by returning Array.empty, which will trigger a + // recursive call to [[getLogSegmentForVersion]] below (same as before the refactor). 
+ newFiles = Collections.emptyList(); + } + logDebug(() -> + String.format( + "newFiles: %s", + Arrays.toString(newFiles.stream().map(x -> x.path().getName()).toArray()) + ) + ); + + if (newFiles.isEmpty() && !startCheckpointOpt.isPresent()) { + // We can't construct a snapshot because the directory contained no usable commit + // files... but we can't return Optional.empty either, because it was not truly empty. + throw new RuntimeException( + String.format("Empty directory: %s", tableImpl.logPath) + ); + } else if (newFiles.isEmpty()) { + // The directory may be deleted and recreated and we may have stale state in our DeltaLog + // singleton, so try listing from the first version + return getLogSegmentForVersion(Optional.empty(), versionToLoadOpt); + } + + Tuple2, List> checkpointsAndDeltas = ListUtils + .partition( + newFiles, + fileStatus -> FileNames.isCheckpointFile(fileStatus.path()) + ); + final List checkpoints = checkpointsAndDeltas._1; + final List deltas = checkpointsAndDeltas._2; + + logDebug(() -> + String.format( + "\ncheckpoints: %s\ndeltas: %s", + Arrays.toString(checkpoints.stream().map(x -> x.path().getName()).toArray()), + Arrays.toString(deltas.stream().map(x -> x.path().getName()).toArray()) + ) + ); + + // Find the latest checkpoint in the listing that is not older than the versionToLoad + final CheckpointInstance lastCheckpoint = versionToLoadOpt.map(CheckpointInstance::new) + .orElse(CheckpointInstance.MAX_VALUE); + logDebug(String.format("lastCheckpoint: %s", lastCheckpoint)); + + final List checkpointFiles = checkpoints + .stream() + .map(f -> new CheckpointInstance(f.path())) + .collect(Collectors.toList()); + logDebug(() -> String.format("checkpointFiles: %s", Arrays.toString(checkpointFiles.toArray()))); + + final Optional newCheckpointOpt = + Checkpointer.getLatestCompleteCheckpointFromList(checkpointFiles, lastCheckpoint); + logDebug(String.format("newCheckpointOpt: %s", newCheckpointOpt)); + + final long newCheckpointVersion = 
newCheckpointOpt + .map(c -> c.version) + .orElseGet(() -> { + // If we do not have any checkpoint, pass new checkpoint version as -1 so that first + // delta version can be 0. + startCheckpointOpt.map(startCheckpoint -> { + // `startCheckpointOpt` was given but no checkpoint found on delta log. This means that the + // last checkpoint we thought should exist (the `_last_checkpoint` file) no longer exists. + // Try to look up another valid checkpoint and create `LogSegment` from it. + // + // FIXME(ryan.johnson): Something has gone very wrong if the checkpoint doesn't + // exist at all. This code should only handle rejected incomplete checkpoints. + final long snapshotVersion = versionToLoadOpt.orElseGet(() -> { + final FileStatus lastDelta = deltas.get(deltas.size() - 1); + return FileNames.deltaVersion(lastDelta.path()); + }); + + return getLogSegmentWithMaxExclusiveCheckpointVersion(snapshotVersion, startCheckpoint) + .orElseThrow(() -> + // No alternative found, but the directory contains files so we cannot return None. + new RuntimeException( + String.format("Checkpoint file to load version: %s is missing.", startCheckpoint) + ) + ); + + }); + + return -1L; + }); + logDebug(String.format("newCheckpointVersion: %s", newCheckpointVersion)); + + // TODO(SCOTT): we can calculate deltasAfterCheckpoint and deltaVersions more efficiently + + // If there is a new checkpoint, start new lineage there. If `newCheckpointVersion` is -1, + // it will list all existing delta files. 
+ final List deltasAfterCheckpoint = deltas + .stream() + .filter(fileStatus -> FileNames.deltaVersion(fileStatus.path()) > newCheckpointVersion) + .collect(Collectors.toList()); + + logDebug(() -> + String.format( + "deltasAfterCheckpoint: %s", + Arrays.toString(deltasAfterCheckpoint.stream().map(x -> x.path().getName()).toArray()) + ) + ); + + final LinkedList deltaVersions = deltasAfterCheckpoint + .stream() + .map(fileStatus -> FileNames.deltaVersion(fileStatus.path())) + .collect(Collectors.toCollection(LinkedList::new)); + + logDebug(() -> String.format("deltaVersions: %s", Arrays.toString(deltaVersions.toArray()))); + + // We may just be getting a checkpoint file after the filtering + if (!deltaVersions.isEmpty()) { + if (deltaVersions.getFirst() != newCheckpointVersion + 1) { + throw new RuntimeException( + String.format( + "Log file not found.\nExpected: %s\nFound:%s", + FileNames.deltaFile(tableImpl.logPath,newCheckpointVersion + 1), + FileNames.deltaFile(tableImpl.logPath,deltaVersions.get(0)) + ) + ); + } + verifyDeltaVersions(deltaVersions, Optional.of(newCheckpointVersion + 1), versionToLoadOpt); + } + + // TODO(SCOTT): double check newCheckpointOpt.get() won't error out + + final long newVersion = deltaVersions.isEmpty() ? newCheckpointOpt.get().version : deltaVersions.getLast(); + + // In the case where `deltasAfterCheckpoint` is empty, `deltas` should still not be empty, + // they may just be before the checkpoint version unless we have a bug in log cleanup. 
+ if (deltas.isEmpty()) { + throw new IllegalStateException( + String.format("Could not find any delta files for version %s", newVersion) + ); + } + + if (versionToLoadOpt.map(v -> v != newVersion).orElse(false)) { + throw new IllegalStateException( + String.format("Trying to load a non-existent version %s", versionToLoadOpt.get()) + ); + } + + final long lastCommitTimestamp = deltas.get(deltas.size() - 1).modificationTime(); + + final List newCheckpointFiles = newCheckpointOpt.map(newCheckpoint -> { + final Set newCheckpointPaths = + new HashSet<>(newCheckpoint.getCorrespondingFiles(tableImpl.logPath)); + final List newCheckpointFileList = checkpoints + .stream() + .filter(f -> newCheckpointPaths.contains(f.path())) + .collect(Collectors.toList()); + assert (newCheckpointFileList.size() == newCheckpointPaths.size()) : + String.format( + "Failed in getting the file information for:\n%s\namong\n%s", + newCheckpointPaths.stream().map(Path::toString).collect(Collectors.toList()), + checkpoints + .stream() + .map(FileStatus::path) + .map(Path::toString) + .collect(Collectors.joining("\n - ")) + ); + return newCheckpointFileList; + }).orElse(Collections.emptyList()); + + return Optional.of( + new LogSegment( + tableImpl.logPath, + newVersion, + deltasAfterCheckpoint, + newCheckpointFiles, + newCheckpointOpt.map(x -> x.version), + lastCommitTimestamp + ) + ); + } + + /** + * Returns a [[LogSegment]] for reading `snapshotVersion` such that the segment's checkpoint + * version (if checkpoint present) is LESS THAN `maxExclusiveCheckpointVersion`. + * This is useful when trying to skip a bad checkpoint. Returns `None` when we are not able to + * construct such [[LogSegment]], for example, no checkpoint can be used but we don't have the + * entire history from version 0 to version `snapshotVersion`. 
+ */ + private Optional getLogSegmentWithMaxExclusiveCheckpointVersion( + long snapshotVersion, + long maxExclusiveCheckpointVersion) { + // TODO + return Optional.empty(); + } +} diff --git a/kernel/src/main/java/io/delta/core/internal/util/FileNames.java b/kernel/src/main/java/io/delta/core/internal/util/FileNames.java new file mode 100644 index 000000000..9b02e43a8 --- /dev/null +++ b/kernel/src/main/java/io/delta/core/internal/util/FileNames.java @@ -0,0 +1,102 @@ +package io.delta.core.internal.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +import io.delta.core.fs.Path; + +public final class FileNames { + + private FileNames() { } + + private static final Pattern DELTA_FILE_PATTERN = + Pattern.compile("\\d+\\.json"); + + private static final Pattern CHECKPOINT_FILE_PATTERN = + Pattern.compile("\\d+\\.checkpoint(\\.\\d+\\.\\d+)?\\.parquet"); + + /** Returns the delta (json format) path for a given delta file. */ + public static String deltaFile(Path path, long version) { + return String.format("%s/%020d.json", path, version); + } + + /** Returns the version for the given delta path. */ + public static long deltaVersion(Path path) { + return Long.parseLong(path.getName().split("\\.")[0]); + } + + /** Returns the version for the given checkpoint path. */ + public static long checkpointVersion(Path path) { + return Long.parseLong(path.getName().split("\\.")[0]); + } + + /** + * Returns the prefix of all delta log files for the given version. + * + * Intended for use with listFrom to get all files from this version onwards. The returned Path + * will not exist as a file. + */ + public static String listingPrefix(Path path, long version) { + return String.format("%s/%020d.", path, version); + } + + /** + * Returns the path for a singular checkpoint up to the given version. + * + * In a future protocol version this path will stop being written. 
+     */
+    public static Path checkpointFileSingular(Path path, long version) {
+        return new Path(path, String.format("%020d.checkpoint.parquet", version));
+    }
+
+    /**
+     * Returns the paths for all parts of the checkpoint up to the given version.
+     *
+     * In a future protocol version we will write this path instead of checkpointFileSingular.
+     *
+     * Example of the format: 00000000000000004915.checkpoint.0000000020.0000000060.parquet is
+     * checkpoint part 20 out of 60 for the snapshot at version 4915. Zero padding is for
+     * lexicographic sorting. Parts are numbered from 1 to numParts inclusive.
+     */
+    public static List<Path> checkpointFileWithParts(Path path, long version, int numParts) {
+        final List<Path> output = new ArrayList<>();
+        for (int i = 1; i < numParts + 1; i++) {
+            output.add(
+                new Path(
+                    path,
+                    String.format("%020d.checkpoint.%010d.%010d.parquet", version, i, numParts)
+                )
+            );
+        }
+        return output;
+    }
+
+    public static boolean isCheckpointFile(Path path) {
+        return CHECKPOINT_FILE_PATTERN.matcher(path.getName()).matches(); // whole name must match
+    }
+
+    public static boolean isDeltaFile(Path path) {
+        return DELTA_FILE_PATTERN.matcher(path.getName()).matches(); // whole name must match
+    }
+
+    /**
+     * Get the version of the checkpoint, checksum or delta file. Throws an error if an unexpected
+     * file type is seen. These unexpected files should be filtered out to ensure forward
+     * compatibility in cases where new file types are added, but without an explicit protocol
+     * upgrade.
+     */
+    public static long getFileVersion(Path path) {
+        if (isCheckpointFile(path)) {
+            return checkpointVersion(path);
+        } else if (isDeltaFile(path)) {
+            return deltaVersion(path);
+//        } else if (isChecksumFile(path)) {
+//            checksumVersion(path)
+        } else {
+            throw new AssertionError(
+                String.format("Unexpected file type found in transaction log: %s", path)
+            );
+        }
+    }
+}
diff --git a/kernel/src/main/java/io/delta/core/internal/util/Logging.java b/kernel/src/main/java/io/delta/core/internal/util/Logging.java
new file mode 100644
index 000000000..076ab65e7
--- /dev/null
+++ b/kernel/src/main/java/io/delta/core/internal/util/Logging.java
@@ -0,0 +1,30 @@
+package io.delta.core.internal.util;
+
+import java.util.function.Supplier;
+
+/**
+ * Mixin with default logging helpers. Every message is written to standard output, prefixed
+ * with the runtime class of the implementing object and " :: ".
+ */
+public interface Logging {
+
+    /** Prints {@code msg} to standard output. */
+    default void logInfo(String msg) {
+        System.out.println(this.getClass() + " :: " + msg);
+    }
+
+    /** Prints the value produced by {@code msg} to standard output. */
+    default void logInfo(Supplier<String> msg) {
+        System.out.println(this.getClass() + " :: " + msg.get());
+    }
+
+    /** Prints {@code msg} to standard output; currently the same as {@link #logInfo(String)}. */
+    default void logDebug(String msg) {
+        System.out.println(this.getClass() + " :: " + msg);
+    }
+
+    /** Prints the value produced by {@code msg}; the supplier is always invoked. */
+    default void logDebug(Supplier<String> msg) {
+        System.out.println(this.getClass() + " :: " + msg.get());
+    }
+}
diff --git a/kernel/src/main/java/io/delta/core/types/DataType.java b/kernel/src/main/java/io/delta/core/types/DataType.java
new file mode 100644
index 000000000..368093d6f
--- /dev/null
+++ b/kernel/src/main/java/io/delta/core/types/DataType.java
@@ -0,0 +1,20 @@
+package io.delta.core.types;
+
+import java.util.Locale;
+
+/** Abstract base for data types; the type name is derived from the concrete class name. */
+public abstract class DataType {
+    /**
+     * Returns the simple class name, minus a trailing "Type" suffix if present, lower-cased
+     * with {@link Locale#ROOT} (for example a class named {@code LongType} yields "long").
+     */
+    public String typeName() {
+        final String simpleName = this.getClass().getSimpleName();
+        final String suffix = "Type";
+        final String base = simpleName.endsWith(suffix)
+            ? simpleName.substring(0, simpleName.length() - suffix.length())
+            : simpleName;
+        return base.toLowerCase(Locale.ROOT);
+    }
+}
+
diff --git a/kernel/src/main/java/io/delta/core/types/LongType.java b/kernel/src/main/java/io/delta/core/types/LongType.java
new file mode 100644
index 000000000..2e222bd48
--- /dev/null
+++ b/kernel/src/main/java/io/delta/core/types/LongType.java
@@
-0,0 +1,4 @@
+package io.delta.core.types;
+
+public class LongType extends DataType {
+}
diff --git a/kernel/src/main/java/io/delta/core/types/StructField.java b/kernel/src/main/java/io/delta/core/types/StructField.java
new file mode 100644
index 000000000..4d089f8fd
--- /dev/null
+++ b/kernel/src/main/java/io/delta/core/types/StructField.java
@@ -0,0 +1,15 @@
+package io.delta.core.types;
+
+/** Immutable holder for one field's name, data type and nullability flag. */
+public class StructField {
+    public final String name;
+    public final DataType dataType;
+    public final boolean nullable;
+    // private final FieldMetadata metadata;
+
+    public StructField(String name, DataType dataType, boolean nullable) {
+        this.name = name;
+        this.dataType = dataType;
+        this.nullable = nullable;
+    }
+}
diff --git a/kernel/src/main/java/io/delta/core/types/StructType.java b/kernel/src/main/java/io/delta/core/types/StructType.java
new file mode 100644
index 000000000..60604c89c
--- /dev/null
+++ b/kernel/src/main/java/io/delta/core/types/StructType.java
@@ -0,0 +1,59 @@
+package io.delta.core.types;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** An ordered list of {@link StructField}s; {@code add} returns a new instance. */
+public final class StructType extends DataType {
+    private final List<StructField> fields;
+
+    /** Creates a struct with no fields. */
+    public StructType() {
+        this(new ArrayList<>());
+    }
+
+    /** Creates a struct holding a copy of {@code fields}, so later caller edits are not seen. */
+    public StructType(List<StructField> fields) {
+        this.fields = new ArrayList<>(fields);
+    }
+
+    /** Returns a new struct with {@code field} appended; this instance is left unchanged. */
+    public StructType add(StructField field) {
+        final List<StructField> fieldsCopy = new ArrayList<>(fields);
+        fieldsCopy.add(field);
+
+        return new StructType(fieldsCopy);
+    }
+
+    /** Returns a new struct with a nullable field of the given name and type appended. */
+    public StructType add(String name, DataType dataType) {
+        return add(new StructField(name, dataType, true /* nullable */));
+    }
+
+    /** Returns a read-only view of the fields, in the order they were added. */
+    public List<StructField> fields() {
+        return Collections.unmodifiableList(fields);
+    }
+
+    /** Returns the field names, in the same order as {@link #fields()}. */
+    public List<String> fieldNames() {
+        return fields.stream().map(f -> f.name).collect(Collectors.toList());
+    }
+
+    /** Returns the number of fields. */
+    public int length() {
+        return fields.size();
+    }
+
+    /** Returns the field at the zero-based {@code index}. */
+    public StructField at(int index) {
+        return fields.get(index);
+    }
+
+    /** Placeholder: the tree rendering is not implemented yet. */
+    public String treeString() {
+        return
"TODO"; + } +} diff --git a/core/src/main/java/io/delta/core/utils/CloseableIterator.java b/kernel/src/main/java/io/delta/core/utils/CloseableIterator.java similarity index 100% rename from core/src/main/java/io/delta/core/utils/CloseableIterator.java rename to kernel/src/main/java/io/delta/core/utils/CloseableIterator.java diff --git a/standalone/src/main/scala/io/delta/standalone/internal/DeltaLogImpl.scala b/standalone/src/main/scala/io/delta/standalone/internal/DeltaLogImpl.scala index ec8f6b375..467d5647d 100644 --- a/standalone/src/main/scala/io/delta/standalone/internal/DeltaLogImpl.scala +++ b/standalone/src/main/scala/io/delta/standalone/internal/DeltaLogImpl.scala @@ -17,6 +17,7 @@ package io.delta.standalone.internal import java.io.IOException +import java.nio.file.Paths import java.sql.Timestamp import java.util.TimeZone import java.util.concurrent.locks.ReentrantLock