From bb15bf8688358fe7d8f8266785e98e70a0f11329 Mon Sep 17 00:00:00 2001 From: Scott Sandre Date: Thu, 16 Mar 2023 12:34:50 -0700 Subject: [PATCH] initial skeleton --- build.sbt | 17 ++++++++ .../java/io/delta/core/ColumnMappingMode.java | 18 ++++++++ core/src/main/java/io/delta/core/Scan.java | 8 ++++ .../main/java/io/delta/core/ScanBuilder.java | 15 +++++++ .../src/main/java/io/delta/core/ScanTask.java | 9 ++++ .../src/main/java/io/delta/core/Snapshot.java | 10 +++++ core/src/main/java/io/delta/core/Table.java | 12 ++++++ .../java/io/delta/core/data/ColumnVector.java | 4 ++ .../io/delta/core/data/ColumnarBatch.java | 15 +++++++ .../io/delta/core/data/DeletionVector.java | 5 +++ .../src/main/java/io/delta/core/data/Row.java | 42 +++++++++++++++++++ .../io/delta/core/expressions/Expression.java | 4 ++ .../java/io/delta/core/fs/FileStatus.java | 10 +++++ .../io/delta/core/helpers/ScanHelper.java | 26 ++++++++++++ .../core/helpers/SupportsDeletionVector.java | 9 ++++ .../io/delta/core/helpers/TableHelper.java | 29 +++++++++++++ .../java/io/delta/core/types/DataType.java | 4 ++ .../java/io/delta/core/types/StructType.java | 3 ++ .../delta/core/utils/CloseableIterator.java | 6 +++ .../core/helpers/DefaultTableHelper.java | 4 ++ 20 files changed, 250 insertions(+) create mode 100644 core/src/main/java/io/delta/core/ColumnMappingMode.java create mode 100644 core/src/main/java/io/delta/core/Scan.java create mode 100644 core/src/main/java/io/delta/core/ScanBuilder.java create mode 100644 core/src/main/java/io/delta/core/ScanTask.java create mode 100644 core/src/main/java/io/delta/core/Snapshot.java create mode 100644 core/src/main/java/io/delta/core/Table.java create mode 100644 core/src/main/java/io/delta/core/data/ColumnVector.java create mode 100644 core/src/main/java/io/delta/core/data/ColumnarBatch.java create mode 100644 core/src/main/java/io/delta/core/data/DeletionVector.java create mode 100644 core/src/main/java/io/delta/core/data/Row.java create mode 100644 core/src/main/java/io/delta/core/expressions/Expression.java create mode 100644 core/src/main/java/io/delta/core/fs/FileStatus.java create mode 100644 core/src/main/java/io/delta/core/helpers/ScanHelper.java create mode 100644 core/src/main/java/io/delta/core/helpers/SupportsDeletionVector.java create mode 100644 core/src/main/java/io/delta/core/helpers/TableHelper.java create mode 100644 core/src/main/java/io/delta/core/types/DataType.java create mode 100644 core/src/main/java/io/delta/core/types/StructType.java create mode 100644 core/src/main/java/io/delta/core/utils/CloseableIterator.java create mode 100644 default-core/src/main/java/io/delta/core/helpers/DefaultTableHelper.java diff --git a/build.sbt b/build.sbt index 98248beb2..7cd073729 100644 --- a/build.sbt +++ b/build.sbt @@ -796,3 +796,20 @@ lazy val flink = (project in file("flink")) // Ensure unidoc is run with tests. Must be cleaned before test for unidoc to be generated. (Test / test) := ((Test / test) dependsOn (Compile / unidoc)).value ) + +lazy val core = (project in file("core")) + .settings( + name := "delta-core", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq() + ) + +lazy val defaultCore = (project in file("default-core")) + .dependsOn(core) + .settings( + name := "delta-core-default", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq() + ) diff --git a/core/src/main/java/io/delta/core/ColumnMappingMode.java b/core/src/main/java/io/delta/core/ColumnMappingMode.java new file mode 100644 index 000000000..66cc175cc --- /dev/null +++ b/core/src/main/java/io/delta/core/ColumnMappingMode.java @@ -0,0 +1,18 @@ +package io.delta.core; + +public enum ColumnMappingMode { + NONE("none"), + ID_MAPPING("id"), + NAME_MAPPING("name"); + + private final String name; + + ColumnMappingMode(String name) { + this.name = name; + } + + @Override + public String toString() { + return name; + } +} diff --git a/core/src/main/java/io/delta/core/Scan.java b/core/src/main/java/io/delta/core/Scan.java new file mode 100644 index 000000000..288821c49 --- /dev/null +++ b/core/src/main/java/io/delta/core/Scan.java @@ -0,0 +1,8 @@ +package io.delta.core; + +import io.delta.core.utils.CloseableIterator; + +public interface Scan { + + CloseableIterator getTasks(); +} diff --git a/core/src/main/java/io/delta/core/ScanBuilder.java b/core/src/main/java/io/delta/core/ScanBuilder.java new file mode 100644 index 000000000..6b5f54afa --- /dev/null +++ b/core/src/main/java/io/delta/core/ScanBuilder.java @@ -0,0 +1,15 @@ +package io.delta.core; + +import io.delta.core.expressions.Expression; +import io.delta.core.types.StructType; + +public interface ScanBuilder { + + /** For column pruning. */ + ScanBuilder withReadSchema(StructType schema); + + /** For partition pruning and filter push down. */ + ScanBuilder withFilter(Expression filter); + + Scan build(); +} diff --git a/core/src/main/java/io/delta/core/ScanTask.java b/core/src/main/java/io/delta/core/ScanTask.java new file mode 100644 index 000000000..c4701d63b --- /dev/null +++ b/core/src/main/java/io/delta/core/ScanTask.java @@ -0,0 +1,9 @@ +package io.delta.core; + +import io.delta.core.data.ColumnarBatch; +import io.delta.core.utils.CloseableIterator; + +public interface ScanTask { + + CloseableIterator getData(); +} diff --git a/core/src/main/java/io/delta/core/Snapshot.java b/core/src/main/java/io/delta/core/Snapshot.java new file mode 100644 index 000000000..2bc3e45d6 --- /dev/null +++ b/core/src/main/java/io/delta/core/Snapshot.java @@ -0,0 +1,10 @@ +package io.delta.core; + +import io.delta.core.types.StructType; + +public interface Snapshot { + + StructType getSchema(); + + ScanBuilder getScanBuilder(); +} diff --git a/core/src/main/java/io/delta/core/Table.java b/core/src/main/java/io/delta/core/Table.java new file mode 100644 index 000000000..b79834519 --- /dev/null +++ b/core/src/main/java/io/delta/core/Table.java @@ -0,0 +1,12 @@ +package io.delta.core; + +import io.delta.core.helpers.TableHelper; + +public interface Table { + + static Table forPath(String path, TableHelper helper) { + return null; + } + + Snapshot getLatestSnapshot(); +} diff --git a/core/src/main/java/io/delta/core/data/ColumnVector.java b/core/src/main/java/io/delta/core/data/ColumnVector.java new file mode 100644 index 000000000..348ebf908 --- /dev/null +++ b/core/src/main/java/io/delta/core/data/ColumnVector.java @@ -0,0 +1,4 @@ +package io.delta.core.data; + +public interface ColumnVector extends AutoCloseable { +} diff --git a/core/src/main/java/io/delta/core/data/ColumnarBatch.java b/core/src/main/java/io/delta/core/data/ColumnarBatch.java new file mode 100644 index 000000000..456aeebe2 --- /dev/null +++ b/core/src/main/java/io/delta/core/data/ColumnarBatch.java @@ -0,0 +1,15 @@ +package io.delta.core.data; + +import io.delta.core.types.DataType; +import io.delta.core.types.StructType; + +public interface ColumnarBatch { + + StructType getSchema(); + + ColumnVector getColumnVector(String columnName); + + void addConstantColumn(String topLevelColumnName, DataType dataType, Row value); + + void rename(String currentColumnName, String newColumnName); +} diff --git a/core/src/main/java/io/delta/core/data/DeletionVector.java b/core/src/main/java/io/delta/core/data/DeletionVector.java new file mode 100644 index 000000000..5f213ccc6 --- /dev/null +++ b/core/src/main/java/io/delta/core/data/DeletionVector.java @@ -0,0 +1,5 @@ +package io.delta.core.data; + +public interface DeletionVector extends ColumnVector { + +} diff --git a/core/src/main/java/io/delta/core/data/Row.java b/core/src/main/java/io/delta/core/data/Row.java new file mode 100644 index 000000000..2e0ec9ed4 --- /dev/null +++ b/core/src/main/java/io/delta/core/data/Row.java @@ -0,0 +1,42 @@ +package io.delta.core.data; + +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; +import java.util.Map; + +public interface Row { + + boolean isNullAt(int ordinal); + + boolean getBoolean(int ordinal); + + byte getByte(int ordinal); + + short getShort(int ordinal); + + int getInt(int ordinal); + + long getLong(int ordinal); + + float getFloat(int ordinal); + + double getDouble(int ordinal); + + BigDecimal getDecimal(int ordinal, int precision, int scale); + + String getString(int ordinal); + + byte[] getBinary(int ordinal); + + Timestamp getTimestamp(int ordinal); + + Date getDate(int ordinal); + + Row getRecord(int ordinal); + + List getList(int ordinal); + + Map getMap(int ordinal); +} diff --git a/core/src/main/java/io/delta/core/expressions/Expression.java b/core/src/main/java/io/delta/core/expressions/Expression.java new file mode 100644 index 000000000..4a117af4d --- /dev/null +++ b/core/src/main/java/io/delta/core/expressions/Expression.java @@ -0,0 +1,4 @@ +package io.delta.core.expressions; + +public interface Expression { +} diff --git a/core/src/main/java/io/delta/core/fs/FileStatus.java b/core/src/main/java/io/delta/core/fs/FileStatus.java new file mode 100644 index 000000000..0f1cf6992 --- /dev/null +++ b/core/src/main/java/io/delta/core/fs/FileStatus.java @@ -0,0 +1,10 @@ +package io.delta.core.fs; + +public interface FileStatus { + + String filePath(); + + long size(); + + long modificationTime(); +} diff --git a/core/src/main/java/io/delta/core/helpers/ScanHelper.java b/core/src/main/java/io/delta/core/helpers/ScanHelper.java new file mode 100644 index 000000000..1d3af64d3 --- /dev/null +++ b/core/src/main/java/io/delta/core/helpers/ScanHelper.java @@ -0,0 +1,26 @@ +package io.delta.core.helpers; + +import java.io.Serializable; +import java.util.Optional; +import java.util.TimeZone; + +import io.delta.core.ColumnMappingMode; +import io.delta.core.data.ColumnarBatch; +import io.delta.core.expressions.Expression; +import io.delta.core.fs.FileStatus; +import io.delta.core.types.StructType; +import io.delta.core.utils.CloseableIterator; + +public interface ScanHelper extends Serializable { + + // Note: for production, will likely be a builder pattern to deal with all these optional params + + CloseableIterator readParquetDataFile( + FileStatus fileStatus, + StructType physicalReadSchema, + TimeZone timeZone, + Optional columnMappingMode, + Optional skippingFilter, + Optional deletionVectorFileStatus + ); +} diff --git a/core/src/main/java/io/delta/core/helpers/SupportsDeletionVector.java b/core/src/main/java/io/delta/core/helpers/SupportsDeletionVector.java new file mode 100644 index 000000000..18a5901ec --- /dev/null +++ b/core/src/main/java/io/delta/core/helpers/SupportsDeletionVector.java @@ -0,0 +1,9 @@ +package io.delta.core.helpers; + +import java.io.DataInputStream; + +/** Should only be extended by ScanHelper. */ +public interface SupportsDeletionVector { + + DataInputStream readDeletionVector(String path); +} diff --git a/core/src/main/java/io/delta/core/helpers/TableHelper.java b/core/src/main/java/io/delta/core/helpers/TableHelper.java new file mode 100644 index 000000000..e28480b95 --- /dev/null +++ b/core/src/main/java/io/delta/core/helpers/TableHelper.java @@ -0,0 +1,29 @@ +package io.delta.core.helpers; + +import io.delta.core.data.Row; +import io.delta.core.expressions.Expression; +import io.delta.core.fs.FileStatus; +import io.delta.core.types.StructType; +import io.delta.core.utils.CloseableIterator; + +public interface TableHelper { + + CloseableIterator listFiles(String path); + + CloseableIterator listFiles(String path, String prefixToListFrom); + + CloseableIterator readJsonFile(String path, StructType readSchema); + + /** Uses the readSchema for partition pruning. */ + CloseableIterator readParquetFile(String path, StructType readSchema); + + /** Uses the readSchema for partition pruning and the skippingFilter for data filtering. */ + CloseableIterator readParquetFile( + String path, + StructType readSchema, + Expression skippingFilter); + + Row parseStats(String statsJson); + + ScanHelper getScanHelper(); +} diff --git a/core/src/main/java/io/delta/core/types/DataType.java b/core/src/main/java/io/delta/core/types/DataType.java new file mode 100644 index 000000000..fe83dd087 --- /dev/null +++ b/core/src/main/java/io/delta/core/types/DataType.java @@ -0,0 +1,4 @@ +package io.delta.core.types; + +public abstract class DataType { } + diff --git a/core/src/main/java/io/delta/core/types/StructType.java b/core/src/main/java/io/delta/core/types/StructType.java new file mode 100644 index 000000000..c4bca2d99 --- /dev/null +++ b/core/src/main/java/io/delta/core/types/StructType.java @@ -0,0 +1,3 @@ +package io.delta.core.types; + +public final class StructType extends DataType { } diff --git a/core/src/main/java/io/delta/core/utils/CloseableIterator.java b/core/src/main/java/io/delta/core/utils/CloseableIterator.java new file mode 100644 index 000000000..f3b239a72 --- /dev/null +++ b/core/src/main/java/io/delta/core/utils/CloseableIterator.java @@ -0,0 +1,6 @@ +package io.delta.core.utils; + +import java.io.Closeable; +import java.util.Iterator; + +public interface CloseableIterator extends Iterator, Closeable { } diff --git a/default-core/src/main/java/io/delta/core/helpers/DefaultTableHelper.java b/default-core/src/main/java/io/delta/core/helpers/DefaultTableHelper.java new file mode 100644 index 000000000..cdd9579b3 --- /dev/null +++ b/default-core/src/main/java/io/delta/core/helpers/DefaultTableHelper.java @@ -0,0 +1,4 @@ +package io.delta.core.helpers; + +public class DefaultTableHelper implements TableHelper { +}