Skip to content

Commit c4eb637

Browse files
feat(lance): Upgrade Lance version for new writer functionality (#17900)
* Upgrade Lance version for new writer functionality
* Fix imports, exclude conflict
1 parent cd451a4 commit c4eb637

File tree

7 files changed: +37 additions, -26 deletions

hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceReader.java

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,28 +18,29 @@
1818

1919
package org.apache.hudi.io.storage;
2020

21-
import com.lancedb.lance.file.LanceFileReader;
22-
import org.apache.arrow.memory.BufferAllocator;
23-
import org.apache.arrow.vector.ipc.ArrowReader;
24-
import org.apache.arrow.vector.types.pojo.Schema;
2521
import org.apache.hudi.HoodieSchemaConversionUtils;
2622
import org.apache.hudi.common.bloom.BloomFilter;
2723
import org.apache.hudi.common.model.HoodieRecord;
2824
import org.apache.hudi.common.model.HoodieSparkRecord;
2925
import org.apache.hudi.common.schema.HoodieSchema;
3026
import org.apache.hudi.common.schema.HoodieSchemaUtils;
31-
import org.apache.hudi.io.memory.HoodieArrowAllocator;
3227
import org.apache.hudi.common.util.collection.ClosableIterator;
3328
import org.apache.hudi.common.util.collection.CloseableMappingIterator;
3429
import org.apache.hudi.common.util.collection.Pair;
3530
import org.apache.hudi.exception.HoodieException;
3631
import org.apache.hudi.exception.HoodieIOException;
32+
import org.apache.hudi.io.memory.HoodieArrowAllocator;
3733
import org.apache.hudi.storage.StoragePath;
34+
35+
import org.apache.arrow.memory.BufferAllocator;
36+
import org.apache.arrow.vector.ipc.ArrowReader;
37+
import org.apache.arrow.vector.types.pojo.Schema;
3838
import org.apache.spark.sql.catalyst.InternalRow;
3939
import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
4040
import org.apache.spark.sql.types.StructField;
4141
import org.apache.spark.sql.types.StructType;
4242
import org.apache.spark.sql.util.LanceArrowUtils;
43+
import org.lance.file.LanceFileReader;
4344

4445
import java.io.IOException;
4546
import java.util.ArrayList;

hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/LanceRecordIterator.java

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,20 +18,21 @@
1818

1919
package org.apache.hudi.io.storage;
2020

21-
import com.lancedb.lance.file.LanceFileReader;
22-
import org.apache.arrow.memory.BufferAllocator;
23-
import org.apache.arrow.vector.VectorSchemaRoot;
24-
import org.apache.arrow.vector.ipc.ArrowReader;
2521
import org.apache.hudi.common.util.collection.ClosableIterator;
2622
import org.apache.hudi.exception.HoodieException;
2723
import org.apache.hudi.exception.HoodieIOException;
24+
25+
import org.apache.arrow.memory.BufferAllocator;
26+
import org.apache.arrow.vector.VectorSchemaRoot;
27+
import org.apache.arrow.vector.ipc.ArrowReader;
2828
import org.apache.spark.sql.catalyst.InternalRow;
2929
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection;
3030
import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
3131
import org.apache.spark.sql.types.StructType;
3232
import org.apache.spark.sql.vectorized.ColumnVector;
3333
import org.apache.spark.sql.vectorized.ColumnarBatch;
3434
import org.apache.spark.sql.vectorized.LanceArrowColumnVector;
35+
import org.lance.file.LanceFileReader;
3536

3637
import java.io.IOException;
3738
import java.util.Iterator;

hudi-hadoop-common/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@
160160
</dependency>
161161
<!-- Lance Core SDK -->
162162
<dependency>
163-
<groupId>com.lancedb</groupId>
163+
<groupId>org.lance</groupId>
164164
<artifactId>lance-core</artifactId>
165165
</dependency>
166166
</dependencies>

hudi-hadoop-common/src/main/java/org/apache/hudi/io/lance/HoodieBaseLanceWriter.java

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,16 +19,18 @@
1919

2020
package org.apache.hudi.io.lance;
2121

22-
import com.lancedb.lance.file.LanceFileWriter;
23-
import org.apache.arrow.memory.BufferAllocator;
24-
import org.apache.arrow.vector.VectorSchemaRoot;
25-
import org.apache.arrow.vector.types.pojo.Schema;
26-
import org.apache.hudi.io.memory.HoodieArrowAllocator;
2722
import org.apache.hudi.exception.HoodieException;
23+
import org.apache.hudi.io.memory.HoodieArrowAllocator;
2824
import org.apache.hudi.storage.HoodieStorage;
2925
import org.apache.hudi.storage.StoragePath;
3026

27+
import org.apache.arrow.memory.BufferAllocator;
28+
import org.apache.arrow.vector.VectorSchemaRoot;
29+
import org.apache.arrow.vector.types.pojo.Schema;
30+
import org.lance.file.LanceFileWriter;
31+
3132
import javax.annotation.concurrent.NotThreadSafe;
33+
3234
import java.io.Closeable;
3335
import java.io.IOException;
3436
import java.util.ArrayList;

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/lance/SparkLanceReaderBase.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ import org.apache.hudi.io.memory.HoodieArrowAllocator
2525
import org.apache.hudi.io.storage.{HoodieSparkLanceReader, LanceRecordIterator}
2626
import org.apache.hudi.storage.StorageConfiguration
2727

28-
import com.lancedb.lance.file.LanceFileReader
2928
import org.apache.hadoop.conf.Configuration
3029
import org.apache.parquet.schema.MessageType
3130
import org.apache.spark.TaskContext
@@ -35,6 +34,7 @@ import org.apache.spark.sql.execution.datasources.{PartitionedFile, SparkColumna
3534
import org.apache.spark.sql.sources.Filter
3635
import org.apache.spark.sql.types.StructType
3736
import org.apache.spark.sql.util.LanceArrowUtils
37+
import org.lance.file.LanceFileReader
3838

3939
import java.io.IOException
4040

@@ -66,7 +66,7 @@ class SparkLanceReaderBase(enableVectorizedReader: Boolean) extends SparkColumna
6666
requiredSchema: StructType,
6767
partitionSchema: StructType,
6868
internalSchemaOpt: util.Option[InternalSchema],
69-
filters: Seq[Filter],
69+
filters: scala.Seq[Filter],
7070
storageConf: StorageConfiguration[Configuration],
7171
tableSchemaOpt: util.Option[MessageType] = util.Option.empty()): Iterator[InternalRow] = {
7272

hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/TestHoodieSparkLanceWriter.java

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,13 @@
1818

1919
package org.apache.hudi.io.storage;
2020

21-
import com.lancedb.lance.file.LanceFileReader;
21+
import org.apache.hudi.client.SparkTaskContextSupplier;
22+
import org.apache.hudi.common.model.HoodieKey;
23+
import org.apache.hudi.common.testutils.HoodieTestUtils;
24+
import org.apache.hudi.io.memory.HoodieArrowAllocator;
25+
import org.apache.hudi.storage.HoodieStorage;
26+
import org.apache.hudi.storage.StoragePath;
27+
2228
import org.apache.arrow.memory.BufferAllocator;
2329
import org.apache.arrow.memory.RootAllocator;
2430
import org.apache.arrow.vector.BigIntVector;
@@ -30,12 +36,6 @@
3036
import org.apache.arrow.vector.VarCharVector;
3137
import org.apache.arrow.vector.VectorSchemaRoot;
3238
import org.apache.arrow.vector.ipc.ArrowReader;
33-
import org.apache.hudi.client.SparkTaskContextSupplier;
34-
import org.apache.hudi.common.model.HoodieKey;
35-
import org.apache.hudi.common.testutils.HoodieTestUtils;
36-
import org.apache.hudi.io.memory.HoodieArrowAllocator;
37-
import org.apache.hudi.storage.HoodieStorage;
38-
import org.apache.hudi.storage.StoragePath;
3939
import org.apache.spark.sql.catalyst.InternalRow;
4040
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
4141
import org.apache.spark.sql.types.DataTypes;
@@ -46,6 +46,7 @@
4646
import org.junit.jupiter.api.Test;
4747
import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
4848
import org.junit.jupiter.api.io.TempDir;
49+
import org.lance.file.LanceFileReader;
4950

5051
import java.io.File;
5152
import java.io.IOException;

pom.xml

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,7 @@
241241
<spring.shell.version>2.1.1</spring.shell.version>
242242
<snappy.version>1.1.10.7</snappy.version>
243243
<arrow.version>18.3.0</arrow.version>
244-
<lance.version>0.39.0</lance.version>
244+
<lance.version>1.0.2</lance.version>
245245
<lance.spark.connector.version>0.0.15</lance.spark.connector.version>
246246
<lance.spark.artifact>lance-spark-3.5_${scala.binary.version}</lance.spark.artifact>
247247
<lance.skip.tests>false</lance.skip.tests>
@@ -956,7 +956,7 @@
956956
</dependency>
957957
<!-- Lance Core SDK -->
958958
<dependency>
959-
<groupId>com.lancedb</groupId>
959+
<groupId>org.lance</groupId>
960960
<artifactId>lance-core</artifactId>
961961
<version>${lance.version}</version>
962962
</dependency>
@@ -965,6 +965,12 @@
965965
<groupId>com.lancedb</groupId>
966966
<artifactId>${lance.spark.artifact}</artifactId>
967967
<version>${lance.spark.connector.version}</version>
968+
<exclusions>
969+
<exclusion>
970+
<groupId>com.lancedb</groupId>
971+
<artifactId>lance-core</artifactId>
972+
</exclusion>
973+
</exclusions>
968974
</dependency>
969975
<!-- Apache Arrow -->
970976
<dependency>

0 commit comments

Comments
 (0)