Skip to content

Commit 8bd1f24

Browse files
authored
chore: Annotate classes/methods/fields that are used by Apache Iceberg (#3237)
1 parent 590bb0a commit 8bd1f24

17 files changed

+131
-11
lines changed

common/src/main/java/org/apache/arrow/c/AbstractCometSchemaImporter.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import org.apache.arrow.vector.FieldVector;
2424
import org.apache.arrow.vector.types.pojo.Field;
2525

26+
import org.apache.comet.IcebergApi;
27+
2628
/** This is a simple wrapper around SchemaImporter to make it accessible from Java Arrow. */
2729
public abstract class AbstractCometSchemaImporter {
2830
private final BufferAllocator allocator;
@@ -67,6 +69,7 @@ public FieldVector importVector(ArrowArray array, ArrowSchema schema) {
6769
return vector;
6870
}
6971

72+
@IcebergApi
7073
public void close() {
7174
provider.close();
7275
}

common/src/main/java/org/apache/comet/CometSchemaImporter.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
import org.apache.arrow.memory.BufferAllocator;
2424

2525
/** This is a simple wrapper around SchemaImporter to make it accessible from Java Arrow. */
26+
@IcebergApi
2627
public class CometSchemaImporter extends AbstractCometSchemaImporter {
28+
@IcebergApi
2729
public CometSchemaImporter(BufferAllocator allocator) {
2830
super(allocator);
2931
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.comet;
21+
22+
import java.lang.annotation.Documented;
23+
import java.lang.annotation.ElementType;
24+
import java.lang.annotation.Retention;
25+
import java.lang.annotation.RetentionPolicy;
26+
import java.lang.annotation.Target;
27+
28+
/**
29+
* Indicates that the annotated element is part of the public API used by Apache Iceberg.
30+
*
31+
* <p>This annotation marks classes, methods, constructors, and fields that form the contract
32+
* between Comet and Iceberg. Changes to these APIs may break Iceberg's Comet integration, so
33+
* contributors should exercise caution and consider backward compatibility when modifying annotated
34+
* elements.
35+
*
36+
* <p>The Iceberg integration uses Comet's native Parquet reader for accelerated vectorized reads.
37+
* See the contributor guide documentation for details on how Iceberg uses these APIs.
38+
*
39+
* @see <a href="https://iceberg.apache.org/">Apache Iceberg</a>
40+
*/
41+
@Documented
42+
@Retention(RetentionPolicy.RUNTIME)
43+
@Target({ElementType.TYPE, ElementType.METHOD, ElementType.CONSTRUCTOR, ElementType.FIELD})
44+
public @interface IcebergApi {}

common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@
2828
import org.apache.spark.sql.types.TimestampNTZType$;
2929

3030
import org.apache.comet.CometConf;
31+
import org.apache.comet.IcebergApi;
3132
import org.apache.comet.vector.CometVector;
3233

3334
/** Base class for Comet Parquet column reader implementations. */
35+
@IcebergApi
3436
public abstract class AbstractColumnReader implements AutoCloseable {
3537
protected static final Logger LOG = LoggerFactory.getLogger(AbstractColumnReader.class);
3638

@@ -61,7 +63,7 @@ public abstract class AbstractColumnReader implements AutoCloseable {
6163
protected int batchSize;
6264

6365
/** A pointer to the native implementation of ColumnReader. */
64-
protected long nativeHandle;
66+
@IcebergApi protected long nativeHandle;
6567

6668
AbstractColumnReader(
6769
DataType type,
@@ -96,6 +98,7 @@ String getPath() {
9698
/**
9799
* Set the batch size of this reader to be 'batchSize'. Also initializes the native column reader.
98100
*/
101+
@IcebergApi
99102
public void setBatchSize(int batchSize) {
100103
assert nativeHandle == 0
101104
: "Native column reader shouldn't be initialized before " + "'setBatchSize' is called";
@@ -113,6 +116,7 @@ public void setBatchSize(int batchSize) {
113116
/** Returns the {@link CometVector} read by this reader. */
114117
public abstract CometVector currentBatch();
115118

119+
@IcebergApi
116120
@Override
117121
public void close() {
118122
if (nativeHandle != 0) {

common/src/main/java/org/apache/comet/parquet/BatchReader.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565

6666
import org.apache.comet.CometConf;
6767
import org.apache.comet.CometSchemaImporter;
68+
import org.apache.comet.IcebergApi;
6869
import org.apache.comet.shims.ShimBatchReader;
6970
import org.apache.comet.shims.ShimFileFormat;
7071
import org.apache.comet.vector.CometVector;
@@ -87,6 +88,7 @@
8788
* }
8889
* </pre>
8990
*/
91+
@IcebergApi
9092
public class BatchReader extends RecordReader<Void, ColumnarBatch> implements Closeable {
9193
private static final Logger LOG = LoggerFactory.getLogger(FileReader.class);
9294
protected static final BufferAllocator ALLOCATOR = new RootAllocator();
@@ -186,9 +188,9 @@ public BatchReader(
186188
}
187189

188190
/**
189-
* @deprecated since 0.10.0, will be removed in 0.11.0.
190191
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
191192
*/
193+
@IcebergApi
192194
public BatchReader(AbstractColumnReader[] columnReaders) {
193195
// Todo: set useDecimal128 and useLazyMaterialization
194196
int numColumns = columnReaders.length;
@@ -384,17 +386,17 @@ public void init() throws URISyntaxException, IOException {
384386
}
385387

386388
/**
387-
* @deprecated since 0.10.0, will be removed in 0.11.0.
388389
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
389390
*/
391+
@IcebergApi
390392
public void setSparkSchema(StructType schema) {
391393
this.sparkSchema = schema;
392394
}
393395

394396
/**
395-
* @deprecated since 0.10.0, will be removed in 0.11.0.
396397
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
397398
*/
399+
@IcebergApi
398400
public AbstractColumnReader[] getColumnReaders() {
399401
return columnReaders;
400402
}
@@ -498,6 +500,7 @@ public boolean nextBatch() throws IOException {
498500
return nextBatch(batchSize);
499501
}
500502

503+
@IcebergApi
501504
public boolean nextBatch(int batchSize) {
502505
long totalDecodeTime = 0, totalLoadTime = 0;
503506
for (int i = 0; i < columnReaders.length; i++) {
@@ -524,6 +527,7 @@ public boolean nextBatch(int batchSize) {
524527
return true;
525528
}
526529

530+
@IcebergApi
527531
@Override
528532
public void close() throws IOException {
529533
if (columnReaders != null) {

common/src/main/java/org/apache/comet/parquet/ColumnReader.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,14 @@
4444

4545
import org.apache.comet.CometConf;
4646
import org.apache.comet.CometSchemaImporter;
47+
import org.apache.comet.IcebergApi;
4748
import org.apache.comet.vector.CometDecodedVector;
4849
import org.apache.comet.vector.CometDictionary;
4950
import org.apache.comet.vector.CometDictionaryVector;
5051
import org.apache.comet.vector.CometPlainVector;
5152
import org.apache.comet.vector.CometVector;
5253

54+
@IcebergApi
5355
public class ColumnReader extends AbstractColumnReader {
5456
protected static final Logger LOG = LoggerFactory.getLogger(ColumnReader.class);
5557
protected final BufferAllocator ALLOCATOR = new RootAllocator();
@@ -111,9 +113,9 @@ public class ColumnReader extends AbstractColumnReader {
111113
* Set the page reader for a new column chunk to read. Expects to call `readBatch` after this.
112114
*
113115
* @param pageReader the page reader for the new column chunk
114-
* @deprecated since 0.10.0, will be removed in 0.11.0.
115116
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
116117
*/
118+
@IcebergApi
117119
public void setPageReader(PageReader pageReader) throws IOException {
118120
this.pageReader = pageReader;
119121

@@ -129,6 +131,7 @@ public void setPageReader(PageReader pageReader) throws IOException {
129131
}
130132

131133
/** This method is called from Apache Iceberg. */
134+
@IcebergApi
132135
public void setRowGroupReader(RowGroupReader rowGroupReader, ParquetColumnSpec columnSpec)
133136
throws IOException {
134137
ColumnDescriptor descriptor = Utils.buildColumnDescriptor(columnSpec);

common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,13 @@
2727
import org.apache.spark.sql.types.*;
2828
import org.apache.spark.unsafe.types.UTF8String;
2929

30+
import org.apache.comet.IcebergApi;
31+
3032
/**
3133
* A column reader that always return constant vectors. Used for reading partition columns, for
3234
* instance.
3335
*/
36+
@IcebergApi
3437
public class ConstantColumnReader extends MetadataColumnReader {
3538
/** Whether all the values in this constant column are nulls */
3639
private boolean isNull;
@@ -53,16 +56,17 @@ public class ConstantColumnReader extends MetadataColumnReader {
5356
}
5457

5558
/**
56-
* @deprecated since 0.10.0, will be removed in 0.11.0.
5759
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
5860
*/
61+
@IcebergApi
5962
public ConstantColumnReader(
6063
DataType type, ColumnDescriptor descriptor, Object value, boolean useDecimal128) {
6164
super(type, descriptor, useDecimal128, true);
6265
this.value = value;
6366
}
6467

6568
// Used by Iceberg
69+
@IcebergApi
6670
public ConstantColumnReader(
6771
DataType type, ParquetColumnSpec spec, Object value, boolean useDecimal128) {
6872
super(type, spec, useDecimal128, true);

common/src/main/java/org/apache/comet/parquet/FileReader.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@
9090
import org.apache.parquet.schema.PrimitiveType;
9191
import org.apache.spark.sql.execution.metric.SQLMetric;
9292

93+
import org.apache.comet.IcebergApi;
94+
9395
import static org.apache.parquet.hadoop.ParquetFileWriter.EFMAGIC;
9496
import static org.apache.parquet.hadoop.ParquetFileWriter.MAGIC;
9597

@@ -101,6 +103,7 @@
101103
* A Parquet file reader. Mostly followed {@code ParquetFileReader} in {@code parquet-mr}, but with
102104
* customizations & optimizations for Comet.
103105
*/
106+
@IcebergApi
104107
public class FileReader implements Closeable {
105108
private static final Logger LOG = LoggerFactory.getLogger(FileReader.class);
106109

@@ -135,6 +138,7 @@ public class FileReader implements Closeable {
135138
}
136139

137140
/** This constructor is called from Apache Iceberg. */
141+
@IcebergApi
138142
public FileReader(
139143
WrappedInputFile file,
140144
ReadOptions cometOptions,
@@ -258,6 +262,7 @@ public void setRequestedSchema(List<ColumnDescriptor> projection) {
258262
}
259263

260264
/** This method is called from Apache Iceberg. */
265+
@IcebergApi
261266
public void setRequestedSchemaFromSpecs(List<ParquetColumnSpec> specList) {
262267
paths.clear();
263268
for (ParquetColumnSpec colSpec : specList) {
@@ -336,6 +341,7 @@ public long getFilteredRecordCount() {
336341
}
337342

338343
/** Skips the next row group. Returns false if there's no row group to skip. Otherwise, true. */
344+
@IcebergApi
339345
public boolean skipNextRowGroup() {
340346
return advanceToNextBlock();
341347
}
@@ -344,6 +350,7 @@ public boolean skipNextRowGroup() {
344350
* Returns the next row group to read (after applying row group filtering), or null if there's no
345351
* more row group.
346352
*/
353+
@IcebergApi
347354
public RowGroupReader readNextRowGroup() throws IOException {
348355
if (currentBlock == blocks.size()) {
349356
return null;
@@ -864,6 +871,7 @@ public void closeStream() throws IOException {
864871
}
865872
}
866873

874+
@IcebergApi
867875
@Override
868876
public void close() throws IOException {
869877
try {

common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@
2828
import org.apache.parquet.column.ColumnDescriptor;
2929
import org.apache.spark.sql.types.DataType;
3030

31+
import org.apache.comet.IcebergApi;
3132
import org.apache.comet.vector.CometPlainVector;
3233
import org.apache.comet.vector.CometVector;
3334

3435
/** A metadata column reader that can be extended by {@link RowIndexColumnReader} etc. */
36+
@IcebergApi
3537
public class MetadataColumnReader extends AbstractColumnReader {
3638
private final BufferAllocator allocator = new RootAllocator();
3739

@@ -43,9 +45,9 @@ public class MetadataColumnReader extends AbstractColumnReader {
4345
private boolean isConstant;
4446

4547
/**
46-
* @deprecated since 0.10.0, will be made package private in 0.11.0.
4748
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
4849
*/
50+
@IcebergApi
4951
public MetadataColumnReader(
5052
DataType type, ColumnDescriptor descriptor, boolean useDecimal128, boolean isConstant) {
5153
// TODO: should we handle legacy dates & timestamps for metadata columns?
@@ -55,6 +57,7 @@ public MetadataColumnReader(
5557
}
5658

5759
// Used by Iceberg
60+
@IcebergApi
5861
public MetadataColumnReader(
5962
DataType type, ParquetColumnSpec spec, boolean useDecimal128, boolean isConstant) {
6063
// TODO: should we handle legacy dates & timestamps for metadata columns?
@@ -69,6 +72,7 @@ public void setBatchSize(int batchSize) {
6972
super.setBatchSize(batchSize);
7073
}
7174

75+
@IcebergApi
7276
@Override
7377
public void readBatch(int total) {
7478
if (vector == null) {
@@ -90,6 +94,7 @@ void setNumNulls(int total) {
9094
vector.setNumNulls(total);
9195
}
9296

97+
@IcebergApi
9398
@Override
9499
public CometVector currentBatch() {
95100
return vector;

common/src/main/java/org/apache/comet/parquet/Native.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.nio.ByteBuffer;
2323
import java.util.Map;
2424

25+
import org.apache.comet.IcebergApi;
2526
import org.apache.comet.NativeBase;
2627

2728
public final class Native extends NativeBase {
@@ -143,6 +144,7 @@ public static native void setPageV2(
143144
*
144145
* @param handle the handle to the native Parquet column reader
145146
*/
147+
@IcebergApi
146148
public static native void resetBatch(long handle);
147149

148150
/**
@@ -221,12 +223,14 @@ public static native void setPageV2(
221223
public static native void setDecimal(long handle, byte[] value);
222224

223225
/** Set position of row index vector for Iceberg Metadata Column */
226+
@IcebergApi
224227
public static native void setPosition(long handle, long value, int size);
225228

226229
/** Set row index vector for Spark row index metadata column and return vector size */
227230
public static native int setIndices(long handle, long offset, int size, long[] indices);
228231

229232
/** Set deleted info for Iceberg Metadata Column */
233+
@IcebergApi
230234
public static native void setIsDeleted(long handle, boolean[] isDeleted);
231235

232236
/**

0 commit comments

Comments
 (0)