Skip to content

Commit 9cdda52

Browse files
authored
GH-891: Add ExtensionTypeWriterFactory to TransferPair (#892)
## What's Changed This PR simplifies extension type writer creation by moving from a factory-based pattern to a type-based pattern. Instead of passing `ExtensionTypeWriterFactory` instances through multiple API layers, extension types now provide their own writers via a new `getNewFieldWriter()` method on `ArrowType.ExtensionType`. - Added `getNewFieldWriter(ValueVector)` abstract method to `ArrowType.ExtensionType` - Removed `ExtensionTypeWriterFactory` interface and all implementations - Removed factory parameters from `ComplexCopier`, `PromotableWriter`, and `TransferPair` APIs - Updated `UnionWriter` to support extension types (previously threw `UnsupportedOperationException`) - Simplified extension type implementations (`UuidType`, `OpaqueType`) The factory pattern didn't scale well. Each new extension type required creating a separate factory class and passing it through multiple API layers. This was especially painful for external developers who had to maintain two classes per extension type and manage factory parameters everywhere. The new approach follows the same pattern as `MinorType`, where each type knows how to create its own writer. This reduces boilerplate, simplifies the API, and makes it easier to implement custom extension types outside arrow-java. ## Breaking Changes - `ExtensionTypeWriterFactory` has been removed - Extension types must now implement `getNewFieldWriter(ValueVector vector)` method - ExtensionHolders must implement `type()` which returns the `ExtensionType` for that Holder - (Writers are obtained directly from the extension type, not from a factory) ### Migration Guide - _Extension types must now implement `getNewFieldWriter(ValueVector vector)` method_ ```java public class UuidType extends ExtensionType { ... @OverRide public FieldWriter getNewFieldWriter(ValueVector vector) { return new UuidWriterImpl((UuidVector) vector); } ... } ``` - _ExtensionHolders must implement `type()` which returns the `ExtensionType` for that Holder_ ```java public class UuidHolder extends ExtensionHolder { ... @OverRide public ArrowType type() { return UuidType.INSTANCE; } ``` - How to use Extension Writers? **Before:** ```java writer.extension(UuidType.INSTANCE); writer.addExtensionTypeWriterFactory(extensionTypeWriterFactory); writer.writeExtension(value); ``` **After:** ```java writer.extension(UuidType.INSTANCE); writer.writeExtension(value); ``` - Also `copyAsValue` does not need to provide the factory anymore. Closes #891 .
1 parent 68451bf commit 9cdda52

40 files changed

+493
-359
lines changed

vector/src/main/codegen/templates/AbstractFieldReader.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,6 @@ public void copyAsField(String name, ${name}Writer writer) {
109109

110110
</#list></#list>
111111

112-
public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory) {
113-
fail("CopyAsValue StructWriter");
114-
}
115-
116112
public void read(ExtensionHolder holder) {
117113
fail("Extension");
118114
}
@@ -147,4 +143,5 @@ public int size() {
147143
private void fail(String name) {
148144
throw new IllegalArgumentException(String.format("You tried to read a [%s] type when you are using a field reader of type [%s].", name, this.getClass().getSimpleName()));
149145
}
146+
150147
}

vector/src/main/codegen/templates/AbstractFieldWriter.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,17 @@ public void endEntry() {
107107
throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
108108
}
109109

110+
@Override
110111
public void write(ExtensionHolder var1) {
111-
this.fail("ExtensionType");
112+
this.fail("Cannot write ExtensionHolder");
112113
}
114+
@Override
113115
public void writeExtension(Object var1) {
114-
this.fail("ExtensionType");
116+
this.fail("Cannot write extension object");
115117
}
116-
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
117-
this.fail("ExtensionType");
118+
@Override
119+
public void writeExtension(Object var1, ArrowType type) {
120+
this.fail("Cannot write extension with type " + type);
118121
}
119122

120123
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />

vector/src/main/codegen/templates/ArrowType.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@
2727

2828
import org.apache.arrow.flatbuf.Type;
2929
import org.apache.arrow.memory.BufferAllocator;
30+
import org.apache.arrow.vector.complex.writer.FieldWriter;
3031
import org.apache.arrow.vector.types.*;
3132
import org.apache.arrow.vector.FieldVector;
33+
import org.apache.arrow.vector.ValueVector;
3234

3335
import com.fasterxml.jackson.annotation.JsonCreator;
3436
import com.fasterxml.jackson.annotation.JsonIgnore;
@@ -331,6 +333,10 @@ public boolean equals(Object obj) {
331333
public <T> T accept(ArrowTypeVisitor<T> visitor) {
332334
return visitor.visit(this);
333335
}
336+
337+
public FieldWriter getNewFieldWriter(ValueVector vector) {
338+
throw new UnsupportedOperationException("WriterImpl not yet implemented.");
339+
}
334340
}
335341

336342
private static final int defaultDecimalBitWidth = 128;

vector/src/main/codegen/templates/BaseReader.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ public interface RepeatedStructReader extends StructReader{
4949
boolean next();
5050
int size();
5151
void copyAsValue(StructWriter writer);
52-
void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory);
5352
}
5453

5554
public interface ListReader extends BaseReader{
@@ -60,7 +59,6 @@ public interface RepeatedListReader extends ListReader{
6059
boolean next();
6160
int size();
6261
void copyAsValue(ListWriter writer);
63-
void copyAsValue(ListWriter writer, ExtensionTypeWriterFactory writerFactory);
6462
}
6563

6664
public interface MapReader extends BaseReader{
@@ -71,7 +69,6 @@ public interface RepeatedMapReader extends MapReader{
7169
boolean next();
7270
int size();
7371
void copyAsValue(MapWriter writer);
74-
void copyAsValue(MapWriter writer, ExtensionTypeWriterFactory writerFactory);
7572
}
7673

7774
public interface ScalarReader extends

vector/src/main/codegen/templates/BaseWriter.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,12 @@ public interface ExtensionWriter extends BaseWriter {
125125
void writeExtension(Object value);
126126

127127
/**
128-
* Adds the given extension type factory. This factory allows configuring writer implementations for specific ExtensionTypeVector.
128+
* Writes the given extension type value.
129129
*
130-
* @param factory the extension type factory to add
130+
* @param value the extension type value to write
131+
* @param type of the extension
131132
*/
132-
void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory);
133+
void writeExtension(Object value, ArrowType type);
133134
}
134135

135136
public interface ScalarWriter extends

vector/src/main/codegen/templates/ComplexCopier.java

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,8 @@ public class ComplexCopier {
4141
* @param input field to read from
4242
* @param output field to write to
4343
*/
44-
public static void copy(FieldReader input, FieldWriter output) {
45-
writeValue(input, output, null);
46-
}
47-
48-
public static void copy(FieldReader input, FieldWriter output, ExtensionTypeWriterFactory extensionTypeWriterFactory) {
49-
writeValue(input, output, extensionTypeWriterFactory);
50-
}
44+
public static void copy(FieldReader reader, FieldWriter writer) {
5145

52-
private static void writeValue(FieldReader reader, FieldWriter writer, ExtensionTypeWriterFactory extensionTypeWriterFactory) {
5346
final MinorType mt = reader.getMinorType();
5447

5548
switch (mt) {
@@ -65,7 +58,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer, Extension
6558
FieldReader childReader = reader.reader();
6659
FieldWriter childWriter = getListWriterForReader(childReader, writer);
6760
if (childReader.isSet()) {
68-
writeValue(childReader, childWriter, extensionTypeWriterFactory);
61+
copy(childReader, childWriter);
6962
} else {
7063
childWriter.writeNull();
7164
}
@@ -83,8 +76,8 @@ private static void writeValue(FieldReader reader, FieldWriter writer, Extension
8376
FieldReader structReader = reader.reader();
8477
if (structReader.isSet()) {
8578
writer.startEntry();
86-
writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key()), extensionTypeWriterFactory);
87-
writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value()), extensionTypeWriterFactory);
79+
copy(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key()));
80+
copy(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value()));
8881
writer.endEntry();
8982
} else {
9083
writer.writeNull();
@@ -103,7 +96,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer, Extension
10396
if (childReader.getMinorType() != Types.MinorType.NULL) {
10497
FieldWriter childWriter = getStructWriterForReader(childReader, writer, name);
10598
if (childReader.isSet()) {
106-
writeValue(childReader, childWriter, extensionTypeWriterFactory);
99+
copy(childReader, childWriter);
107100
} else {
108101
childWriter.writeNull();
109102
}
@@ -115,14 +108,10 @@ private static void writeValue(FieldReader reader, FieldWriter writer, Extension
115108
}
116109
break;
117110
case EXTENSIONTYPE:
118-
if (extensionTypeWriterFactory == null) {
119-
throw new IllegalArgumentException("Must provide ExtensionTypeWriterFactory");
120-
}
121111
if (reader.isSet()) {
122112
Object value = reader.readObject();
123113
if (value != null) {
124-
writer.addExtensionTypeWriterFactory(extensionTypeWriterFactory);
125-
writer.writeExtension(value);
114+
writer.writeExtension(value, reader.getField().getType());
126115
}
127116
} else {
128117
writer.writeNull();

vector/src/main/codegen/templates/NullReader.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ public void read(int arrayIndex, Nullable${name}Holder holder){
8686
}
8787
</#list></#list>
8888

89-
public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory){}
9089
public void read(ExtensionHolder holder) {
9190
holder.isSet = 0;
9291
}

vector/src/main/codegen/templates/PromotableWriter.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ protected void setWriter(ValueVector v) {
286286
writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory);
287287
break;
288288
case EXTENSIONTYPE:
289-
writer = new UnionExtensionWriter((ExtensionTypeVector) vector);
289+
writer = ((ExtensionType) vector.getField().getType()).getNewFieldWriter(vector);
290290
break;
291291
default:
292292
writer = type.getNewFieldWriter(vector);
@@ -541,17 +541,13 @@ public void writeLargeVarChar(String value) {
541541
}
542542

543543
@Override
544-
public void writeExtension(Object value) {
545-
getWriter(MinorType.EXTENSIONTYPE).writeExtension(value);
544+
public void writeExtension(Object value, ArrowType arrowType) {
545+
getWriter(MinorType.EXTENSIONTYPE, arrowType).writeExtension(value, arrowType);
546546
}
547547

548548
@Override
549-
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) {
550-
getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory);
551-
}
552-
553-
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory, ArrowType arrowType) {
554-
getWriter(MinorType.EXTENSIONTYPE, arrowType).addExtensionTypeWriterFactory(factory);
549+
public void write(ExtensionHolder holder) {
550+
getWriter(MinorType.EXTENSIONTYPE, holder.type()).write(holder);
555551
}
556552

557553
@Override

vector/src/main/codegen/templates/UnionListWriter.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,13 @@ public MapWriter map(String name, boolean keysSorted) {
204204
205205
@Override
206206
public ExtensionWriter extension(ArrowType arrowType) {
207-
this.extensionType = arrowType;
207+
extensionType = arrowType;
208208
return this;
209209
}
210+
210211
@Override
211212
public ExtensionWriter extension(String name, ArrowType arrowType) {
212-
ExtensionWriter extensionWriter = writer.extension(name, arrowType);
213-
return extensionWriter;
213+
return writer.extension(name, arrowType);
214214
}
215215
216216
<#if listName == "LargeList">
@@ -337,13 +337,13 @@ public void writeNull() {
337337
338338
@Override
339339
public void writeExtension(Object value) {
340-
writer.writeExtension(value);
340+
writer.writeExtension(value, extensionType);
341341
writer.setPosition(writer.idx() + 1);
342342
}
343343
344344
@Override
345-
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
346-
writer.addExtensionTypeWriterFactory(var1, extensionType);
345+
public void writeExtension(Object value, ArrowType type) {
346+
writeExtension(value);
347347
}
348348
349349
public void write(ExtensionHolder var1) {

vector/src/main/codegen/templates/UnionReader.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ public void read(int index, UnionHolder holder) {
7979
}
8080

8181
private FieldReader getReaderForIndex(int index) {
82+
return getReaderForIndex(index, null);
83+
}
84+
85+
private FieldReader getReaderForIndex(int index, ArrowType type) {
8286
int typeValue = data.getTypeValue(index);
8387
FieldReader reader = (FieldReader) readers[typeValue];
8488
if (reader != null) {
@@ -105,11 +109,26 @@ private FieldReader getReaderForIndex(int index) {
105109
</#if>
106110
</#list>
107111
</#list>
112+
case EXTENSIONTYPE:
113+
if(type == null) {
114+
throw new RuntimeException("Cannot get Extension reader without an ArrowType");
115+
}
116+
return (FieldReader) getExtension(type);
108117
default:
109118
throw new UnsupportedOperationException("Unsupported type: " + MinorType.values()[typeValue]);
110119
}
111120
}
112121

122+
private ExtensionReader extensionReader;
123+
124+
private ExtensionReader getExtension(ArrowType type) {
125+
if (extensionReader == null) {
126+
extensionReader = data.getExtension(type).getReader();
127+
extensionReader.setPosition(idx());
128+
}
129+
return extensionReader;
130+
}
131+
113132
private SingleStructReaderImpl structReader;
114133

115134
private StructReader getStruct() {
@@ -240,4 +259,8 @@ public FieldReader reader() {
240259
public boolean next() {
241260
return getReaderForIndex(idx()).next();
242261
}
262+
263+
public void read(ExtensionHolder holder){
264+
getReaderForIndex(idx(), holder.type()).read(holder);
265+
}
243266
}

0 commit comments

Comments
 (0)