88import io .cloudquery .schema .Resource ;
99import io .cloudquery .schema .Table ;
1010import io .cloudquery .schema .Table .TableBuilder ;
11+ import io .cloudquery .types .JSONType ;
1112import io .cloudquery .types .JSONType .JSONVector ;
13+ import io .cloudquery .types .UUIDType ;
1214import io .cloudquery .types .UUIDType .UUIDVector ;
1315import java .io .ByteArrayOutputStream ;
1416import java .io .IOException ;
1517import java .nio .channels .Channels ;
16- import java .util .ArrayList ;
17- import java .util .HashMap ;
18- import java .util .List ;
19- import java .util .Map ;
20- import java .util .Objects ;
18+ import java .time .Duration ;
19+ import java .util .*;
2120import org .apache .arrow .memory .BufferAllocator ;
2221import org .apache .arrow .memory .RootAllocator ;
23- import org .apache .arrow .vector .BigIntVector ;
24- import org .apache .arrow .vector .BitVector ;
25- import org .apache .arrow .vector .DateDayVector ;
26- import org .apache .arrow .vector .FieldVector ;
27- import org .apache .arrow .vector .FixedSizeBinaryVector ;
28- import org .apache .arrow .vector .Float4Vector ;
29- import org .apache .arrow .vector .Float8Vector ;
30- import org .apache .arrow .vector .IntVector ;
31- import org .apache .arrow .vector .LargeVarBinaryVector ;
32- import org .apache .arrow .vector .LargeVarCharVector ;
33- import org .apache .arrow .vector .SmallIntVector ;
34- import org .apache .arrow .vector .TimeStampVector ;
35- import org .apache .arrow .vector .TinyIntVector ;
36- import org .apache .arrow .vector .UInt1Vector ;
37- import org .apache .arrow .vector .UInt2Vector ;
38- import org .apache .arrow .vector .UInt4Vector ;
39- import org .apache .arrow .vector .UInt8Vector ;
40- import org .apache .arrow .vector .VarBinaryVector ;
41- import org .apache .arrow .vector .VarCharVector ;
42- import org .apache .arrow .vector .VectorSchemaRoot ;
22+ import org .apache .arrow .vector .*;
4323import org .apache .arrow .vector .ipc .ArrowReader ;
4424import org .apache .arrow .vector .ipc .ArrowStreamReader ;
4525import org .apache .arrow .vector .ipc .ArrowStreamWriter ;
26+ import org .apache .arrow .vector .types .pojo .ArrowType ;
4627import org .apache .arrow .vector .types .pojo .Field ;
4728import org .apache .arrow .vector .types .pojo .FieldType ;
4829import org .apache .arrow .vector .types .pojo .Schema ;
4930import org .apache .arrow .vector .util .Text ;
31+ import org .joou .UByte ;
32+ import org .joou .UInteger ;
33+ import org .joou .ULong ;
34+ import org .joou .UShort ;
5035
5136public class ArrowHelper {
5237 public static final String CQ_EXTENSION_INCREMENTAL = "cq:extension:incremental" ;
@@ -72,6 +57,32 @@ private static void setVectorData(FieldVector vector, Object data) {
7257 bitVector .set (0 , (boolean ) data ? 1 : 0 );
7358 return ;
7459 }
60+ if (vector instanceof DateDayVector dayDateVector ) {
61+ dayDateVector .set (0 , (int ) data );
62+ return ;
63+ }
64+ if (vector instanceof DateMilliVector dateMilliVector ) {
65+ dateMilliVector .set (0 , (long ) data );
66+ return ;
67+ }
68+ if (vector instanceof DurationVector durationVector ) {
69+ Duration duration = (Duration ) data ;
70+ switch (durationVector .getUnit ()) {
71+ case SECOND -> {
72+ durationVector .set (0 , duration .toSeconds ());
73+ }
74+ case MILLISECOND -> {
75+ durationVector .set (0 , duration .toMillis ());
76+ }
77+ case MICROSECOND -> {
78+ durationVector .set (0 , duration .toNanos () / 1000 );
79+ }
80+ case NANOSECOND -> {
81+ durationVector .set (0 , duration .toNanos ());
82+ }
83+ }
84+ return ;
85+ }
7586 if (vector instanceof FixedSizeBinaryVector fixedSizeBinaryVector ) {
7687 fixedSizeBinaryVector .set (0 , (byte []) data );
7788 return ;
@@ -100,6 +111,22 @@ private static void setVectorData(FieldVector vector, Object data) {
100111 smallIntVector .set (0 , (short ) data );
101112 return ;
102113 }
114+ if (vector instanceof TimeMicroVector timeMicroVector ) {
115+ timeMicroVector .set (0 , (long ) data );
116+ return ;
117+ }
118+ if (vector instanceof TimeMilliVector timeMilliVector ) {
119+ timeMilliVector .set (0 , (int ) data );
120+ return ;
121+ }
122+ if (vector instanceof TimeNanoVector timeNanoVector ) {
123+ timeNanoVector .set (0 , (long ) data );
124+ return ;
125+ }
126+ if (vector instanceof TimeSecVector timeSecVector ) {
127+ timeSecVector .set (0 , (int ) data );
128+ return ;
129+ }
103130 if (vector instanceof TimeStampVector timeStampVector ) {
104131 timeStampVector .set (0 , (long ) data );
105132 return ;
@@ -109,19 +136,19 @@ private static void setVectorData(FieldVector vector, Object data) {
109136 return ;
110137 }
111138 if (vector instanceof UInt1Vector uInt1Vector ) {
112- uInt1Vector .set (0 , (byte ) data );
139+ uInt1Vector .set (0 , (( UByte ) data ). shortValue () );
113140 return ;
114141 }
115142 if (vector instanceof UInt2Vector uInt2Vector ) {
116- uInt2Vector .set (0 , (short ) data );
143+ uInt2Vector .set (0 , (( UShort ) data ). intValue () );
117144 return ;
118145 }
119146 if (vector instanceof UInt4Vector uInt4Vector ) {
120- uInt4Vector .set (0 , (int ) data );
147+ uInt4Vector .set (0 , (( UInteger ) data ). intValue () );
121148 return ;
122149 }
123150 if (vector instanceof UInt8Vector uInt8Vector ) {
124- uInt8Vector .set (0 , (long ) data );
151+ uInt8Vector .set (0 , (( ULong ) data ). longValue () );
125152 return ;
126153 }
127154 if (vector instanceof VarBinaryVector varBinaryVector ) {
@@ -132,16 +159,14 @@ private static void setVectorData(FieldVector vector, Object data) {
132159 vectorCharVector .set (0 , (Text ) data );
133160 return ;
134161 }
135- if (vector instanceof UUIDVector uuidVector ) {
136- uuidVector .set (0 , (java .util .UUID ) data );
137- return ;
138- }
162+ // CloudQuery-specific
139163 if (vector instanceof JSONVector jsonVector ) {
140164 jsonVector .setSafe (0 , (byte []) data );
141165 return ;
142166 }
143- if (vector instanceof DateDayVector dayDateVector ) {
144- dayDateVector .set (0 , (int ) data );
167+ // CloudQuery-specific
168+ if (vector instanceof UUIDVector uuidVector ) {
169+ uuidVector .set (0 , (java .util .UUID ) data );
145170 return ;
146171 }
147172
@@ -177,17 +202,7 @@ public static Schema toArrowSchema(Table table) {
177202 List <Column > columns = table .getColumns ();
178203 Field [] fields = new Field [columns .size ()];
179204 for (int i = 0 ; i < columns .size (); i ++) {
180- Column column = columns .get (i );
181- Map <String , String > metadata = new HashMap <>();
182- metadata .put (CQ_EXTENSION_UNIQUE , Boolean .toString (column .isUnique ()));
183- metadata .put (CQ_EXTENSION_PRIMARY_KEY , Boolean .toString (column .isPrimaryKey ()));
184- metadata .put (CQ_EXTENSION_INCREMENTAL , Boolean .toString (column .isIncrementalKey ()));
185- Field field =
186- new Field (
187- column .getName (),
188- new FieldType (!column .isNotNull (), column .getType (), null , metadata ),
189- null );
190- fields [i ] = field ;
205+ fields [i ] = getField (columns .get (i ));
191206 }
192207 Map <String , String > metadata = new HashMap <>();
193208 metadata .put (CQ_TABLE_NAME , table .getName ());
@@ -204,23 +219,21 @@ public static Schema toArrowSchema(Table table) {
204219 return new Schema (asList (fields ), metadata );
205220 }
206221
222+ private static Field getField (Column column ) {
223+ Map <String , String > metadata = new HashMap <>();
224+ metadata .put (CQ_EXTENSION_UNIQUE , Boolean .toString (column .isUnique ()));
225+ metadata .put (CQ_EXTENSION_PRIMARY_KEY , Boolean .toString (column .isPrimaryKey ()));
226+ metadata .put (CQ_EXTENSION_INCREMENTAL , Boolean .toString (column .isIncrementalKey ()));
227+ return new Field (
228+ column .getName (),
229+ new FieldType (!column .isNotNull (), column .getType (), null , metadata ),
230+ null );
231+ }
232+
207233 public static Table fromArrowSchema (Schema schema ) {
208234 List <Column > columns = new ArrayList <>();
209235 for (Field field : schema .getFields ()) {
210- boolean isUnique = Objects .equals (field .getMetadata ().get (CQ_EXTENSION_UNIQUE ), "true" );
211- boolean isPrimaryKey =
212- Objects .equals (field .getMetadata ().get (CQ_EXTENSION_PRIMARY_KEY ), "true" );
213- boolean isIncrementalKey =
214- Objects .equals (field .getMetadata ().get (CQ_EXTENSION_INCREMENTAL ), "true" );
215-
216- columns .add (
217- Column .builder ()
218- .name (field .getName ())
219- .unique (isUnique )
220- .primaryKey (isPrimaryKey )
221- .incrementalKey (isIncrementalKey )
222- .type (field .getType ())
223- .build ());
236+ columns .add (getColumn (field ));
224237 }
225238
226239 Map <String , String > metaData = schema .getCustomMetadata ();
@@ -244,6 +257,40 @@ public static Table fromArrowSchema(Schema schema) {
244257 return tableBuilder .build ();
245258 }
246259
260+ private static Column getColumn (Field field ) {
261+ boolean isUnique = Objects .equals (field .getMetadata ().get (CQ_EXTENSION_UNIQUE ), "true" );
262+ boolean isPrimaryKey =
263+ Objects .equals (field .getMetadata ().get (CQ_EXTENSION_PRIMARY_KEY ), "true" );
264+ boolean isIncrementalKey =
265+ Objects .equals (field .getMetadata ().get (CQ_EXTENSION_INCREMENTAL ), "true" );
266+
267+ ArrowType fieldType = field .getType ();
268+ String extensionName =
269+ field .getMetadata ().get (ArrowType .ExtensionType .EXTENSION_METADATA_KEY_NAME );
270+ String extensionMetadata =
271+ field .getMetadata ().get (ArrowType .ExtensionType .EXTENSION_METADATA_KEY_METADATA );
272+
273+ // We need to scan our extension types manually because of
274+ // https://github.com/apache/arrow/issues/38891
275+ if (JSONType .EXTENSION_NAME .equals (extensionName )
276+ && JSONType .INSTANCE .serialize ().equals (extensionMetadata )
277+ && JSONType .INSTANCE .storageType ().equals (fieldType )) {
278+ fieldType = JSONType .INSTANCE ;
279+ } else if (UUIDType .EXTENSION_NAME .equals (extensionName )
280+ && UUIDType .INSTANCE .serialize ().equals (extensionMetadata )
281+ && UUIDType .INSTANCE .storageType ().equals (fieldType )) {
282+ fieldType = UUIDType .INSTANCE ;
283+ }
284+
285+ return Column .builder ()
286+ .name (field .getName ())
287+ .unique (isUnique )
288+ .primaryKey (isPrimaryKey )
289+ .incrementalKey (isIncrementalKey )
290+ .type (fieldType )
291+ .build ();
292+ }
293+
247294 public static ByteString encode (Resource resource ) throws IOException {
248295 try (BufferAllocator bufferAllocator = new RootAllocator ()) {
249296 Table table = resource .getTable ();
0 commit comments