Skip to content

Commit 1fbe5a5

Browse files
committed
[wip] integrate uuid type in gandiva
1 parent 9035d6c commit 1fbe5a5

File tree

11 files changed

+110
-8
lines changed

11 files changed

+110
-8
lines changed

c/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ under the License.
2222
<parent>
2323
<groupId>org.apache.arrow</groupId>
2424
<artifactId>arrow-java-root</artifactId>
25-
<version>18.3.0</version>
25+
<version>18.3.0-SNAPSHOT</version>
2626
</parent>
2727

2828
<artifactId>arrow-c-data</artifactId>

ci/scripts/jni_build.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ cmake \
6363
-DCMAKE_PREFIX_PATH="${arrow_install_dir}" \
6464
-DCMAKE_INSTALL_PREFIX="${prefix_dir}" \
6565
-DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD:-OFF}" \
66-
-DProtobuf_USE_STATIC_LIBS=ON \
6766
-GNinja \
6867
"${EXTRA_CMAKE_OPTIONS[@]}"
6968
cmake --build "${build_dir}"

ci/scripts/jni_macos_build.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ export ARROW_BUILD_TESTS
6767
export ARROW_DATASET
6868
: "${ARROW_GANDIVA:=ON}"
6969
export ARROW_GANDIVA
70-
: "${ARROW_ORC:=ON}"
70+
: "${ARROW_ORC:=OFF}"
7171
export ARROW_ORC
7272
: "${ARROW_PARQUET:=ON}"
7373
: "${ARROW_S3:=ON}"
@@ -125,7 +125,14 @@ if [ "${ARROW_RUN_TESTS:-}" == "ON" ]; then
125125
github_actions_group_end
126126
fi
127127

128-
export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
128+
# Don't set Protobuf_ROOT if it doesn't exist (when using bundled dependencies)
129+
# Instead, let CMake find the system protobuf
130+
if [ -d "${build_dir}/cpp/protobuf_ep-install" ]; then
131+
export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
132+
else
133+
# Use system protobuf - set library path explicitly
134+
export JAVA_JNI_CMAKE_ARGS="-DProtobuf_LIBRARY=/usr/local/lib/libprotobuf.dylib -DProtobuf_PROTOC_EXECUTABLE=/usr/local/bin/protoc"
135+
fi
129136
"${source_dir}/ci/scripts/jni_build.sh" \
130137
"${source_dir}" \
131138
"${install_dir}" \

dataset/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ under the License.
2222
<parent>
2323
<groupId>org.apache.arrow</groupId>
2424
<artifactId>arrow-java-root</artifactId>
25-
<version>18.3.0</version>
25+
<version>18.3.0-SNAPSHOT</version>
2626
</parent>
2727

2828
<artifactId>arrow-dataset</artifactId>

dataset/src/main/cpp/jni_wrapper.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,14 @@
2525
#include "arrow/c/helpers.h"
2626
#include "arrow/dataset/api.h"
2727
#include "arrow/dataset/file_base.h"
28+
#include "arrow/dataset/file_parquet.h"
29+
#include "arrow/dataset/file_ipc.h"
2830
#ifdef ARROW_CSV
2931
#include "arrow/dataset/file_csv.h"
3032
#endif
33+
#ifdef ARROW_JSON
34+
#include "arrow/dataset/file_json.h"
35+
#endif
3136
#include "arrow/filesystem/api.h"
3237
#include "arrow/filesystem/path_util.h"
3338
#include "arrow/engine/substrait/util.h"

gandiva/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ under the License.
2222
<parent>
2323
<groupId>org.apache.arrow</groupId>
2424
<artifactId>arrow-java-root</artifactId>
25-
<version>18.3.0</version>
25+
<version>18.3.0-SNAPSHOT</version>
2626
</parent>
2727

2828
<groupId>org.apache.arrow.gandiva</groupId>

gandiva/proto/gandiva/types.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ message ExtGandivaType {
8585
optional TimeUnit timeUnit = 6; // used by TIME32/TIME64
8686
optional string timeZone = 7; // used by TIMESTAMP
8787
optional IntervalType intervalType = 8; // used by INTERVAL
88+
optional string extensionName = 9; // Arrow extension name, set only for extension types (e.g., "arrow.uuid"); the other fields then describe the storage type
8889
}
8990

9091
message Field {

gandiva/src/main/cpp/expression_registry_helper.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include <memory>
1919

20+
#include <arrow/extension_type.h>
2021
#include <arrow/util/logging.h>
2122
#include <gandiva/arrow.h>
2223
#include <gandiva/expression_registry.h>
@@ -42,6 +43,13 @@ gandiva::types::TimeUnit MapTimeUnit(arrow::TimeUnit::type& unit) {
4243
}
4344

4445
void ArrowToProtobuf(DataTypePtr type, gandiva::types::ExtGandivaType* gandiva_data_type) {
46+
// Handle extension types by preserving extension name and using storage type
47+
if (type->id() == arrow::Type::EXTENSION) {
48+
auto ext_type = std::dynamic_pointer_cast<arrow::ExtensionType>(type);
49+
gandiva_data_type->set_extensionname(ext_type->extension_name());
50+
type = ext_type->storage_type();
51+
}
52+
4553
switch (type->id()) {
4654
case arrow::Type::BOOL:
4755
gandiva_data_type->set_type(gandiva::types::GandivaType::BOOL);
@@ -85,6 +93,13 @@ void ArrowToProtobuf(DataTypePtr type, gandiva::types::ExtGandivaType* gandiva_d
8593
case arrow::Type::BINARY:
8694
gandiva_data_type->set_type(gandiva::types::GandivaType::BINARY);
8795
break;
96+
case arrow::Type::FIXED_SIZE_BINARY: {
97+
gandiva_data_type->set_type(gandiva::types::GandivaType::FIXED_SIZE_BINARY);
98+
std::shared_ptr<arrow::FixedSizeBinaryType> fixed_size_binary_type =
99+
std::dynamic_pointer_cast<arrow::FixedSizeBinaryType>(type);
100+
gandiva_data_type->set_width(fixed_size_binary_type->byte_width());
101+
break;
102+
}
88103
case arrow::Type::DATE32:
89104
gandiva_data_type->set_type(gandiva::types::GandivaType::DATE32);
90105
break;

gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,18 @@ private static Set<FunctionSignature> getSupportedFunctionsFromGandiva() throws
126126
}
127127

128128
private static ArrowType getArrowType(ExtGandivaType type) {
129+
// Check if this is an extension type
130+
if (type.hasExtensionName() && !type.getExtensionName().isEmpty()) {
131+
String extensionName = type.getExtensionName();
132+
133+
// Handle known extension types
134+
if ("arrow.uuid".equals(extensionName)) {
135+
// this should be the new Arrow UUID type from: https://github.com/apache/arrow-java/pull/903
136+
return new UuidType();
137+
}
138+
throw new UnsupportedOperationException("Cannot get ArrowType for unknown extension type: " + extensionName);
139+
}
140+
129141
switch (type.getType().getNumber()) {
130142
case GandivaType.BOOL_VALUE:
131143
return ArrowType.Bool.INSTANCE;
@@ -155,6 +167,8 @@ private static ArrowType getArrowType(ExtGandivaType type) {
155167
return new ArrowType.Utf8();
156168
case GandivaType.BINARY_VALUE:
157169
return new ArrowType.Binary();
170+
case GandivaType.FIXED_SIZE_BINARY_VALUE:
171+
return new ArrowType.FixedSizeBinary(type.getWidth());
158172
case GandivaType.DATE32_VALUE:
159173
return new ArrowType.Date(DateUnit.DAY);
160174
case GandivaType.DATE64_VALUE:
@@ -171,7 +185,6 @@ private static ArrowType getArrowType(ExtGandivaType type) {
171185
return new ArrowType.Decimal(0, 0, 128);
172186
case GandivaType.INTERVAL_VALUE:
173187
return new ArrowType.Interval(mapArrowIntervalUnit(type.getIntervalType()));
174-
case GandivaType.FIXED_SIZE_BINARY_VALUE:
175188
case GandivaType.MAP_VALUE:
176189
case GandivaType.DICTIONARY_VALUE:
177190
case GandivaType.LIST_VALUE:
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.gandiva.evaluator;
18+
19+
import org.apache.arrow.memory.BufferAllocator;
20+
import org.apache.arrow.vector.FieldVector;
21+
import org.apache.arrow.vector.FixedSizeBinaryVector;
22+
import org.apache.arrow.vector.types.pojo.ArrowType;
23+
import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
24+
import org.apache.arrow.vector.types.pojo.FieldType;
25+
26+
/**
 * UUID extension type for Gandiva: an Arrow extension type named "arrow.uuid" whose storage
 * type is a 16-byte FixedSizeBinary.
 *
 * <p>WARNING: the original author marked this class as one that should NOT be included in the
 * final change. NOTE(review): presumably a temporary stand-in for the Arrow UUID type from
 * https://github.com/apache/arrow-java/pull/903 (referenced from ExpressionRegistry) - confirm
 * and remove before merging.
 */
27+
public class UuidType extends ExtensionType {
28+
/** Returns the storage type backing this extension: FixedSizeBinary of width 16. */
29+
@Override
30+
public ArrowType storageType() {
31+
return new ArrowType.FixedSizeBinary(16);
32+
}
33+
/** Returns the extension name, "arrow.uuid". */
34+
@Override
35+
public String extensionName() {
36+
return "arrow.uuid";
37+
}
38+
/** Returns true when {@code other} is a UuidType; this type has no fields to compare. */
39+
@Override
40+
public boolean extensionEquals(ExtensionType other) {
41+
return other instanceof UuidType;
42+
}
43+
/**
 * Builds a UuidType for the given storage type; {@code serializedData} is not read.
 *
 * @throws UnsupportedOperationException if {@code storageType} does not equal
 *     {@link #storageType()}
 */
44+
@Override
45+
public ArrowType deserialize(ArrowType storageType, String serializedData) {
46+
if (!storageType.equals(storageType())) {
47+
throw new UnsupportedOperationException(
48+
"Cannot construct UuidType from underlying type " + storageType);
49+
}
50+
return new UuidType();
51+
}
52+
/** Returns the serialized form of this type, which is always the empty string. */
53+
@Override
54+
public String serialize() {
55+
return "";
56+
}
57+
/**
 * Creates a FixedSizeBinaryVector of byte width 16 with the given name and allocator.
 *
 * <p>NOTE(review): {@code fieldType} is not used here, so anything it carries is not passed
 * to the new vector - confirm this is intended.
 */
58+
@Override
59+
public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
60+
return new FixedSizeBinaryVector(name, allocator, 16);
61+
}
62+
}

0 commit comments

Comments
 (0)