Skip to content

Commit d885f4a

Browse files
comphead and andygrove authored
chore: Adding an optional hdfs crate (#1377)
* Adding an optional `hdfs` crate
* Update NOTICE.txt
  Co-authored-by: Andy Grove <[email protected]>
* Update NOTICE.txt
  Co-authored-by: Andy Grove <[email protected]>
---------
Co-authored-by: Andy Grove <[email protected]>
1 parent 11651d4 commit d885f4a

File tree

10 files changed

+952
-15
lines changed

10 files changed

+952
-15
lines changed

.github/actions/setup-builder/action.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ runs:
3434
run: |
3535
apt-get update
3636
apt-get install -y protobuf-compiler
37+
apt-get install -y clang
3738
3839
- name: Install JDK ${{inputs.jdk-version}}
3940
uses: actions/setup-java@v4

NOTICE.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@ This product includes software developed at
88
Apache Gluten (https://github.com/apache/incubator-gluten/)
99
Specifically:
1010
- Optimizer rule to replace SortMergeJoin with ShuffleHashJoin
11+
12+
This product includes software developed at
13+
DataFusion HDFS ObjectStore Contrib Package (https://github.com/datafusion-contrib/datafusion-objectstore-hdfs)

native/Cargo.lock

Lines changed: 132 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

native/Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
# under the License.
1717

1818
[workspace]
19-
members = ["core", "spark-expr", "proto"]
19+
default-members = ["core", "spark-expr", "proto"]
20+
members = ["core", "spark-expr", "proto", "hdfs"]
2021
resolver = "2"
2122

2223
[workspace.package]
@@ -38,6 +39,8 @@ arrow-array = { version = "54.1.0" }
3839
arrow-buffer = { version = "54.1.0" }
3940
arrow-data = { version = "54.1.0" }
4041
arrow-schema = { version = "54.1.0" }
42+
async-trait = { version = "0.1" }
43+
bytes = { version = "1.10.0" }
4144
parquet = { version = "54.1.0", default-features = false, features = ["experimental"] }
4245
datafusion = { version = "45.0.0", default-features = false, features = ["unicode_expressions", "crypto_expressions"] }
4346
datafusion-common = { version = "45.0.0", default-features = false }

native/core/Cargo.toml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ parquet = { workspace = true, default-features = false, features = ["experimenta
4444
futures = { workspace = true }
4545
mimalloc = { version = "*", default-features = false, optional = true }
4646
tokio = { version = "1", features = ["rt-multi-thread"] }
47-
async-trait = "0.1"
47+
async-trait = { workspace = true }
4848
log = "0.4"
4949
log4rs = "1.2.0"
5050
thiserror = { workspace = true }
@@ -58,9 +58,9 @@ lz4_flex = { version = "0.11.3", default-features = false }
5858
zstd = "0.11"
5959
rand = { workspace = true}
6060
num = { workspace = true }
61-
bytes = "1.5.0"
61+
bytes = { workspace = true }
6262
tempfile = "3.8.0"
63-
itertools = "0.11.0"
63+
itertools = "0.14.0"
6464
paste = "1.0.14"
6565
datafusion-common = { workspace = true, features= ["object_store"] }
6666
datafusion = { workspace = true }
@@ -78,6 +78,7 @@ object_store = { workspace = true }
7878
url = { workspace = true }
7979
chrono = { workspace = true }
8080
parking_lot = "0.12.3"
81+
datafusion-comet-objectstore-hdfs = { path = "../hdfs", optional = true}
8182

8283
[dev-dependencies]
8384
pprof = { version = "0.14.0", features = ["flamegraph"] }
@@ -89,6 +90,11 @@ hex = "0.4.3"
8990

9091
[features]
9192
default = []
93+
hdfs=["datafusion-comet-objectstore-hdfs"]
94+
95+
# exclude optional packages from cargo machete verifications
96+
[package.metadata.cargo-machete]
97+
ignored = ["datafusion-comet-objectstore-hdfs"]
9298

9399
[lib]
94100
name = "comet"

native/hdfs/Cargo.toml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# This is an optional HDFS crate
19+
# To build it from the root, you must provide a valid JAVA_HOME
20+
# and enable the `hdfs` feature
21+
# Example: JAVA_HOME="/opt/homebrew/opt/openjdk@11" cargo build --features=hdfs
22+
23+
[package]
24+
name = "datafusion-comet-objectstore-hdfs"
25+
description = "Comet HDFS integration"
26+
version = { workspace = true }
27+
homepage = { workspace = true }
28+
repository = { workspace = true }
29+
authors = { workspace = true }
30+
readme = { workspace = true }
31+
license = { workspace = true }
32+
edition = { workspace = true }
33+
34+
[features]
35+
default = ["hdfs", "try_spawn_blocking"]
36+
hdfs = ["fs-hdfs"]
37+
hdfs3 = ["fs-hdfs3"]
38+
# Used for trying to spawn a blocking thread for implementing each object store interface when running in a tokio runtime
39+
try_spawn_blocking = []
40+
41+
[dependencies]
42+
async-trait = { workspace = true }
43+
bytes = { workspace = true }
44+
chrono = { workspace = true }
45+
fs-hdfs = { version = "^0.1.12", optional = true }
46+
fs-hdfs3 = { version = "^0.1.12", optional = true }
47+
futures = { workspace = true }
48+
object_store = { workspace = true }
49+
tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
50+
51+
[package.metadata.cargo-machete]
52+
ignored = ["fs-hdfs", "fs-hdfs3"]

0 commit comments

Comments
 (0)