Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,50 @@ jobs:
continue-on-error: false
run: >
julia --color=yes --project=monorepo -e 'using Pkg; Pkg.test("Arrow")'
flight_interop:
# Dedicated job that runs only the Arrow Flight interop entrypoint (test/flight.jl)
# on Julia 1.x / ubuntu-latest, separate from the Pkg.test("Arrow") jobs.
name: Arrow Flight interop - Julia 1 - ubuntu-latest
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v6
# Python 3.11 with pyarrow/grpcio is installed before Julia; presumably it hosts the
# reference Flight servers the Julia tests talk to — confirm against test/flight/.
- uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Install Flight Python dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pyarrow grpcio grpcio-tools
- uses: julia-actions/setup-julia@v2
with:
version: '1'
# Cache ~/.julia/artifacts. The key hashes every Project.toml in the repo, and the
# restore-keys fall back to progressively broader prefixes when there is no exact hit.
- uses: actions/cache@v5
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-flight-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-flight-${{ env.cache-name }}-
${{ runner.os }}-flight-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1.6
with:
project: .
# Point the root project at the in-repo ArrowTypes checkout (src/ArrowTypes).
- name: Dev local ArrowTypes for Arrow.jl tests
shell: julia --project=. {0}
run: |
using Pkg
Pkg.develop(PackageSpec(path="src/ArrowTypes"))
# ARROW_FLIGHT_PYTHON carries the setup-python interpreter path into the Julia process
# (presumably read by the Flight tests — confirm). The script develops both local
# packages into the `test` environment and then includes only test/flight.jl.
- name: Run Arrow Flight interop tests
env:
ARROW_FLIGHT_PYTHON: ${{ env.pythonLocation }}/bin/python
run: >
julia --color=yes --project=test -e 'using Pkg;
Pkg.develop(PackageSpec(path="."));
Pkg.develop(PackageSpec(path="src/ArrowTypes"));
Pkg.instantiate();
using Test, Arrow;
include("test/flight.jl")'
docs:
name: Documentation
runs-on: ubuntu-latest
Expand Down
45 changes: 45 additions & 0 deletions .github/workflows/ci_nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,48 @@ jobs:
continue-on-error: false
run: >
julia --color=yes --project=monorepo -e 'using Pkg; Pkg.test("Arrow")'
flight_interop:
# Nightly counterpart of the Flight interop job: runs only test/flight.jl on a
# Julia nightly build (x64) on ubuntu-latest.
name: Arrow Flight interop - Julia nightly - ubuntu-latest
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v6
# Python 3.11 with pyarrow/grpcio is installed before Julia; presumably it hosts the
# reference Flight servers the Julia tests talk to — confirm against test/flight/.
- uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Install Flight Python dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pyarrow grpcio grpcio-tools
- uses: julia-actions/setup-julia@v2
with:
version: 'nightly'
arch: x64
# Cache ~/.julia/artifacts. The key hashes every Project.toml in the repo, and the
# restore-keys fall back to progressively broader prefixes when there is no exact hit.
- uses: actions/cache@v5
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-flight-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-flight-${{ env.cache-name }}-
${{ runner.os }}-flight-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1.6
with:
project: .
# Point the root project at the in-repo ArrowTypes checkout (src/ArrowTypes).
- name: Dev local ArrowTypes for Arrow.jl tests
shell: julia --project=. {0}
run: |
using Pkg
Pkg.develop(PackageSpec(path="src/ArrowTypes"))
# ARROW_FLIGHT_PYTHON carries the setup-python interpreter path into the Julia process
# (presumably read by the Flight tests — confirm). The script develops both local
# packages into the `test` environment and then includes only test/flight.jl.
- name: Run Arrow Flight interop tests
env:
ARROW_FLIGHT_PYTHON: ${{ env.pythonLocation }}/bin/python
run: >
julia --color=yes --project=test -e 'using Pkg;
Pkg.develop(PackageSpec(path="."));
Pkg.develop(PackageSpec(path="src/ArrowTypes"));
Pkg.instantiate();
using Test, Arrow;
include("test/flight.jl")'
17 changes: 16 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,16 @@ version = "2.8.1"

[deps]
ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1"
CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561"
CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
ConcurrentUtilities = "f0e56b4a-5159-44fe-b623-3e5288b988bb"
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
ProtoBuf = "3349acd9-ac6a-5e09-bcdb-63829b23a429"
gRPCClient = "aaca4a50-36af-4a1d-b878-4c443f2061ad"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
Expand All @@ -37,6 +40,15 @@ TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[weakdeps]
gRPCServer = "608c6337-0d7d-447f-bb69-0f5674ee3959"

[extensions]
ArrowgRPCServerExt = "gRPCServer"

[sources]
ArrowTypes = { path = "src/ArrowTypes" }

[compat]
ArrowTypes = "1.1,2"
BitIntegers = "0.2, 0.3"
Expand All @@ -45,10 +57,13 @@ CodecZstd = "0.7, 0.8"
ConcurrentUtilities = "2"
DataAPI = "1"
EnumX = "1"
ProtoBuf = "~1.2.1"
gRPCClient = "1"
gRPCServer = "0.1"
PooledArrays = "0.5, 1.0"
SentinelArrays = "1"
StringViews = "1"
Tables = "1.1"
TimeZones = "1"
TranscodingStreams = "0.9.12, 0.10, 0.11"
julia = "1.9"
julia = "1.12"
30 changes: 29 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ The package can be installed by typing in the following in a Julia REPL:
julia> using Pkg; Pkg.add("Arrow")
```

Arrow.jl currently requires Julia `1.12+`.

## Local Development

When developing on Arrow.jl it is recommended that you run the following to ensure that any
Expand All @@ -49,20 +51,46 @@ changes to ArrowTypes.jl are immediately available to Arrow.jl without requiring
julia --project -e 'using Pkg; Pkg.develop(path="src/ArrowTypes")'
```

Current write-path notes:
* `Arrow.tobuffer` includes a direct single-partition fast path for eligible inputs
* `Arrow.tobuffer(Tables.partitioner(...))` also includes a targeted direct multi-record-batch path for single-column top-level strings and single-column non-missing binary/code-units columns
* `Arrow.write(io, Tables.partitioner(...))` now reuses that same targeted direct multi-record-batch path instead of always going through the legacy `Writer` orchestration
* Multi-column partitions, dictionary-encoded top-level columns, map-heavy inputs, and missing-binary partitions retain the existing writer path

## Format Support

This implementation supports the 1.0 version of the specification, including support for:
* All primitive data types
* All nested data types
* Dictionary encodings and messages
* Dictionary-encoded `CategoricalArray` interop, including missing-value roundtrips through `Arrow.Table`, `copy`, and `DataFrame(...; copycols=true)`
* Extension types
* View-backed Utf8/Binary columns, including recovery from under-reported variadic buffer counts by inferring the required external buffers from valid view elements
* Streaming, file, record batch, and replacement and isdelta dictionary messages

It currently doesn't include support for:
* Tensors or sparse tensors
* Flight RPC
* C data interface

Flight RPC status:
* Experimental `Arrow.Flight` support is available in-tree
* Requires Julia `1.12+`
* Includes generated protocol bindings and complete client constructors for the `FlightService` RPC surface
* Keeps the top-level Flight module shell thin, with exports and generated-protocol setup split out of `src/flight/Flight.jl`
* Includes high-level `FlightData <-> Arrow IPC` helpers for `Arrow.Table`, `Arrow.Stream`, and DoPut payload generation
* Keeps the Flight IPC conversion layer modular under `src/flight/convert/`, with `src/flight/convert.jl` retained as a thin entrypoint
* Includes client helpers for request headers, binary metadata, handshake token reuse, and TLS configuration via `withheaders`, `withtoken`, and `authenticate`
* Keeps the Flight client implementation modular under `src/flight/client/`, with thin entrypoints at `src/flight/client.jl` and `src/flight/client/rpc_methods.jl`
* Includes a transport-agnostic server core (`Service`, `ServerCallContext`, `ServiceDescriptor`, `MethodDescriptor`) for local Flight method dispatch, path lookup, and handler testing
* Keeps the transport-agnostic server core modular under `src/flight/server/`, with `src/flight/server.jl` retained as a thin entrypoint
* Includes an optional `gRPCServer.jl` package extension that maps `Arrow.Flight.Service` into `gRPCServer.ServiceDescriptor` and registers Flight proto types with the external server package when it is present
* Keeps the optional `gRPCServer.jl` bridge modular under `ext/arrowgrpcserverext/`, with `ext/ArrowgRPCServerExt.jl` retained as a thin entrypoint
* Includes optional live interoperability coverage for `Handshake`, authenticated token propagation, `PollFlightInfo`, and TLS via dedicated Python reference servers
* Includes optional live `pyarrow.flight` interoperability coverage for `ListFlights`, `GetFlightInfo`, `GetSchema`, `DoGet`, `DoPut`, `DoExchange`, `ListActions`, and `DoAction`
* Keeps targeted Flight verification modular under `test/flight/`, with `test/flight.jl` retained as a thin entrypoint for local and CI invocation stability. The scenarios are decomposed by area: client-constructor/protocol-wrapper checks under `test/flight/client_surface/`, optional `gRPCServer` extension scenarios under `test/flight/grpcserver_extension/`, `pyarrow.flight` interop scenarios under `test/flight/pyarrow_interop/`, and transport-agnostic server-core checks under `test/flight/server_core/`
* Includes `test/flight_grpcserver.jl` as a temporary-environment runner for optional native `gRPCServer` coverage without mutating `test/Project.toml`
* Dedicated CI jobs now exercise the Flight interop suite on stable and nightly Linux; native Julia server transport remains optional/experimental and is not part of the default Flight suite

Third-party data formats:
* CSV, parquet and avro support via the existing [CSV.jl](https://github.com/JuliaData/CSV.jl), [Parquet.jl](https://github.com/JuliaIO/Parquet.jl) and [Avro.jl](https://github.com/JuliaData/Avro.jl) packages
* Other Tables.jl-compatible packages automatically supported ([DataFrames.jl](https://github.com/JuliaData/DataFrames.jl), [JSONTables.jl](https://github.com/JuliaData/JSONTables.jl), [JuliaDB.jl](https://github.com/JuliaData/JuliaDB.jl), [SQLite.jl](https://github.com/JuliaDatabases/SQLite.jl), [MySQL.jl](https://github.com/JuliaDatabases/MySQL.jl), [JDBC.jl](https://github.com/JuliaDatabases/JDBC.jl), [ODBC.jl](https://github.com/JuliaDatabases/ODBC.jl), [XLSX.jl](https://github.com/felipenoris/XLSX.jl), etc.)
Expand Down
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

Manifest.toml
*/Manifest.toml
dev/release/apache-rat-*.jar
dev/release/filtered_rat.txt
dev/release/rat.xml
Expand Down
10 changes: 10 additions & 0 deletions docs/src/manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,20 @@ One note on performance: when writing `TimeZones.ZonedDateTime` columns to the a
as the column has `ZonedDateTime` elements that all share a common timezone. This ensures the writing process can know "upfront" which timezone will be encoded and is thus much more
efficient and performant.

Similarly, `ArrowTypes.ToArrow` avoids repeated type-promotion work for
homogeneous custom columns even when `ArrowTypes.ArrowType(T)` is abstract, so
write-time conversion does not pay unnecessary overhead once the serialized
element type is stable.

#### Custom types

To support writing your custom Julia struct, Arrow.jl utilizes the format's mechanism for "extension types" by allowing the storing of Julia type name and metadata in the field metadata. To "hook in" to this machinery, custom types can utilize the interface methods defined in the `Arrow.ArrowTypes` submodule.

Arrow.jl already uses this mechanism for several Base logical types, including
`nothing`, `Tuple`, `VersionNumber`, and `Complex`, so those values roundtrip as
their original Julia types instead of falling back to plain struct-shaped
`NamedTuple`s.

For example, a custom type can hook in like this:

```julia
using Arrow

Expand Down
29 changes: 29 additions & 0 deletions ext/ArrowgRPCServerExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Package extension bridging Arrow's Flight service types to gRPCServer. It is
# declared in Project.toml as `ArrowgRPCServerExt = "gRPCServer"` under
# `[extensions]`, with gRPCServer listed as a weak dependency.
module ArrowgRPCServerExt

using Arrow
using gRPCServer

# constants.jl is included first: it defines the `Flight` alias and the constants
# that the later files refer to (for example in method signatures, which are
# evaluated when each definition is loaded).
include("arrowgrpcserverext/constants.jl")
include("arrowgrpcserverext/context.jl")
include("arrowgrpcserverext/streams.jl")
include("arrowgrpcserverext/handlers.jl")
# descriptor.jl adds the `gRPCServer.service_descriptor` method for
# `Flight.Service`, which calls helpers defined in the files above.
include("arrowgrpcserverext/descriptor.jl")

end # module ArrowgRPCServerExt
20 changes: 20 additions & 0 deletions ext/arrowgrpcserverext/constants.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Short alias so the extension sources can write `Flight.X` for `Arrow.Flight.X`.
const Flight = Arrow.Flight
# NOTE(review): presumably the capacity of the buffers used by the streaming
# helpers in streams.jl; its use is not visible from this file — confirm.
const STREAM_BUFFER_SIZE = 16
# Module prefix that `_proto_type_name` strips from the printed name of a type.
const GENERATED_TYPE_PREFIX = "Arrow.Flight.Generated."
46 changes: 46 additions & 0 deletions ext/arrowgrpcserverext/context.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
    _method_type(method::Flight.MethodDescriptor)

Translate the two streaming flags of a Flight method descriptor into the
corresponding `gRPCServer.MethodType` value:

- request and response streaming -> `BIDI_STREAMING`
- request streaming only         -> `CLIENT_STREAMING`
- response streaming only        -> `SERVER_STREAMING`
- neither                        -> `UNARY`
"""
function _method_type(method::Flight.MethodDescriptor)
    streams_in = method.request_streaming
    streams_out = method.response_streaming
    if streams_in && streams_out
        return gRPCServer.MethodType.BIDI_STREAMING
    elseif streams_in
        return gRPCServer.MethodType.CLIENT_STREAMING
    elseif streams_out
        return gRPCServer.MethodType.SERVER_STREAMING
    else
        return gRPCServer.MethodType.UNARY
    end
end

"""
    _call_context(context::gRPCServer.ServerContext)

Build a `Flight.ServerCallContext` from a gRPCServer call context.

Every metadata entry becomes a `Flight.HeaderPair`: the name is converted with
`String`, a `String` value is kept as is, and any other value is converted to a
`Vector{UInt8}`. The peer is rendered as `"address:port"`, and the call is
flagged as secure when the peer carries a certificate.
"""
function _call_context(context::gRPCServer.ServerContext)
    headers = Flight.HeaderPair[]
    for (name, value) in pairs(context.metadata)
        key = String(name)
        # Non-String metadata values are carried as raw bytes.
        payload = value isa String ? value : Vector{UInt8}(value)
        push!(headers, key => payload)
    end

    peer_info = context.peer
    peer = string(peer_info.address, ":", peer_info.port)
    secure = !isnothing(peer_info.certificate)

    return Flight.ServerCallContext(; headers=headers, peer=peer, secure=secure)
end

"""
    _proto_type_name(T::Type)

Return the printed name of `T` with a leading `GENERATED_TYPE_PREFIX` removed.
Names that do not start with that prefix are returned unchanged.
"""
function _proto_type_name(T::Type)
    # NOTE(review): `string(T)` follows Julia's type printing rules, so whether the
    # module prefix appears may depend on what is visible from the active module —
    # confirm this is stable in every context the extension is loaded from.
    printed = string(T)
    startswith(printed, GENERATED_TYPE_PREFIX) || return printed
    first_kept = ncodeunits(GENERATED_TYPE_PREFIX) + 1
    return printed[first_kept:end]
end
39 changes: 39 additions & 0 deletions ext/arrowgrpcserverext/descriptor.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
    _register_proto_types!(method::Flight.MethodDescriptor)

Store the request and response types of `method` in the registry returned by
`gRPCServer.get_type_registry()`, keyed by their `_proto_type_name`.
Returns `nothing`.
"""
function _register_proto_types!(method::Flight.MethodDescriptor)
    registry = gRPCServer.get_type_registry()
    # Request first, then response; a later identical key overwrites the earlier one.
    for proto_type in (method.request_type, method.response_type)
        registry[_proto_type_name(proto_type)] = proto_type
    end
    return nothing
end

"""
    gRPCServer.service_descriptor(service::Flight.Service)

Convert the Flight descriptor of `service` into a `gRPCServer.ServiceDescriptor`.

For each Flight method this registers its request/response types with the
gRPCServer type registry and creates a `gRPCServer.MethodDescriptor` (keyed by
the method name) holding the method type, the proto type names, and the handler
built by `_handler`.
"""
function gRPCServer.service_descriptor(service::Flight.Service)
    flight_descriptor = Flight.servicedescriptor(service)
    grpc_methods = Dict{String,gRPCServer.MethodDescriptor}()

    for method in flight_descriptor.methods
        _register_proto_types!(method)

        kind = _method_type(method)
        request_name = _proto_type_name(method.request_type)
        response_name = _proto_type_name(method.response_type)
        handler = _handler(service, method)

        grpc_methods[method.name] = gRPCServer.MethodDescriptor(
            method.name, kind, request_name, response_name, handler
        )
    end

    return gRPCServer.ServiceDescriptor(flight_descriptor.name, grpc_methods, nothing)
end
Loading