Skip to content

Commit ecda14d

Browse files
committed
Add extension for Arrow + compatibility for JSON.Object handling
1 parent 6a892cb commit ecda14d

File tree

4 files changed

+159
-1
lines changed

4 files changed

+159
-1
lines changed

Project.toml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
33
version = "1.0.0"
44

55
[deps]
6+
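Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"  # NOTE(review): this makes Arrow a hard dependency; the extension is triggered only by the ArrowTypes weakdep and Arrow is already listed under [extras]/test — confirm it is needed in [deps]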
67
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
78
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
89
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
@@ -12,15 +13,25 @@ StructUtils = "ec057cc2-7a8d-4b58-b3b3-92acb9f63b42"
1213
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
1314
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
1415

16+
[weakdeps]
17+
ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
18+
19+
[extensions]
20+
JSONArrowExt = ["ArrowTypes"]
21+
1522
[compat]
23+
Arrow = "2.8.0"
24+
ArrowTypes = "2.2"
1625
Parsers = "1, 2"
1726
PrecompileTools = "1"
1827
StructUtils = "2"
1928
julia = "1.9"
2029

2130
[extras]
31+
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
32+
ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
2233
Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
2334
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2435

2536
[targets]
26-
test = ["Tar", "Test"]
37+
test = ["Arrow", "Tar", "Test"]

ext/JSONArrowExt.jl

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""
    JSONArrowExt

Package extension (triggered by `ArrowTypes`) that lets `JSON.Object` values and
arrays of them be written as Arrow struct columns and read back as
`JSON.Object{Symbol,Any}`.
"""
module JSONArrowExt

using JSON, ArrowTypes

# Name registered with ArrowTypes so that columns written from `JSON.Object`
# are mapped back to `JSON.Object` by `ArrowTypes.JuliaType` below.
const JSON_ARROW_NAME = Symbol("JuliaLang.JSON.Object")

# A JSON object is serialized as an Arrow struct.
ArrowTypes.ArrowKind(::Type{<:JSON.Object}) = ArrowTypes.StructKind()

# The Arrow-side representation of a single object is a NamedTuple of its pairs.
ArrowTypes.toarrow(x::JSON.Object) = (; x...)
ArrowTypes.arrowname(::Type{<:JSON.Object}) = JSON_ARROW_NAME
ArrowTypes.JuliaType(::Val{JSON_ARROW_NAME}) = JSON.Object{Symbol,Any}

# Rebuild an object from the struct's field names and the per-field values.
ArrowTypes.fromarrowstruct(::Type{T}, ::Val{nms}, vals...) where {T<:JSON.Object,nms} =
    T(nms[i] => vals[i] for i in eachindex(nms))

ArrowTypes.ToArrow(x::AbstractArray{T}) where {T<:JSON.Object} = _toarrow(x)
ArrowTypes.ToArrow(x::AbstractArray{Union{T,Missing}}) where {T<:JSON.Object} = _toarrow(x)
ArrowTypes.ToArrow(x::JSON.Object) = toarrow(x)

# Return `T` unchanged when it already admits `missing`, otherwise `Union{T,Missing}`.
_allowmissing(::Type{T}) where {T} = Missing <: T ? T : Union{T,Missing}

"""
    _toarrow(x)

Wrap an array of `JSON.Object`s (optionally containing `missing`) in an
`ArrowTypes.ToArrow` whose element type is a single `NamedTuple` type covering
the union of all keys found in `x`.

Field types are inferred from the `toarrow`-converted values and widened with
`ArrowTypes.promoteunion` when rows disagree. A field admits `Missing` when its
key is absent from at least one object, or when `x` contains a `missing` row.

Returns `Missing[]` for empty input and `x` itself when it is an array of
`Missing`.
"""
function _toarrow(
    x::Union{AbstractArray{T},AbstractArray{Union{T,Missing}}}
) where {T<:JSON.Object}
    isempty(x) && return Missing[]
    x isa AbstractArray{Missing} && return x
    # Key => inferred field type, in first-seen key order.
    fields = JSON.Object{Symbol,Type}()
    seen_fields = Set{Symbol}()
    hasmissing = false
    for (i, y) in enumerate(x)
        if y === missing
            hasmissing = true
            continue
        end
        current_fields = Set{Symbol}()
        for (k, vv) in y
            key = Symbol(k)
            push!(current_fields, key)
            vtype = typeof(toarrow(vv))
            existing_type = get(fields, key, nothing)
            if existing_type === nothing
                # A key first seen after the first row was absent from earlier rows.
                fields[key] = i == 1 ? vtype : _allowmissing(vtype)
            elseif !(vtype <: existing_type)
                fields[key] = ArrowTypes.promoteunion(existing_type, vtype)
            end
        end
        # Keys seen in earlier rows but absent from this one must admit `missing`.
        for field in seen_fields
            if !(field in current_fields)
                fields[field] = _allowmissing(fields[field])
            end
        end
        union!(seen_fields, current_fields)
    end
    if hasmissing
        # `_convert` below turns a `missing` row into a NamedTuple whose fields are
        # all `missing`, so every field type has to admit `Missing`; otherwise the
        # NamedTuple construction fails for columns such as `[obj, missing]`.
        for key in collect(keys(fields))
            fields[key] = _allowmissing(fields[key])
        end
    end
    nms = Tuple(keys(fields))
    NT = NamedTuple{nms,Tuple{values(fields)...}}
    return ArrowTypes.ToArrow{NT,typeof(x)}(x)
end

# Convert a row (a NamedTuple, or `missing`) to the column-wide NamedTuple type,
# filling fields the row does not have with `missing`.
# NOTE(review): this extends an ArrowTypes-internal function for every
# `NamedTuple` target type, not only columns built from `JSON.Object`; neither
# the function nor the argument types are owned by this module (type piracy).
# Confirm this is acceptable upstream or find a narrower hook.
function ArrowTypes._convert(::Type{NamedTuple{nms,T}}, nt) where {nms,T}
    vals = Tuple((nt !== missing && haskey(nt, nm)) ? toarrow(getproperty(nt, nm)) : missing for nm in nms)
    return NamedTuple{nms,T}(vals)
end

end # module

test/arrow.jl

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
using Test, JSON, Arrow

# Smallest object: two scalar fields only.
obj1 = JSON.parse("""
{
"int": 1,
"float": 2.1
}
""")

# Adds booleans, a null, an escaped string, and mixed / homogeneous arrays.
obj2 = JSON.parse("""
{
"int": 1,
"float": 2.1,
"bool1": true,
"bool2": false,
"none": null,
"str": "\\"hey there sailor\\"",
"arr": [null, 1, "hey"],
"arr2": [1.2, 3.4, 5.6]
}
""")

# Same as obj2 plus a nested object.
obj3 = JSON.parse("""
{
"int": 1,
"float": 2.1,
"bool1": true,
"bool2": false,
"none": null,
"str": "\\"hey there sailor\\"",
"obj": {
"a": 1,
"b": null,
"c": [null, 1, "hey"],
"d": [1.2, 3.4, 5.6]
},
"arr": [null, 1, "hey"],
"arr2": [1.2, 3.4, 5.6]
}
""")

# Round-trip a single-column table of heterogeneous objects through Arrow.
tbl = (; json=[obj1, obj2, obj3])
arrow = Arrow.Table(Arrow.tobuffer(tbl))

row1 = arrow.json[1]
row2 = arrow.json[2]
row3 = arrow.json[3]

# Fields present in every object.
for row in (row1, row2, row3)
    @test row.int == 1
    @test row.float == 2.1
end

# Fields shared by the second and third objects.
for row in (row2, row3)
    @test row.bool1 == true
    @test row.bool2 == false
    @test row.none === missing
    @test row.str == "\"hey there sailor\""
    @test isequal(row.arr, [missing, 1, "hey"])
    @test row.arr2 == [1.2, 3.4, 5.6]
end

# Nested object, only present in the third row.
nested = row3.obj
@test nested.a == 1
@test nested.b === nothing
@test isequal(nested.c, [missing, 1, "hey"])
@test nested.d == [1.2, 3.4, 5.6]

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ include(joinpath(dirname(pathof(JSON)), "../test/object.jl"))
44
include(joinpath(dirname(pathof(JSON)), "../test/lazy.jl"))
55
include(joinpath(dirname(pathof(JSON)), "../test/parse.jl"))
66
include(joinpath(dirname(pathof(JSON)), "../test/json.jl"))
7+
include(joinpath(dirname(pathof(JSON)), "../test/arrow.jl"))
78

89
function tar_files(tarball::String)
910
data = Dict{String, Vector{UInt8}}()

0 commit comments

Comments
 (0)