Skip to content

Commit fa6b9cc

Browse files
updates
1 parent 948de74 commit fa6b9cc

File tree

10 files changed

+84
-138
lines changed

10 files changed

+84
-138
lines changed

.github/dependabot.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
2+
version: 2
3+
updates:
4+
- package-ecosystem: "github-actions"
5+
directory: "/" # Location of package manifests
6+
schedule:
7+
interval: "weekly"

.github/workflows/CI.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
matrix:
2020
version:
2121
- '1.9'
22-
- '1' # add back when 1.10 is out
22+
- '1'
2323
- 'nightly'
2424
os:
2525
- ubuntu-latest

.github/workflows/CompatHelper.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
run: which julia
1616
continue-on-error: true
1717
- name: Install Julia, but only if it is not already available in the PATH
18-
uses: julia-actions/setup-julia@v1
18+
uses: julia-actions/setup-julia@v2
1919
with:
2020
version: '1'
2121
arch: ${{ runner.arch }}
@@ -41,5 +41,10 @@ jobs:
4141
shell: julia --color=yes {0}
4242
env:
4343
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44+
# This repo uses Documenter, so we can reuse our [Documenter SSH key](https://documenter.juliadocs.org/stable/man/hosting/walkthrough/).
45+
# If we didn't have one of those set up, we could configure a dedicated SSH deploy key `COMPATHELPER_PRIV` following https://juliaregistries.github.io/CompatHelper.jl/dev/#Creating-SSH-Key.
46+
# Either way, we need an SSH key if we want the PRs that CompatHelper creates to be able to trigger CI workflows themselves.
47+
# That is because the `GITHUB_TOKEN` can't trigger other workflows (see https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#using-the-github_token-in-a-workflow).
48+
# Check whether you have a deploy key set up using these docs: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/reviewing-your-deploy-keys.
4449
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
4550
# COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}

.github/workflows/TagBot.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ on:
66
workflow_dispatch:
77
inputs:
88
lookback:
9-
default: 3
9+
default: "3"
1010
permissions:
1111
actions: read
1212
checks: read

CondaPkg.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels = ["conda-forge"]
44
# h5py = ""
55
# pillow = ">=9.1, <10"
66
# pyarrow = "==6.0.0"
7-
datasets = ">=2.12, <3"
8-
numpy = ">=1.20, <2"
7+
datasets = ">=3.0, <4"
8+
numpy = ">=2.0, <3"
99
pillow = ""
1010

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
1212

1313
[compat]
1414
CondaPkg = "0.2"
15-
DLPack = "0.1"
15+
DLPack = "0.3"
1616
ImageCore = "0.9, 0.10"
1717
MLUtils = "0.4.1"
1818
PythonCall = "0.9"

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,31 +23,31 @@ HuggingFaceDatasets.jl provides wrappers around types from the `datasets` python
2323
Check out the [examples/](https://github.com/JuliaGenAI/HuggingFaceDatasets.jl/tree/main/examples) folder for usage examples.
2424

2525
```julia
26+
julia> using HuggingFaceDatasets
27+
2628
julia> train_data = load_dataset("mnist", split = "train")
2729
Dataset({
2830
features: ['image', 'label'],
2931
num_rows: 60000
3032
})
3133

32-
# Indexing starts with 1.
33-
# Python types are returned by default.
3434
julia> train_data[1]
35-
Python: {'image': <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x7F04DE661CD0>, 'label': 5}
35+
Python: {'image': <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x3340B0290>, 'label': 5}
3636

3737
julia> length(train_data)
3838
60000
3939

40-
# Now we set the julia format
4140
julia> train_data = load_dataset("mnist", split = "train").with_format("julia");
4241

43-
# Returned observations are now julia objects
42+
# Returned observations are now julia objects
43+
4444
julia> train_data[1]
4545
Dict{String, Any} with 2 entries:
4646
"label" => 5
47-
"image" => Gray{N0f8}[Gray{N0f8}(0.0) Gray{N0f8}(0.0) Gray{N0f8}(0.0) Gray{N0f8}(0.0); Gray{N0f8}(0.0) Gray{N0f8}(0.0) Gray{N0f8}(0.0) Gray{N0f8}(0.0); ; Gray{N0f8}(0.0) Gray{N0f8}(0.0) ……
47+
"image" => Gray{N0f8}[0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]
4848

4949
julia> train_data[1:2]
5050
Dict{String, Vector} with 2 entries:
5151
"label" => [5, 0]
52-
"image" => ReinterpretArray{Gray{N0f8}, 2, UInt8, Matrix{UInt8}, false}[[Gray{N0f8}(0.0) Gray{N0f8}(0.0) Gray{N0f8}(0.0) Gray{N0f8}(0.0); Gray{N0f8}(0.0) Gray{N0f8}(0.0) Gray{N0f8}(0.0) Gra
52+
"image" => ReinterpretArray{Gray{N0f8}, 2, UInt8, Matrix{UInt8}, false}[[0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; ; 0
5353
```

docs/Manifest.toml

Lines changed: 0 additions & 100 deletions
This file was deleted.

src/transforms.jl

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,60 @@
11

2-
# See https://github.com/cjdoris/PythonCall.jl/issues/172.
3-
function _pyconvert(x::Py)
2+
# # See https://github.com/cjdoris/PythonCall.jl/issues/172.
3+
# function _pyconvert(x::Py)
4+
# @show x
5+
# if pyisinstance(x, datasets.Dataset)
6+
# return Dataset(x)
7+
# elseif pyisinstance(x, datasets.DatasetDict)
8+
# return DatasetDict(x)
9+
# elseif pyisinstance(x, PIL.PngImagePlugin.PngImageFile) || pyisinstance(x, PIL.JpegImagePlugin.JpegImageFile)
10+
# @show x
11+
# a = numpy2jl(np.array(x))
12+
# if ndims(a) == 3 && size(a, 1) == 3
13+
# return colorview(RGB{N0f8}, a)
14+
# elseif ndims(a) == 2
15+
# return reinterpret(Gray{N0f8}, a)
16+
# else
17+
# error("Unknown image format")
18+
# end
19+
# elseif pyisinstance(x, np.ndarray)
20+
# return numpy2jl(x)
21+
# else
22+
# return pyconvert(Any, x)
23+
# end
24+
# end
25+
26+
# # # Do nothing on a non-Py object.
27+
# # _pyconvert(x) = x
28+
29+
"""
30+
py2jl(x)
31+
32+
Convert Python types to Julia types applying `pyconvert` recursively.
33+
"""
34+
py2jl
35+
36+
# Fallback for non-`Py` values: convert directly with `pyconvert`. Containers, datasets, arrays and images are handled by the `py2jl(x::Py)` method.
37+
py2jl(x) = pyconvert(Any, x)
38+
39+
function py2jl(x::Py)
40+
# handle datasets
441
if pyisinstance(x, datasets.Dataset)
542
return Dataset(x)
643
elseif pyisinstance(x, datasets.DatasetDict)
744
return DatasetDict(x)
45+
# handle list, tuple, dict, and set
46+
elseif pyisinstance(x, pytype(pylist()))
47+
return [py2jl(x) for x in x]
48+
elseif pyisinstance(x, pytype(pytuple()))
49+
# `Tuple(itr)` collects the generator's elements; lowercase `tuple(gen)` would instead return a 1-tuple wrapping the generator itself.
return Tuple(py2jl(x) for x in x)
50+
elseif pyisinstance(x, pytype(pydict()))
51+
return Dict(py2jl(k) => py2jl(v) for (k, v) in x.items())
52+
elseif pyisinstance(x, pytype(pyset()))
53+
return Set(py2jl(x) for x in x)
54+
# handle numpy arrays
55+
elseif pyisinstance(x, np.ndarray)
56+
return numpy2jl(x)
57+
# handle PIL images
858
elseif pyisinstance(x, PIL.PngImagePlugin.PngImageFile) || pyisinstance(x, PIL.JpegImagePlugin.JpegImageFile)
959
a = numpy2jl(np.array(x))
1060
if ndims(a) == 3 && size(a, 1) == 3
@@ -14,30 +64,12 @@ function _pyconvert(x::Py)
1464
else
1565
error("Unknown image format")
1666
end
17-
elseif pyisinstance(x, np.ndarray)
18-
return numpy2jl(x)
67+
# handle other types
1968
else
2069
return pyconvert(Any, x)
2170
end
2271
end
2372

24-
# Do nothing on a non-Py object.
25-
_pyconvert(x) = x
26-
27-
"""
28-
py2jl(x)
29-
30-
Convert Python types to Julia types applying `pyconvert` recursively.
31-
"""
32-
py2jl
33-
34-
# py2jl recurses through pycanonicalize and converts through _pyconvert
35-
py2jl(x) = pycanonicalize(_pyconvert(x))
36-
37-
pycanonicalize(x) = x
38-
39-
pycanonicalize(x::PyList) = [py2jl(x) for x in x]
40-
pycanonicalize(x::PyDict) = Dict(py2jl(k) => py2jl(v) for (k, v) in pairs(x))
4173

4274
"""
4375
numpy2jl(x)

test/no_ci.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
using HuggingFaceDatasets, ImageShow, Test, ImageCore
2-
31
@testset "image classification" begin
42
@testset "cifar10" begin
53
ds = load_dataset("cifar10", split = "test").with_format("julia")
@@ -25,8 +23,12 @@ end
2523
ds = load_dataset("cppe-5", split = "test").with_format("julia")
2624
@test ds[1]["image"] isa AbstractMatrix{RGB{N0f8}}
2725
@test ds[1]["objects"] isa Dict{String, Vector}
28-
29-
@test ds[1:2]["image"] isa Vector{<:AbstractMatrix{RGB{N0f8}}}
26+
imgs = ds[1:2]["image"]
27+
@test imgs isa Vector{<:AbstractArray}
28+
@test imgs isa Vector{<:AbstractMatrix} broken=true
29+
@test imgs isa Vector{<:AbstractMatrix{RGB{N0f8}}} broken=true
30+
@test size(imgs[1]) == (1920, 1088)
31+
@test imgs[1] isa AbstractMatrix{RGB{N0f8}}
3032
@test ds[1:2]["objects"] isa Vector{Dict{String, Vector}}
3133
end
3234
end

0 commit comments

Comments
 (0)