Skip to content

Commit 32b473e

Browse files
ImageShow and docs
1 parent daef202 commit 32b473e

18 files changed

+84
-1189
lines changed

Project.toml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
name = "MLDatasets"
22
uuid = "eb30cadb-4394-5ae3-aed4-317e484a6458"
3-
version = "0.6.0"
3+
version = "0.7.0"
44

55
[deps]
66
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
7-
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
87
DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
98
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
109
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
@@ -13,7 +12,7 @@ FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
1312
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
1413
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
1514
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
16-
ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
15+
ImageShow = "4e3cecfd-b093-5904-9786-8bbb286a6a31"
1716
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
1817
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
1918
LazyModules = "8cdb02fc-e678-4876-92c5-9defec4f444e"
@@ -27,15 +26,14 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
2726

2827
[compat]
2928
CSV = "0.10.2"
30-
ColorTypes = "0.11"
3129
DataDeps = "0.7"
3230
DataFrames = "1.3"
3331
FileIO = "1.14"
3432
FixedPointNumbers = "0.8"
3533
GZip = "0.5"
3634
Glob = "1.3"
3735
HDF5 = "0.16.2"
38-
ImageCore = "0.9"
36+
ImageShow = "0.3"
3937
JLD2 = "0.4.21"
4038
JSON3 = "1"
4139
LazyModules = "0.3"
@@ -48,7 +46,8 @@ Tables = "1.6"
4846
julia = "1.6"
4947

5048
[extras]
49+
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
5150
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
5251

5352
[targets]
54-
test = ["Test"]
53+
test = ["Test", "ColorTypes"]

docs/src/index.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ julia> summary(X_train)
7171
Input features are commonly denoted by `features`, while classification labels and regression targets are denoted by `targets`.
7272

7373
```julia-repl
74+
julia> using MLDatasets, DataFrames
75+
7476
julia> iris = Iris()
7577
dataset Iris:
7678
metadata => Dict{String, Any} with 4 entries
@@ -136,6 +138,38 @@ julia> iris.targets
136138
MLDatasets.jl guarantees compatibility with the [getobs](https://juliaml.github.io/MLUtils.jl/dev/api/#MLUtils.getobs) and [numobs](https://juliaml.github.io/MLUtils.jl/dev/api/#MLUtils.numobs) interface defined in [MLUtils.jl](https://github.com/JuliaML/MLUtils.jl).
137139
In practice, applying `getobs` and `numobs` on datasets is equivalent to applying indexing and `length`.
138140

141+
## Conditional module loading
142+
143+
MLDatasets.jl relies on many different packages in order to load and process the diverse types of
144+
datasets it supports. Most likely, any single user of the library will use a limited subset
145+
of these functionalities. In order to reduce the time taken by `using MLDatasets` in users' code,
146+
we use a [lazy import system](https://github.com/johnnychen94/LazyModules.jl) that defers the import of packages inside MLDatasets.jl as much as possible.
147+
For some of the packages (e.g. `DataFrames.jl`), some manual intervention is needed from the user.
148+
As an example, the following code will produce an error:
149+
```julia-repl
150+
julia> using MLDatasets
151+
152+
julia> MNIST(); # fine, MNIST doesn't require DataFrames
153+
```
154+
```julia-repl
155+
julia> using MLDatasets
156+
157+
julia> MNIST(); # fine, MNIST doesn't require DataFrames
158+
159+
julia> Iris() # ERROR: Add `import DataFrames` or `using DataFrames` to your code to unlock this functionality.
160+
```
161+
As recommended by the error message we can easily fix the error with an additional import:
162+
```julia-repl
163+
julia> using MLDatasets, DataFrames
164+
165+
julia> Iris()
166+
dataset Iris:
167+
metadata => Dict{String, Any} with 4 entries
168+
features => 150×4 DataFrame
169+
targets => 150×1 DataFrame
170+
dataframe => 150×5 DataFrame
171+
```
172+
139173
## Download location
140174

141175
MLDatasets.jl is built on top of the package

src/MLDatasets.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ using Tables
66
using DataDeps
77
import MLUtils
88
using MLUtils: getobs, numobs, AbstractDataContainer
9-
using ColorTypes
109
using Glob
1110
using DelimitedFiles: readdlm
1211
using FileIO
@@ -22,7 +21,7 @@ include("require.jl") # export @require
2221

2322
@require import JSON3="0f8b85d8-7281-11e9-16c2-39a750bddbf1"
2423
@require import DataFrames="a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
25-
@lazy import ImageCore="a09fc81d-aa75-5fe9-8630-4744c3626534"
24+
@require import ImageShow="4e3cecfd-b093-5904-9786-8bbb286a6a31"
2625
# @lazy import NPZ # lazy imported by FileIO
2726
@lazy import Pickle="fbb45041-c46e-462f-888f-7c521cafbc2c"
2827
@lazy import MAT="23992714-dd62-5051-b70f-ba57cb901cac"

src/containers/tabledataset.jl

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -21,35 +21,41 @@ end
2121
TableDataset(table::T) where {T} = TableDataset{T}(table)
2222
TableDataset(path::AbstractString) = TableDataset(read_csv(path))
2323

24-
# slow accesses based on Tables.jl
25-
_getobs_row(x, i) = first(Iterators.peel(Iterators.drop(x, i - 1)))
26-
function _getobs_column(x, i)
27-
colnames = Tuple(Tables.columnnames(x))
28-
rowvals = ntuple(j -> Tables.getcolumn(x, j)[i], length(colnames))
24+
# see https://github.com/JuliaML/MLUtils.jl/issues/67
25+
# Assume the table provides a size and indexing interface (DataFrame does)
26+
# otherwise have to resort to very slow fallbacks
27+
numobs_table(x) = size(x, 1)
28+
getobs_table(x, i) = x[i, :]
2929

30-
return NamedTuple{colnames}(rowvals)
31-
end
30+
# # slow accesses based on Tables.jl
31+
# _getobs_row(x, i) = first(Iterators.peel(Iterators.drop(x, i - 1)))
32+
# function _getobs_column(x, i)
33+
# colnames = Tuple(Tables.columnnames(x))
34+
# rowvals = ntuple(j -> Tables.getcolumn(x, j)[i], length(colnames))
3235

33-
function getobs_table(table, i)
34-
if Tables.rowaccess(table)
35-
return _getobs_row(Tables.rows(table), i)
36-
elseif Tables.columnaccess(table)
37-
return _getobs_column(table, i)
38-
else
39-
error("The Tables.jl implementation used should have either rowaccess or columnaccess.")
40-
end
41-
end
36+
# return NamedTuple{colnames}(rowvals)
37+
# end
4238

43-
function numobs_table(table)
44-
if Tables.columnaccess(table)
45-
return length(Tables.getcolumn(table, 1))
46-
elseif Tables.rowaccess(table)
47-
# length might not be defined, but has to be for this to work.
48-
return length(Tables.rows(table))
49-
else
50-
error("The Tables.jl implementation used should have either rowaccess or columnaccess.")
51-
end
52-
end
39+
# function getobs_table(table, i)
40+
# if Tables.rowaccess(table)
41+
# return _getobs_row(Tables.rows(table), i)
42+
# elseif Tables.columnaccess(table)
43+
# return _getobs_column(table, i)
44+
# else
45+
# error("The Tables.jl implementation used should have either rowaccess or columnaccess.")
46+
# end
47+
# end
48+
49+
# function numobs_table(table)
50+
# if Tables.columnaccess(table)
51+
# return length(Tables.getcolumn(table, 1))
52+
# elseif Tables.rowaccess(table)
53+
# # length might not be defined, but has to be for this to work.
54+
# return length(Tables.rows(table))
55+
# else
56+
# error("The Tables.jl implementation used should have either rowaccess or columnaccess.")
57+
# end
58+
# end
5359

5460
Base.getindex(dataset::TableDataset, i) = getobs_table(dataset.table, i)
5561
Base.length(dataset::TableDataset) = numobs_table(dataset.table)

src/datasets/text/udenglish.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ function __init__udenglish()
2222
detail on the Website.
2323
""",
2424
"https://raw.githubusercontent.com/UniversalDependencies/UD_English-EWT/master/" .* [TRAINFILE, DEVFILE, TESTFILE],
25-
"6df6ee25ab3cd1cde3a09ab075dcc6b8c90d18648eef0809f400be4ad8bc81e2"
2625
))
2726
end
2827

src/datasets/vision/cifar10.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,8 @@ convert2image(::Type{<:CIFAR10}, x::AbstractArray{<:Integer}) =
165165
function convert2image(::Type{<:CIFAR10}, x::AbstractArray{T,N}) where {T,N}
166166
@assert N == 3 || N == 4
167167
x = permutedims(x, (3, 2, 1, 4:N...))
168-
return ImageCore.colorview(RGB, x)
168+
ImageCore = ImageShow.ImageCore
169+
return ImageCore.colorview(ImageCore.RGB, x)
169170
end
170171

171172

src/datasets/vision/mnist.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ convert2image(::Type{<:MNIST}, x::AbstractArray{<:Integer}) =
141141
function convert2image(::Type{<:MNIST}, x::AbstractArray{T,N}) where {T,N}
142142
@assert N == 2 || N == 3
143143
x = permutedims(x, (2, 1, 3:N...))
144-
return ImageCore.colorview(Gray, x)
144+
ImageCore = ImageShow.ImageCore
145+
return ImageCore.colorview(ImageCore.Gray, x)
145146
end
146147

147148
# DEPRECATED INTERFACE, REMOVE IN v0.7 (or 0.6.x)

src/utils.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,6 @@ bytes_to_type(::Type{T}, A::Array{UInt8}) where T<:Integer = convert(Array{T}, A
3636
bytes_to_type(::Type{T}, A::Array{UInt8}) where T<:AbstractFloat = A ./ T(255)
3737
bytes_to_type(::Type{T}, A::Array{UInt8}) where T<:Number = convert(Array{T}, reinterpret(N0f8, A))
3838

39-
# see https://github.com/JuliaML/MLUtils.jl/issues/67
40-
numobs_table(x) = size(x, 1)
41-
getobs_table(x, i) = x[i, :]
4239

4340
function clean_nt(nt::NamedTuple)
4441
res = (; (p for p in pairs(nt) if p[2] !== nothing)...)

test.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
using TestImages
2+
x = testimage("cameraman")
3+
using ImageShow
4+
x

test/datasets/graphs_deprecated.jl

Lines changed: 0 additions & 50 deletions
This file was deleted.

0 commit comments

Comments
 (0)