Get rid of cruft from FastAI.jl

darsnack · darsnack · commit ebc6938195e0 · 2022-02-23T08:21:43.000-06:00
diff --git a/src/containers/cacheddataset.jl b/src/containers/cacheddataset.jl
@@ -8,14 +8,15 @@ make_cache(source, cacheidx) = getobs(source, cacheidx)
 
 """
     CachedDataset(source, cachesize = numbobs(source))
+    CachedDataset(source, cacheidx = 1:numobs(source))
     CachedDataset(source, cacheidx, cache)
 
 Wrap a `source` data container and cache `cachesize` samples in memory.
 This can be useful for improving read speeds when `source` is a lazy data container,
 but your system memory is large enough to store a sizeable chunk of it.
 
 By default the observation indices `1:cachesize` are cached.
-You can manually pass in a `cache` and set of `cacheidx` as well.
+You can manually pass in a set of `cacheidx` as well.
 
 See also [`make_cache`](@ref) for customizing the default cache creation for `source`.
 """
@@ -25,11 +26,9 @@ struct CachedDataset{T, S}
     cache::S
 end
 
-function CachedDataset(source, cachesize::Int = numobs(source))
-    cacheidx = 1:cachesize
-
+CachedDataset(source, cacheidx::AbstractVector{<:Integer} = 1:numobs(source)) =
     CachedDataset(source, collect(cacheidx), make_cache(source, cacheidx))
-end
+CachedDataset(source, cachesize::Int = numobs(source)) = CachedDataset(source, 1:cachesize)
 
 function Base.getindex(dataset::CachedDataset, i::Integer)
     _i = findfirst(==(i), dataset.cacheidx)
diff --git a/src/containers/filedataset.jl b/src/containers/filedataset.jl
@@ -1,8 +1,3 @@
-matches(re::Regex) = f -> matches(re, f)
-matches(re::Regex, f) = !isnothing(match(re, f))
-const RE_IMAGEFILE = r".*\.(gif|jpe?g|tiff?|png|webp|bmp)$"i
-isimagefile(f) = matches(RE_IMAGEFILE, f)
-
 """
     rglob(filepattern, dir = pwd(), depth = 4)
 
@@ -15,43 +10,26 @@ function rglob(filepattern = "*", dir = pwd(), depth = 4)
 end
 
 """
-    loadfile(file)
-
-Load a file from disk into the appropriate format.
-"""
-function loadfile(file::String)
-    if isimagefile(file)
-        # faster image loading
-        return FileIO.load(file, view = true)
-    elseif endswith(file, ".csv")
-        return DataFrame(CSV.File(file))
-    else
-        return FileIO.load(file)
-    end
-end
-loadfile(file::AbstractPath) = loadfile(string(file))
-
-"""
-    FileDataset([loadfn = loadfile,] paths)
-    FileDataset([loadfn = loadfile,] dir, pattern = "*", depth = 4)
+    FileDataset([loadfn = FileIO.load,] paths)
+    FileDataset([loadfn = FileIO.load,] dir, pattern = "*", depth = 4)
 
 Wrap a set of file `paths` as a dataset (traversed in the same order as `paths`).
 Alternatively, specify a `dir` and collect all paths that match a glob `pattern`
 (recursively globbing by `depth`). The glob order determines the traversal order.
 """
-struct FileDataset{F, T<:Union{AbstractPath, AbstractString}} <: AbstractDataContainer
+struct FileDataset{F, T<:AbstractString} <: AbstractDataContainer
     loadfn::F
     paths::Vector{T}
 end
 
-FileDataset(paths) = FileDataset(loadfile, paths)
+FileDataset(paths) = FileDataset(FileIO.load, paths)
 FileDataset(loadfn,
-            dir::Union{AbstractPath, AbstractString},
+            dir::AbstractString,
             pattern::AbstractString = "*",
             depth = 4) = FileDataset(loadfn, rglob(pattern, string(dir), depth))
-FileDataset(dir::Union{AbstractPath, AbstractString}, pattern::AbstractString = "*", depth = 4) =
-    FileDataset(loadfile, dir, pattern, depth)
+FileDataset(dir::AbstractString, pattern::AbstractString = "*", depth = 4) =
+    FileDataset(FileIO.load, dir, pattern, depth)
 
-Base.getindex(dataset::FileDataset, i::Integer) = loadfile(dataset.paths[i])
+Base.getindex(dataset::FileDataset, i::Integer) = dataset.loadfn(dataset.paths[i])
 Base.getindex(dataset::FileDataset, is::AbstractVector) = map(Base.Fix1(getobs, dataset), is)
 Base.length(dataset::FileDataset) = length(dataset.paths)
diff --git a/src/containers/hdf5dataset.jl b/src/containers/hdf5dataset.jl
@@ -5,7 +5,7 @@ function _check_hdf5_shapes(shapes)
 end
 
 """
-    HDF5Dataset(file::Union{AbstractString, AbstractPath}, paths)
+    HDF5Dataset(file::AbstractString, paths)
     HDF5Dataset(fid::HDF5.File, paths::Union{HDF5.Dataset, Vector{HDF5.Dataset}})
     HDF5Dataset(fid::HDF5.File, paths::Union{AbstractString, Vector{<:AbstractString}})
     HDF5Dataset(fid::HDF5.File, paths::Union{HDF5.Dataset, Vector{HDF5.Dataset}}, shapes)
@@ -38,8 +38,7 @@ HDF5Dataset(fid::HDF5.File, paths::Vector{HDF5.Dataset}) =
 HDF5Dataset(fid::HDF5.File, path::AbstractString) = HDF5Dataset(fid, fid[path])
 HDF5Dataset(fid::HDF5.File, paths::Vector{<:AbstractString}) =
     HDF5Dataset(fid, map(p -> fid[p], paths))
-HDF5Dataset(file::Union{AbstractString, AbstractPath}, paths) =
-    HDF5Dataset(h5open(file, "r"), paths)
+HDF5Dataset(file::AbstractString, paths) = HDF5Dataset(h5open(file, "r"), paths)
 
 _getobs_hdf5(dataset::HDF5.Dataset, ::Tuple{}, i) = read(dataset)
 function _getobs_hdf5(dataset::HDF5.Dataset, shape, i)
diff --git a/src/containers/jld2dataset.jl b/src/containers/jld2dataset.jl
@@ -1,7 +1,7 @@
 _check_jld2_nobs(nobs) = all(==(first(nobs)), nobs[2:end])
 
 """
-    JLD2Dataset(file::Union{AbstractString, AbstractPath}, paths)
+    JLD2Dataset(file::AbstractString, paths)
     JLD2Dataset(fid::JLD2.JLDFile, paths::Union{String, Vector{String}})
 
 Wrap several JLD2 datasets (`paths`) as a single dataset container.
@@ -25,8 +25,7 @@ struct JLD2Dataset{T<:JLD2.JLDFile, S<:Tuple} <: AbstractDataContainer
 end
 
 JLD2Dataset(file::JLD2.JLDFile, path::String) = JLD2Dataset(file, (path,))
-JLD2Dataset(file::Union{AbstractString, AbstractPath}, paths) =
-    JLD2Dataset(jldopen(file, "r"), paths)
+JLD2Dataset(file::AbstractString, paths) = JLD2Dataset(jldopen(file, "r"), paths)
 
 Base.getindex(dataset::JLD2Dataset{<:JLD2.JLDFile, <:NTuple{1}}, i) = getobs(only(dataset.paths), i)
 Base.getindex(dataset::JLD2Dataset, i) = map(Base.Fix2(getobs, i), dataset.paths)
diff --git a/src/containers/tabledataset.jl b/src/containers/tabledataset.jl
@@ -1,6 +1,6 @@
 """
     TableDataset(table)
-    TableDataset(path::Union{AbstractPath, AbstractString})
+    TableDataset(path::AbstractString)
 
 Wrap a Tables.jl-compatible `table` as a dataset container.
 Alternatively, specify the `path` to a CSV file directly
@@ -19,8 +19,7 @@ struct TableDataset{T} <: AbstractDataContainer
 end
 
 TableDataset(table::T) where {T} = TableDataset{T}(table)
-TableDataset(path::Union{AbstractPath, AbstractString}) =
-    TableDataset(DataFrame(CSV.File(path)))
+TableDataset(path::AbstractPath) = TableDataset(DataFrame(CSV.File(path)))
 
 # slow accesses based on Tables.jl
 _getobs_row(x, i) = first(Iterators.peel(Iterators.drop(x, i - 1)))