Skip to content

Commit a9437ca

Browse files
remove tabledataset
1 parent 3f7aecf commit a9437ca

File tree

4 files changed

+34
-39
lines changed

4 files changed

+34
-39
lines changed

docs/src/containers/overview.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ MLDatasets.jl contains several reusable data containers for accessing datasets i
44

55
```@docs
66
FileDataset
7-
TableDataset
87
CachedDataset
98
MLDatasets.make_cache
109
```
@@ -13,6 +12,7 @@ MLDatasets.make_cache
1312
# TODO add back to docs when included again in the pkg
1413
HDF5Dataset
1514
Base.close(::HDF5Dataset)
15+
TableDataset
1616
JLD2Dataset
1717
Base.close(::JLD2Dataset)
1818
-->

src/MLDatasets.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,10 @@ include("download.jl")
4545

4646
include("containers/filedataset.jl")
4747
export FileDataset
48-
include("containers/tabledataset.jl")
49-
export TableDataset
5048
include("containers/cacheddataset.jl")
5149
export CachedDataset
50+
# include("containers/tabledataset.jl")
51+
# export TableDataset
5252

5353
## TODO add back when compatible with `@lazy` or `@require`
5454
## which means that they cannot dispatch on types from JLD2 and HDF5

src/containers/tabledataset.jl

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,41 +21,36 @@ end
2121
# Wrap an existing table value; the type parameter `T` is taken from the argument's type.
TableDataset(table::T) where {T} = TableDataset{T}(table)
2222
# Convenience constructor for a file path: read the file with `read_csv` (defined
# outside this excerpt) and wrap whatever it returns.
TableDataset(path::AbstractString) = TableDataset(read_csv(path))
2323

24-
# see https://github.com/JuliaML/MLUtils.jl/issues/67
25-
# Assume the table provides a size and indexing interface (DataFrame does)
26-
# otherwise have to resort to very slow fallbacks
27-
numobs_table(x) = size(x, 1)
28-
getobs_table(x, i) = x[i, :]
29-
30-
# # slow accesses based on Tables.jl
31-
# _getobs_row(x, i) = first(Iterators.peel(Iterators.drop(x, i - 1)))
32-
# function _getobs_column(x, i)
33-
# colnames = Tuple(Tables.columnnames(x))
34-
# rowvals = ntuple(j -> Tables.getcolumn(x, j)[i], length(colnames))
35-
36-
# return NamedTuple{colnames}(rowvals)
37-
# end
38-
39-
# function getobs_table(table, i)
40-
# if Tables.rowaccess(table)
41-
# return _getobs_row(Tables.rows(table), i)
42-
# elseif Tables.columnaccess(table)
43-
# return _getobs_column(table, i)
44-
# else
45-
# error("The Tables.jl implementation used should have either rowaccess or columnaccess.")
46-
# end
47-
# end
48-
49-
# function numobs_table(table)
50-
# if Tables.columnaccess(table)
51-
# return length(Tables.getcolumn(table, 1))
52-
# elseif Tables.rowaccess(table)
53-
# # length might not be defined, but has to be for this to work.
54-
# return length(Tables.rows(table))
55-
# else
56-
# error("The Tables.jl implementation used should have either rowaccess or columnaccess.")
57-
# end
58-
# end
24+
25+
# slow accesses based on Tables.jl
26+
# Return the `i`-th element of the iterable `x`: skip the first `i - 1` items, then
# take the head of what remains. The iterator is walked from its start on every call.
# NOTE(review): what happens when `x` has fewer than `i` items depends on what
# `Iterators.peel` yields for an exhausted iterator — confirm for the Julia version in use.
_getobs_row(x, i) = first(Iterators.peel(Iterators.drop(x, i - 1)))
27+
# Build observation `i` of a column-access table `x` as a `NamedTuple` keyed by the
# table's column names, with one value taken from each column at position `i`.
function _getobs_column(x, i)
28+
# Column names are converted to a tuple so they can be used as the NamedTuple's keys.
colnames = Tuple(Tables.columnnames(x))
29+
# Columns are fetched by position `j`; the `i`-th entry of each one is kept.
rowvals = ntuple(j -> Tables.getcolumn(x, j)[i], length(colnames))
30+
31+
return NamedTuple{colnames}(rowvals)
32+
end
33+
34+
# Fetch observation `i` from `table` using only the Tables.jl interface.
# Row access is tried first, then column access; a table that reports neither
# raises an error. The two branches return different things: the row branch
# returns whatever element `Tables.rows(table)` yields, the column branch returns
# a `NamedTuple` built by `_getobs_column`.
function getobs_table(table, i)
35+
if Tables.rowaccess(table)
36+
# Iterates over the rows up to position `i` (see `_getobs_row`).
return _getobs_row(Tables.rows(table), i)
37+
elseif Tables.columnaccess(table)
38+
return _getobs_column(table, i)
39+
else
40+
error("The Tables.jl implementation used should have either rowaccess or columnaccess.")
41+
end
42+
end
43+
44+
# Return the number of observations in `table` using only the Tables.jl interface.
# Column access is checked first here (the opposite order from `getobs_table`);
# a table that reports neither access mode raises an error.
function numobs_table(table)
45+
if Tables.columnaccess(table)
46+
# Uses the length of the first column as the observation count.
# NOTE(review): presumably all columns share this length — confirm for tables with no columns.
return length(Tables.getcolumn(table, 1))
47+
elseif Tables.rowaccess(table)
48+
# `length` may not be defined for every row iterator, but it must be for this branch to work.
49+
return length(Tables.rows(table))
50+
else
51+
error("The Tables.jl implementation used should have either rowaccess or columnaccess.")
52+
end
53+
end
5954

6055
# Indexing a `TableDataset` delegates to `getobs_table` on the wrapped table.
Base.getindex(dataset::TableDataset, i) = getobs_table(dataset.table, i)
6156
# The dataset's length is the observation count reported by `numobs_table`.
Base.length(dataset::TableDataset) = numobs_table(dataset.table)

test/runtests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ no_ci_dataset_tests = [
3232

3333
container_tests = [
3434
"containers/filedataset.jl",
35-
"containers/tabledataset.jl",
35+
# "containers/tabledataset.jl",
3636
# "containers/hdf5dataset.jl",
3737
# "containers/jld2dataset.jl",
3838
"containers/cacheddataset.jl",

0 commit comments

Comments
 (0)