JuliaML
diff --git a/‎docs/src/datasets/graphs.md
Lines changed: 1 addition & 1 deletion b/‎docs/src/datasets/graphs.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/src/index.md
Lines changed: 2 additions & 2 deletions b/‎docs/src/index.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/abstract_datasets.jl
Lines changed: 6 additions & 7 deletions b/‎src/abstract_datasets.jl
Lines changed: 6 additions & 7 deletions
diff --git a/‎src/datasets/graphs/citeseer.jl
Lines changed: 2 additions & 3 deletions b/‎src/datasets/graphs/citeseer.jl
Lines changed: 2 additions & 3 deletions
diff --git a/‎src/datasets/graphs/cora.jl
Lines changed: 1 addition & 1 deletion b/‎src/datasets/graphs/cora.jl
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/datasets/graphs/karateclub.jl
Lines changed: 4 additions & 4 deletions b/‎src/datasets/graphs/karateclub.jl
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/datasets/graphs/ogbdataset.jl
Lines changed: 28 additions & 46 deletions b/‎src/datasets/graphs/ogbdataset.jl
Lines changed: 28 additions & 46 deletions
diff --git a/‎src/datasets/graphs/polblogs.jl
Lines changed: 1 addition & 1 deletion b/‎src/datasets/graphs/polblogs.jl
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/datasets/graphs/pubmed.jl
Lines changed: 1 addition & 1 deletion b/‎src/datasets/graphs/pubmed.jl
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/datasets/graphs/reddit.jl
Lines changed: 1 addition & 1 deletion b/‎src/datasets/graphs/reddit.jl
Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 A collection of datasets with an underlying graph structure.
 Some of these datasets contain a single graph, that can be accessed
-with `dataset[]` or `dataset[1]`. Others contain many graphs, 
+with `dataset[:]` or `dataset[1]`. Others contain many graphs, 
 accessed through `dataset[i]`. Graphs are represented by the [`MLDatasets.Graph`](@ref) type.
 
 ## Index
 
@@ -38,7 +38,7 @@ Where possible, those types share a common interface (fields and methods).
 
 Once a dataset has been instantiated, e.g. by `dataset = MNIST()`,  
 an observation `i` can be retrieved using the indexing syntax `dataset[i]`.
-By indexing with no arguments, `dataset[]`, the whole set of observations is collected.
+By indexing with a colon, `dataset[:]`, the whole set of observations is collected.
 The total number of observations is given by `length(dataset)`.
 
 For example you can load the training set of the [`MNIST`](@ref)
@@ -60,7 +60,7 @@ julia> trainset[1]  # return first observation as a NamedTuple
 (features = Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], 
  targets = 5)
 
-julia> X_train, y_train = trainset[] # return all observations
+julia> X_train, y_train = trainset[:] # return all observations
 (features = [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0;;; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0;;; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0;;; … ;;; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0;;; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0;;; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], 
  targets = [5, 0, 4, 1, 9, 2, 1, 3, 1, 4  …  9, 2, 9, 5, 1, 8, 3, 5, 6, 8])
 
 
@@ -4,13 +4,13 @@
 Super-type from which all datasets in MLDatasets.jl inherit.
 
 Implements the following functionality:
-- `getobs(d)` and `getobs(d, i)` falling back to `d[]` and `d[i]` 
+- `getobs(d)` and `getobs(d, i)` falling back to `d[:]` and `d[i]` 
 - Pretty printing.
 """
 abstract type AbstractDataset <: AbstractDataContainer end
 
 
-MLUtils.getobs(d::AbstractDataset) = d[]
+MLUtils.getobs(d::AbstractDataset) = d[:]
 MLUtils.getobs(d::AbstractDataset, i) = d[i]
 
 function Base.show(io::IO, d::D) where D <: AbstractDataset
@@ -58,11 +58,10 @@ a `features` and a `targets` fields.
 abstract type SupervisedDataset <: AbstractDataset end
 
 
-
 Base.length(d::SupervisedDataset) = numobs((d.features, d.targets))
 
 # We return named tuples
-Base.getindex(d::SupervisedDataset) = getobs((; d.features, d.targets)) 
+Base.getindex(d::SupervisedDataset, ::Colon) = getobs((; d.features, d.targets))
 Base.getindex(d::SupervisedDataset, i) = getobs((; d.features, d.targets), i)
 
 """
@@ -76,7 +75,7 @@ abstract type UnsupervisedDataset <: AbstractDataset end
 
 Base.length(d::UnsupervisedDataset) = numobs(d.features)
 
-Base.getindex(d::UnsupervisedDataset) = getobs(d.features) 
+Base.getindex(d::UnsupervisedDataset, ::Colon) = getobs(d.features)
 Base.getindex(d::UnsupervisedDataset, i) = getobs(d.features, i)
 
 
@@ -98,7 +97,7 @@ const FIELDS_SUPERVISED_TABLE = """
 
 const METHODS_SUPERVISED_TABLE = """
 - `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets. 
-- `dataset[]`: Return all observations as a named tuple of features and targets.
+- `dataset[:]`: Return all observations as a named tuple of features and targets.
 - `length(dataset)`: Number of observations.
 """
 
@@ -117,6 +116,6 @@ const FIELDS_SUPERVISED_ARRAY = """
 
 const METHODS_SUPERVISED_ARRAY = """
 - `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets. 
-- `dataset[]`: Return all observations as a named tuple of features and targets.
+- `dataset[:]`: Return all observations as a named tuple of features and targets.
 - `length(dataset)`: Number of observations.
 """
@@ -42,9 +42,8 @@ function CiteSeer(; dir=nothing, reverse_edges=true)
 end
 
 Base.length(d::CiteSeer) = length(d.graphs) 
-Base.getindex(d::CiteSeer) = d.graphs[1]
-Base.getindex(d::CiteSeer, i) = getindex(d.graphs, i)
-
+Base.getindex(d::CiteSeer, ::Colon) = d.graphs[1]
+Base.getindex(d::CiteSeer, i) = d.graphs[i]
 
 
 # DEPRECATED in v0.6.0
 
@@ -60,7 +60,7 @@ function Cora(; dir=nothing, reverse_edges=true)
 end
 
 Base.length(d::Cora) = length(d.graphs) 
-Base.getindex(d::Cora) = d.graphs[1]
+Base.getindex(d::Cora, ::Colon) = d.graphs[1]
 Base.getindex(d::Cora, i) = getindex(d.graphs, i)
 
 
 
@@ -1,9 +1,9 @@
 export KarateClub
 
 """
-    Zachary's Karate Club
+    KarateClub()
 
-The Karate Club dataset originally appeared in Ref [1].
+Zachary's karate club dataset originally appeared in Ref [1].
 
 The network contains 34 nodes (members of the karate club).
 The nodes are connected by 78 undirected and unweighted edges.
@@ -69,5 +69,5 @@ function KarateClub()
 end
 
 Base.length(d::KarateClub) = length(d.graphs) 
-Base.getindex(d::KarateClub) = d.graphs[1]
-Base.getindex(d::KarateClub, i) = getindex(d.graphs, i)
+Base.getindex(d::KarateClub, ::Colon) = d.graphs[1]
+Base.getindex(d::KarateClub, i) = d.graphs[i]
@@ -42,67 +42,48 @@ end
 The collection of datasets from the [Open Graph Benchmark: Datasets for Machine Learning on Graphs](https://arxiv.org/abs/2005.00687)
 paper. 
 
-`name` is the name  of one of the dasets (listed [here](https://ogb.stanford.edu/docs/dataset_overview/))
+`name` is the name of one of the datasets (listed [here](https://ogb.stanford.edu/docs/dataset_overview/))
 available for node prediction, edge prediction, or graph prediction tasks.
 
-The `OGBDataset` type stores the graphs internally as dictionary objects. 
-The key "edge_index" contains `2 x num_edges`, where the first and second
-column contain the source and target nodes of each edge respectively.
-
 # Examples
 
 ## Node prediction tasks
 
 ```julia-repl
-julia> data = OGBDataset("ogbn-arxiv")
-OGBDataset{Vector{Any}}:
-  name => ogbn-arxiv
-  path => /home/carlo/.julia/datadeps/OGBDataset/arxiv
-  metadata => Dict{String, Any} with 15 entries
-  graphs => 1-element Vector{Dict}
-  labels => 1-element Vector{Any}
-  split => Dict{String, Any} with 3 entries
-
+julia> data = OGBDataset("ogbn-arxiv")
+dataset OGBDataset:
+  name        =>    ogbn-arxiv
+  metadata    =>    Dict{String, Any} with 16 entries
+  graphs      =>    1-element Vector{MLDatasets.Graph}
+  targets     =>    nothing
+  split_idx   =>    (train = "90941-element Vector{Int64}", val = "29799-element Vector{Int64}", test = "48603-element Vector{Int64}")
+
+julia> data[1]
+Graph:
+  num_nodes   =>    169343
+  num_edges   =>    1166243
+  edge_index  =>    ("1166243-element Vector{Int64}", "1166243-element Vector{Int64}")
+  node_data   =>    (year = "1×169343 Matrix{Int64}", features = "128×169343 Matrix{Float32}", label = "1×169343 Matrix{Int64}")
+  edge_data   =>    nothing
 
 julia> data.metadata
-Dict{String, Any} with 15 entries:
+Dict{String, Any} with 16 entries:
+  "download_name"         => "arxiv"
   "num classes"           => 40
+  "num tasks"             => 1
   "binary"                => false
+  "url"                   => "http://snap.stanford.edu/ogb/data/nodeproppred/arxiv.zip"
+  "additional node files" => "node_year"
   "is hetero"             => false
+  "path"                  => "/home/carlo/.julia/datadeps/OGBDataset/arxiv"
   "eval metric"           => "acc"
   "task type"             => "multiclass classification"
-  "version"               => 1
-  "split"                 => "time"
-  "download_name"         => "arxiv"
-  "num tasks"             => 1
-  "url"                   => "http://snap.stanford.edu/ogb/data/nodeproppred/arxiv.zip"
-  "additional node files" => "node_year"
   "add_inverse_edge"      => false
   "has_node_attr"         => true
   "additional edge files" => nothing
+  "version"               => 1
   "has_edge_attr"         => false
-
-julia> data.split
-Dict{String, Any} with 3 entries:
-  "test_idx"  => [347, 399, 452, 481, 489, 491, 527, 538, 541, 603  …  169334, 169335, 169336, 169337, 169338, 169339, 169340, 169341, 169342, 169343]
-  "train_idx" => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10  …  169110, 169112, 169113, 169114, 169115, 169116, 169118, 169146, 169149, 169252]
-  "val_idx"   => [350, 358, 367, 383, 394, 422, 430, 436, 468, 470  …  169089, 169096, 169108, 169111, 169128, 169156, 169177, 169186, 169262, 169297]
-
-julia> length(data)
-1
-
-julia> graph, labels = data[1];
-
-julia> graph
-Dict{String, Any} with 6 entries:
-  "edge_index" => [104448 13092; 15859 47284; … ; 45119 162538; 45119 72718]
-  "edge_feat"  => nothing
-  "node_feat"  => Float32[-0.057943 -0.1245 … -0.138236 -0.029875; -0.05253 -0.070665 … 0.040885 0.268417; … ; -0.172796 -0.372111 … -0.041253 0.077647; -0.140059 -0.301036 … -0.376132 -0.091018]
-  "num_nodes"  => 169343
-  "node_year"  => [2013 2015 … 2020 2020]
-  "num_edges"  => 1166243
-
-julia> source, target = graph["edge_index][:,1], graph["edge_index][:,2];
+  "split"                 => "time"
 ```
 
 ## Edge prediction task
@@ -356,15 +337,16 @@ end
 
 function ogbdict2graph(d::Dict)
     edge_index = d["edge_index"][:,1], d["edge_index"][:,2] 
-    num_nodes, num_edges = d["num_nodes"], d["num_edges"]
+    num_nodes = d["num_nodes"]
     node_data = Dict(Symbol(k[6:end]) => v for (k,v) in d if startswith(k, "node_") && v !== nothing)
     edge_data = Dict(Symbol(k[6:end]) => v for (k,v) in d if startswith(k, "edge_") && k!="edge_index" && v !== nothing)
     node_data = isempty(node_data) ? nothing : (; node_data...)
     edge_data = isempty(edge_data) ? nothing : (; edge_data...)
-    return Graph(; num_nodes, num_edges,
-                 edge_index, node_data, edge_data)
+    return Graph(; num_nodes, edge_index, node_data, edge_data)
 end
 
 Base.length(data::OGBDataset) = length(data.graphs)
+Base.getindex(data::OGBDataset{Nothing}, ::Colon) = data.graphs
+Base.getindex(data::OGBDataset, ::Colon) = (; data.graphs, data.targets)
 Base.getindex(data::OGBDataset{Nothing}, i) = getobs(data.graphs, i)
 Base.getindex(data::OGBDataset, i) = getobs((; data.graphs, data.targets), i) 
@@ -51,5 +51,5 @@ function PolBlogs(; dir=nothing)
 end
 
 Base.length(d::PolBlogs) = length(d.graphs) 
-Base.getindex(d::PolBlogs) = d.graphs[1]
+Base.getindex(d::PolBlogs, ::Colon) = d.graphs[1]
 Base.getindex(d::PolBlogs, i) = getindex(d.graphs, i)
@@ -42,7 +42,7 @@ function PubMed(; dir=nothing, reverse_edges=true)
 end
 
 Base.length(d::PubMed) = length(d.graphs) 
-Base.getindex(d::PubMed) = d.graphs[1]
+Base.getindex(d::PubMed, ::Colon) = d.graphs[1]
 Base.getindex(d::PubMed, i) = getindex(d.graphs, i)
 
 
 
@@ -122,5 +122,5 @@ function Reddit(; full=true, dir=nothing)
 end
 
 Base.length(d::Reddit) = length(d.graphs) 
-Base.getindex(d::Reddit) = d.graphs
+Base.getindex(d::Reddit, ::Colon) = d.graphs
 Base.getindex(d::Reddit, i) = getindex(d.graphs, i)