Skip to content

Commit 1f3c1aa

Browse files
committed
get rid of separate type
1 parent 121b122 commit 1f3c1aa

File tree

4 files changed

+91
-109
lines changed

4 files changed

+91
-109
lines changed

docs/src/reference/datasets.md

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,16 @@ The versions are indexed with a linear list of integers starting from `1`.
4141
## Reference
4242

4343
```@docs
44-
JuliaHub.Dataset
45-
JuliaHub.DatasetVersion
46-
JuliaHub.datasets
47-
JuliaHub.DatasetReference
48-
JuliaHub.dataset
49-
JuliaHub.download_dataset
50-
JuliaHub.upload_dataset
51-
JuliaHub.update_dataset
52-
JuliaHub.delete_dataset
44+
Dataset
45+
DatasetVersion
46+
datasets
47+
DatasetReference
48+
dataset
49+
download_dataset
50+
upload_dataset
51+
update_dataset
52+
delete_dataset
53+
DatasetProjectLink
5354
```
5455

5556
## Index

docs/src/reference/projects.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ julia> JuliaHub.current_authentication()
3333
## Reference
3434

3535
```@docs
36-
ProjectDataset
3736
project_datasets
3837
project_dataset
3938
upload_project_dataset

src/datasets.jl

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
const _DOCS_nondynamic_datasets_object_warning = """
22
!!! warning "Non-dynamic dataset objects"
33
4-
[`Dataset`](@ref) and [`ProjectDataset`](@ref) objects represents the dataset metadata when the
5-
Julia object was created (e.g. with [`dataset`](@ref)), and are not automatically kept up to date.
6-
To refresh the dataset metadata, you can pass the existing [`Dataset`](@ref) to [`JuliaHub.dataset`](@ref),
7-
or [`ProjectDataset`](@ref) to [`project_dataset`](@ref).
4+
[`Dataset`](@ref) objects represent the dataset metadata when the Julia object was created
5+
(e.g. with [`dataset`](@ref)), and are not automatically kept up to date.
6+
To refresh the dataset metadata, you can pass an existing [`Dataset`](@ref) object
7+
to [`JuliaHub.dataset`](@ref) or [`project_dataset`](@ref).
88
"""
99

1010
Base.@kwdef struct _DatasetStorage
@@ -71,6 +71,25 @@ function Base.show(io::IO, ::MIME"text/plain", dsv::DatasetVersion)
7171
print(io, "\n size: ", dsv.size, " bytes")
7272
end
7373

74+
"""
75+
struct DatasetProjectLink
76+
77+
Holds the project-dataset link metadata for datasets that were accessed via a project
78+
(e.g. when using [`project_datasets`](@ref)).
79+
80+
- `.uuid :: UUID`: the UUID of the project
81+
- `.is_writable :: Bool`: whether the user has write access to the dataset via
82+
this project
83+
84+
See also: [`project_dataset`](@ref), [`project_datasets`](@ref), [`upload_project_dataset`](@ref).
85+
86+
$(_DOCS_no_constructors_admonition)
87+
"""
88+
struct DatasetProjectLink
89+
uuid::UUIDs.UUID
90+
is_writable::Bool
91+
end
92+
7493
"""
7594
struct Dataset
7695
@@ -87,6 +106,13 @@ public API:
87106
- Fields to access user-provided dataset metadata:
88107
- `description :: String`: dataset description
89108
- `tags :: Vector{String}`: a list of tags
109+
- If the dataset was accessed via a project (e.g. via [`project_datasets`](@ref)), `.project` will
110+
contain project metadata (see also: [`DatasetProjectLink`](@ref)). Otherwise this field is `nothing`.
111+
- `project.uuid`: the UUID of the project
112+
- `project.is_writable`: whether the user has write access to the dataset via
113+
this project
114+
Note that two `Dataset` objects are considered to be equal (i.e. `==`) regardless of the `.project`
115+
value -- it references the same dataset regardless of the project it was accessed in.
90116
91117
!!! note "Canonical fully qualified dataset name"
92118
@@ -108,6 +134,7 @@ Base.@kwdef struct Dataset
108134
# User-set metadata
109135
description::String
110136
tags::Vector{String}
137+
project::Union{DatasetProjectLink, Nothing}
111138
# Additional metadata, but not part of public API
112139
_last_modified::Union{Nothing, TimeZones.ZonedDateTime}
113140
_downloadURL::String
@@ -117,18 +144,38 @@ Base.@kwdef struct Dataset
117144
_json::Dict
118145
end
119146

120-
function Dataset(d::Dict)
147+
function Dataset(d::Dict; expected_project::Union{UUIDs.UUID, Nothing}=nothing)
121148
owner = d["owner"]["username"]
122149
name = d["name"]
123150
versions_json = _get_json_or(d, "versions", Vector, [])
124151
versions = sort([DatasetVersion(json; owner, name) for json in versions_json]; by=dsv -> dsv.id)
152+
project = if !isnothing(expected_project)
153+
project_json = _get_json(d, "project", Dict)
154+
project_json_uuid = UUIDs.UUID(
155+
_get_json(project_json, "project_id", String; msg=".project")
156+
)
157+
if project_json_uuid != expected_project
158+
# Report the UUID the caller asked for. `project` is the variable being assigned by the
# enclosing `if` expression, so it is not yet defined here and would raise UndefVarError.
msg = "Project UUID mismatch in dataset response: $(project_json_uuid), requested $(expected_project)"
159+
throw(JuliaHubError(msg))
160+
end
161+
is_writable = _get_json(
162+
project_json,
163+
"is_writable",
164+
Bool;
165+
msg="Unable to parse .project in /datasets?project response",
166+
)
167+
DatasetProjectLink(project_json_uuid, is_writable)
168+
else
169+
nothing
170+
end
125171
Dataset(;
126172
uuid=UUIDs.UUID(d["id"]),
127173
name, owner, versions,
128174
dtype=d["type"],
129175
description=d["description"],
130176
size=d["size"],
131177
tags=d["tags"],
178+
project=project,
132179
_downloadURL=d["downloadURL"],
133180
_last_modified=_nothing_or(d["lastModified"]) do last_modified
134181
datetime_utc = Dates.DateTime(
@@ -151,7 +198,12 @@ function Base.propertynames(::Dataset)
151198
end
152199

153200
function Base.show(io::IO, d::Dataset)
154-
print(io, "JuliaHub.dataset((\"", d.owner, "\", \"", d.name, "\"))")
201+
dsref = string("(\"", d.owner, "\", \"", d.name, "\")")
202+
if isnothing(d.project)
203+
print(io, "JuliaHub.dataset(", dsref, ")")
204+
else
205+
# Quote the project UUID so that the printed expression is valid, copy-pasteable Julia code.
print(io, "JuliaHub.project_dataset(", dsref, "; project=\"", d.project.uuid, "\")")
206+
end
155207
end
156208

157209
function Base.show(io::IO, ::MIME"text/plain", d::Dataset)
@@ -162,6 +214,13 @@ function Base.show(io::IO, ::MIME"text/plain", d::Dataset)
162214
print(io, "\n versions: ", length(d.versions))
163215
print(io, "\n size: ", d.size, " bytes")
164216
isempty(d.tags) || print(io, "\n tags: ", join(d.tags, ", "))
217+
if !isnothing(d.project)
218+
print(
219+
io,
220+
"\n project: ", d.project.uuid, " ",
221+
d.project.is_writable ? "(writable)" : "(not writable)",
222+
)
223+
end
165224
end
166225

167226
function Base.:(==)(d1::Dataset, d2::Dataset)
@@ -331,7 +390,9 @@ function datasets(
331390
end
332391

333392
function _parse_dataset_list(
334-
datasets::Vector; username::Union{AbstractString, Nothing}=nothing
393+
datasets::Vector;
394+
username::Union{AbstractString, Nothing}=nothing,
395+
expected_project::Union{UUIDs.UUID, Nothing}=nothing,
335396
)::Vector{Dataset}
336397
# It might happen that some of the elements of the `datasets` array can not be parsed for some reason,
337398
# and the Dataset() constructor will throw. Rather than having `datasets` throw an error (as we would
@@ -348,8 +409,14 @@ function _parse_dataset_list(
348409
if !isnothing(username) && (dataset["owner"]["username"] != username)
349410
return nothing
350411
end
351-
return Dataset(dataset)
412+
return Dataset(dataset; expected_project)
352413
catch e
414+
# If we fail to parse the server response for a dataset, we should always get a JuliaHubError.
415+
# Other error types might indicate e.g. code errors, so we don't want to swallow those
416+
# here, and instead throw immediately.
417+
if !isa(e, JuliaHubError)
418+
rethrow()
419+
end
353420
@debug "Invalid dataset in GET /datasets response" dataset exception = (
354421
e, catch_backtrace()
355422
)

src/projects.jl

Lines changed: 6 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -19,60 +19,6 @@ function _assert_projects_enabled(auth::Authentication)
1919
end
2020
end
2121

22-
"""
23-
struct ProjectDataset
24-
25-
A dataset object returned by the functions that return project dataset links.
26-
27-
Has the same fields as [`Dataset`](@ref) plus the following fields that are specific
28-
to project-dataset links:
29-
30-
- `project_uuid::UUID`: identifies the project in the context of which the dataset was listed
31-
- `is_writable :: Bool`: whether this dataset has been marked writable by the dataset owner
32-
"""
33-
struct ProjectDataset
34-
_dataset::Dataset
35-
project_uuid::UUIDs.UUID
36-
is_writable::Bool
37-
end
38-
39-
function Base.getproperty(pd::ProjectDataset, name::Symbol)
40-
dataset = getfield(pd, :_dataset)
41-
if name in fieldnames(ProjectDataset)
42-
return getfield(pd, name)
43-
elseif name in propertynames(dataset)
44-
return getproperty(dataset, name)
45-
else
46-
throw(ArgumentError("No property $name for ProjectDataset"))
47-
end
48-
end
49-
50-
function Base.show(io::IO, pd::ProjectDataset)
51-
print(
52-
io,
53-
"JuliaHub.project_dataset((\"",
54-
pd.owner,
55-
"\", \"",
56-
pd.name,
57-
"\"); project=\"",
58-
pd.project_uuid,
59-
"\")",
60-
)
61-
end
62-
function Base.show(io::IO, ::MIME"text/plain", pd::ProjectDataset)
63-
printstyled(io, "ProjectDataset:"; bold=true)
64-
print(io, " ", pd.name, " (", pd.dtype, ")")
65-
print(io, "\n owner: ", pd.owner)
66-
print(
67-
io, "\n project: ", pd.project_uuid, " ",
68-
pd.is_writable ? "(writable)" : "(not writable)",
69-
)
70-
print(io, "\n description: ", pd.description)
71-
print(io, "\n versions: ", length(pd.versions))
72-
print(io, "\n size: ", pd.size, " bytes")
73-
isempty(pd.tags) || print(io, "\n tags: ", join(pd.tags, ", "))
74-
end
75-
7622
"""
7723
const ProjectReference :: Type
7824
@@ -109,18 +55,18 @@ function _project_uuid(auth::Authentication, project::Union{ProjectReference, No
10955
end
11056

11157
"""
112-
JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> ProjectDataset
58+
JuliaHub.project_dataset(dataset::DatasetReference; [project::ProjectReference], [auth]) -> Dataset
11359
11460
Looks up the specified dataset among the datasets attached to the project, returning a
115-
[`ProjectDataset`](@ref) object, or throwing an [`InvalidRequestError`](@ref) if the project
61+
[`Dataset`](@ref) object, or throwing an [`InvalidRequestError`](@ref) if the project
11662
does not have the dataset attached.
11763
11864
$(_DOCS_nondynamic_datasets_object_warning)
11965
"""
12066
function project_dataset end
12167

12268
function project_dataset(
123-
dataset::Union{Dataset, ProjectDataset};
69+
dataset::Dataset;
12470
project::Union{ProjectReference, Nothing}=nothing,
12571
auth::Authentication=__auth__(),
12672
)
@@ -170,7 +116,7 @@ end
170116
"""
171117
JuliaHub.project_datasets([project::ProjectReference]; [auth::Authentication]) -> Vector{Dataset}
172118
173-
Returns the list of datasets attached to the project, as a list of [`ProjectDataset`](@ref) objects.
119+
Returns the list of datasets attached to the project, as a list of [`Dataset`](@ref) objects.
174120
If the project is not explicitly specified, it uses the project of the authentication object.
175121
"""
176122
function project_datasets end
@@ -206,38 +152,7 @@ function _project_datasets(auth::Authentication, project::UUIDs.UUID)
206152
JuliaHub._throw_invalidresponse(r; msg="Unable to fetch datasets.")
207153
end
208154
datasets, _ = JuliaHub._parse_response_json(r, Vector)
209-
n_erroneous_datasets = 0
210-
datasets = map(_parse_dataset_list(datasets)) do dataset
211-
try
212-
project_json = _get_json(dataset._json, "project", Dict)
213-
project_json_uuid = _get_json(project_json, "project_id", String; msg=".project")
214-
if project_json_uuid != string(project)
215-
@debug "Invalid dataset in GET /datasets?project= response" dataset project_json_uuid project
216-
n_erroneous_datasets += 1
217-
return nothing
218-
end
219-
is_writable = _get_json(
220-
project_json,
221-
"is_writable",
222-
Bool;
223-
msg="Unable to parse .project in /datasets?project response",
224-
)
225-
return ProjectDataset(dataset, project, is_writable)
226-
catch e
227-
isa(e, JuliaHubError) || rethrow(e)
228-
@debug "Invalid dataset in GET /datasets?project= response" dataset exception = (
229-
e, catch_backtrace()
230-
)
231-
n_erroneous_datasets += 1
232-
return nothing
233-
end
234-
end
235-
if n_erroneous_datasets > 0
236-
@warn "The JuliaHub GET /datasets?project= response contains erroneous project datasets. Omitting $(n_erroneous_datasets) entries."
237-
end
238-
# We'll filter down to just ProjectDataset objects, and enforce
239-
# type-stability of the array type here.
240-
return ProjectDataset[pd for pd in datasets if isa(pd, ProjectDataset)]
155+
return _parse_dataset_list(datasets; expected_project=project)
241156
end
242157

243158
"""
@@ -266,7 +181,7 @@ Uploads a new version of a project-linked dataset.
266181
function upload_project_dataset end
267182

268183
function upload_project_dataset(
269-
ds::Union{Dataset, ProjectDataset},
184+
ds::Dataset,
270185
local_path::AbstractString;
271186
progress::Bool=true,
272187
project::Union{ProjectReference, Nothing}=nothing,

0 commit comments

Comments
 (0)