Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DataSets"
uuid = "c9661210-8a83-48f0-b833-72e62abce419"
authors = ["Chris Foster <[email protected]> and contributors"]
version = "0.2.11"
version = "0.2.12"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
32 changes: 27 additions & 5 deletions src/DataSets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,27 +87,49 @@ end
Check whether a dataset name is valid.

Valid names must start with a letter or a number, the rest of the name can also contain `-`
and `_` characters. The names can also be hieracicial, with segments separated by forward
slashes (`/`). Each segment must also start with either a letter or a number. For example:
and `_` characters. The names can also be hierarchical, with segments separated by forward
slashes (`/`) or periods (`.`). Each segment must also start with either a letter or a number.

For example, the following dataset names are valid:

my_data
my_data_1
username/data
organization_name/project-name/data
123user/456dataset--name
username/my_table.csv
dataset/v0.1.2

whereas names like these are invalid:

__mydata__
username/.git
my...dataset

!!! note "Segment separators"

In dataset names, both `/` and `.` are considered segment separators from a syntax
perspective. While DataSets.jl does not impose any specific interpretation on the
dataset name, it is recommended to use `/` to separate segments from a semantic
perspective, and to interpret each forward-slash-separated segment as a path element.
Periods would conventionally be used to separate file extensions within a segment.

For example, prefer `username/my-project-data/population.csv` over
`username.my-project-data.population.csv`.
"""
function check_dataset_name(name::AbstractString)
    # The whole name must match DATASET_NAME_REGEX (anchored at both ends); anything
    # else is rejected with an `ErrorException` describing the naming rules.
    if !occursin(DATASET_NAME_REGEX, name)
        # Only the up-to-date message is kept: names may start with a letter *or* a
        # number, and `.` is accepted as a segment separator alongside `/`.
        error(
            "DataSet name \"$name\" is invalid. DataSet names must start with a letter " *
            "or a number, and can contain only letters, numbers, `-` and `_`, or `/` " *
            "and `.` as segment separators.",
        )
    end
    # Valid names yield `nothing`; callers and tests compare against it with `===`.
    return nothing
end
# DataSet names disallow most punctuation for now, as it may be needed as
# delimiters in data-related syntax (eg, for the data REPL).
#
# The pattern below is written for PCRE extended mode (the "x" flag), so the
# whitespace and newlines inside it are layout only. It reads as:
#   * the first character must be alphanumeric;
#   * every following character is either alphanumeric, `-` or `_`, or a
#     separator (`.` or `/`) that is immediately followed by an alphanumeric
#     character (enforced with a lookahead, so the separator can never be the
#     last character, be doubled, or precede `-`/`_`).
const DATASET_NAME_REGEX_STRING = raw"""
[[:alnum:]]
(?:
    [-[:alnum:]_]      |
    \.(?=[[:alnum:]])  |
    \/ (?=[[:alnum:]])
)*
"""
# Anchored version used to validate a complete dataset name.
const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\$", "x")
Expand Down
58 changes: 46 additions & 12 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,21 +98,55 @@ end
end

#-------------------------------------------------------------------------------
# Read a list of names from `filename` (resolved relative to this test directory),
# one entry per line. Surrounding whitespace is stripped from every line and lines
# that end up empty are dropped.
function load_list(filename)
    path = joinpath(@__DIR__, filename)
    stripped = map(strip, eachline(path))
    return filter(!isempty, stripped)
end
@testset "Data set name parsing" begin
    # Names are read from the text fixtures next to this file (one name per line) so
    # that the lists of valid/invalid names can grow without touching the test code.
    @testset "Valid names" begin
        valid_names = load_list("testnames-valid.txt")
        # Guard against a missing or empty fixture silently testing nothing.
        @test !isempty(valid_names)
        @testset "Valid name: $name" for name in valid_names
            @test DataSets.check_dataset_name(name) === nothing
            @test DataSets._split_dataspec(name) == (name, nothing, nothing)
            # A valid name must stay valid when it appears as a path element at the
            # end, start or middle of a longer name.
            for path_name in ("foo/$(name)", "$(name)/foo", "foo/$(name)/bar")
                @test DataSets.check_dataset_name(path_name) === nothing
                @test DataSets._split_dataspec(path_name) == (path_name, nothing, nothing)
            end
        end
    end

    @testset "Invalid names" begin
        invalid_names = load_list("testnames-invalid.txt")
        # Guard against a missing or empty fixture silently testing nothing.
        @test !isempty(invalid_names)
        @testset "Invalid name: $name" for name in invalid_names
            @test_throws ErrorException DataSets.check_dataset_name(name)
            @test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
            # An invalid name must stay invalid when it appears as a path element at
            # the end, start or middle of a longer name.
            for path_name in ("foo/$(name)", "$(name)/foo", "foo/$(name)/bar")
                # `@test_throws` only needs the call itself; comparing its (never
                # produced) result against `nothing` would be meaningless.
                @test_throws ErrorException DataSets.check_dataset_name(path_name)
                @test DataSets._split_dataspec(path_name) == (nothing, nothing, nothing)
            end
        end
    end
end

Expand Down
25 changes: 25 additions & 0 deletions test/testnames-invalid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
a b
a/b/
a//b
/a/b
a/-
a/ _/b
a/-a
a/-1
.a
..a
a.
a..
.a.
a..b
.abc
abc.
abc/.def
abc/def.
a./b
a.-
_._
a._b
a.-b
./a
b/../a
24 changes: 24 additions & 0 deletions test/testnames-valid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
a_b
a-b
a1
δεδομένα
a/b
a/b/c
a-
b_
1
a/1
123
12ab/34cd
1/2/3
1-2-3
x_-__
a---
a.b
a.b
abc.def
abc/def.ghi
abc-def.ghi_jkl
a.b.c
a_.c
foo__-.csv
Loading