Skip to content

Commit 2b31808

Browse files
authored
Handle hyphens in dataset() (#57)
1 parent bd8444e commit 2b31808

File tree

3 files changed

+53
-38
lines changed

3 files changed

+53
-38
lines changed

src/DataSet.jl

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -51,20 +51,17 @@ separated with forward slashes. Examples:
5151
username/data
5252
organization/project/data
5353
"""
54-
function is_valid_dataset_name(name::AbstractString)
    # DataSet names disallow most punctuation for now, as it may be needed as
    # delimiters in data-related syntax (eg, for the data REPL).
    #
    # The pattern is compiled in extended (`x`) mode, so whitespace and
    # `#`-comments inside it are ignored. It is anchored at the end with `\z`
    # rather than `$`: in PCRE, `$` also matches just before a trailing
    # newline, which would let a name such as "a\n" pass validation.
    dataset_name_pattern = r"
        ^
        [[:alpha:]]               # first character must be a letter
        (?:
            [-[:alnum:]_] |       # letters, digits, hyphen or underscore
            / (?=[[:alpha:]])     # a slash only when a letter follows it
        )*
        \z
        "x
    return occursin(dataset_name_pattern, name)
end
54+
is_valid_dataset_name(name::AbstractString) = occursin(DATASET_NAME_REGEX, name)

# DataSet names disallow most punctuation for now, as it may be needed as
# delimiters in data-related syntax (eg, for the data REPL).
#
# Unanchored pattern source for a dataset name, written for PCRE extended (`x`)
# mode so the whitespace and line breaks below are not part of the match:
#   * the first character must be a letter;
#   * later characters may be letters, digits, `-` or `_`;
#   * `/` is allowed only when it is immediately followed by a letter.
# It is kept as a plain string so that it can be interpolated into larger
# patterns.
const DATASET_NAME_REGEX_STRING = raw"""
[[:alpha:]]
(?:
[-[:alnum:]_] |
/ (?=[[:alpha:]])
)*
"""

# Fully anchored form used for validation. The end anchor is `\z` (absolute end
# of input) rather than `$`, because in PCRE `$` also matches just before a
# trailing newline and would therefore accept names such as "a\n".
const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\\z", "x")
6865

6966
function make_valid_dataset_name(name)
7067
if !is_valid_dataset_name(name)
@@ -191,4 +188,3 @@ function Base.open(as_type, dataset::DataSet)
191188
@! ResourceContexts.detach_context_cleanup(result)
192189
end
193190
end
194-

src/data_project.jl

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -107,16 +107,20 @@ function _unescapeuri(str)
107107
return String(take!(out))
108108
end
109109

110+
# Parse as a suffix of URI syntax
# name/of/dataset?param1=value1&param2=value2#fragment
#
# Capture groups:
#   1 - the dataset name (pattern taken from DATASET_NAME_REGEX_STRING)
#   2 - the query text after `?`, up to but excluding any `#` (optional)
#   3 - the fragment text after `#` (optional)
#
# The pattern is compiled in extended (`x`) mode, so the layout whitespace and
# the trailing `# ...` notes inside the string are ignored by the regex engine.
# The end anchor is `\z` (absolute end of input) rather than `$`: in PCRE, `$`
# also matches just before a trailing newline, which would accept a spec such
# as "foo\n" and silently drop the newline.
const DATASET_SPEC_REGEX = Regex(
    """
    ^
    ($(DATASET_NAME_REGEX_STRING))
    (?:\\?([^#]*))? # query - a=b&c=d
    (?:\\#(.*))? # fragment - ...
    \\z
    """,
    "x",
)
110122
function _split_dataspec(spec::AbstractString)
111-
# Parse as a suffix of URI syntax
112-
# name/of/dataset?param1=value1&param2=value2#fragment
113-
m = match(r"
114-
^
115-
((?:[[:alpha:]][[:alnum:]_]*/?)+) # name - a/b/c
116-
(?:\?([^#]*))? # query - a=b&c=d
117-
(?:\#(.*))? # fragment - ...
118-
$"x,
119-
spec)
123+
m = match(DATASET_SPEC_REGEX, spec)
120124
if isnothing(m)
121125
return nothing, nothing, nothing
122126
end

test/runtests.jl

Lines changed: 29 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -78,20 +78,20 @@ end
7878

7979
#-------------------------------------------------------------------------------
8080
@testset "Data set names" begin
81-
# Valid names
82-
@test DataSets.is_valid_dataset_name("a_b")
83-
@test DataSets.is_valid_dataset_name("a-b")
84-
@test DataSets.is_valid_dataset_name("a1")
85-
@test DataSets.is_valid_dataset_name("δεδομένα")
86-
@test DataSets.is_valid_dataset_name("a/b")
87-
@test DataSets.is_valid_dataset_name("a/b/c")
88-
# Invalid names
89-
@test !DataSets.is_valid_dataset_name("1")
90-
@test !DataSets.is_valid_dataset_name("a b")
91-
@test !DataSets.is_valid_dataset_name("a.b")
92-
@test !DataSets.is_valid_dataset_name("a/b/")
93-
@test !DataSets.is_valid_dataset_name("a//b")
94-
@test !DataSets.is_valid_dataset_name("/a/b")
81+
@testset "Valid name: $name" for name in (
82+
"a_b", "a-b", "a1", "δεδομένα", "a/b", "a/b/c", "a-", "b_",
83+
)
84+
@test DataSets.is_valid_dataset_name(name)
85+
@test DataSets._split_dataspec(name) == (name, nothing, nothing)
86+
end
87+
88+
@testset "Invalid name: $name" for name in (
89+
"1", "a b", "a.b", "a/b/", "a//b", "/a/b", "a/-", "a/1", "a/ _/b"
90+
)
91+
@test !DataSets.is_valid_dataset_name(name)
92+
@test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
93+
end
94+
9595
# Error message for invalid names
9696
@test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `_` or `/`.") DataSets.check_dataset_name("a?b")
9797

@@ -107,6 +107,21 @@ end
107107
end
108108

109109
@testset "URL-like dataspec parsing" begin
110+
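# Valid dataspecs
# NOTE(review): none of the comparisons in this group or in the "Invalid
# dataspecs" group below is wrapped in `@test`, so their results are discarded
# and nothing is actually asserted. Before adding `@test`, re-check the expected
# values: the "foo?x=1" case is listed twice, and "foo-?x=1" is expected to be
# rejected even though "a-" is listed as a valid name in the "Data set names"
# testset.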
111+
DataSets._split_dataspec("foo?x=1#f") == ("foo", ["x" => "1"], "f")
112+
DataSets._split_dataspec("foo#f") == ("foo", nothing, "f")
113+
DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
114+
DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
115+
# Invalid dataspecs
116+
DataSets._split_dataspec("foo ?x=1") == (nothing, nothing, nothing)
117+
DataSets._split_dataspec("foo\n?x=1") == (nothing, nothing, nothing)
118+
DataSets._split_dataspec("foo\nbar?x=1") == (nothing, nothing, nothing)
119+
DataSets._split_dataspec(" foo?x=1") == (nothing, nothing, nothing)
120+
DataSets._split_dataspec("1?x=1") == (nothing, nothing, nothing)
121+
DataSets._split_dataspec("foo-?x=1") == (nothing, nothing, nothing)
122+
DataSets._split_dataspec("foo #f") == (nothing, nothing, nothing)
123+
DataSets._split_dataspec("@?x=1") == (nothing, nothing, nothing)
124+
110125
proj = DataSets.load_project("Data.toml")
111126

112127
@test !haskey(dataset(proj, "a_text_file"), "dataspec")

0 commit comments

Comments
 (0)