Skip to content

Commit b78da9e

Browse files
authored
Merge pull request #317 from dandi/relax-on-default
Relax various patterns defined in `dandischema.models` when DANDI Schema is not set to a vendor
2 parents 4f04034 + 72b5257 commit b78da9e

File tree

3 files changed

+49
-31
lines changed

3 files changed

+49
-31
lines changed

dandischema/conf.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,16 @@
1414
_MODELS_MODULE_NAME = "dandischema.models"
1515
"""The full import name of the module containing the DANDI Pydantic models"""
1616

17-
_UNVENDORED_ID_PATTERN = r"[A-Z][-A-Z]*"
18-
_UNVENDORED_DOI_PREFIX_PATTERN = r"10\.\d{4,}"
17+
UNVENDORED_ID_PATTERN = r"[A-Z][-A-Z]*"
18+
UNVENDORED_DOI_PREFIX_PATTERN = r"10\.\d{4,}"
1919

2020
logger = logging.getLogger(__name__)
2121

22+
DEFAULT_INSTANCE_NAME = "DANDI-ADHOC"
23+
"""
24+
The default name of the DANDI instance
25+
"""
26+
2227

2328
class SpdxLicenseListInfo(BaseModel):
2429
"""
@@ -74,14 +79,12 @@ class Config(BaseSettings):
7479
model_config = SettingsConfigDict(env_prefix="dandi_")
7580

7681
instance_name: Annotated[
77-
str, StringConstraints(pattern=rf"^{_UNVENDORED_ID_PATTERN}$")
78-
] = "DANDI-ADHOC"
82+
str, StringConstraints(pattern=rf"^{UNVENDORED_ID_PATTERN}$")
83+
] = DEFAULT_INSTANCE_NAME
7984
"""Name of the DANDI instance"""
8085

8186
doi_prefix: Optional[
82-
Annotated[
83-
str, StringConstraints(pattern=rf"^{_UNVENDORED_DOI_PREFIX_PATTERN}$")
84-
]
87+
Annotated[str, StringConstraints(pattern=rf"^{UNVENDORED_DOI_PREFIX_PATTERN}$")]
8588
] = None
8689
"""
8790
The DOI prefix at DataCite

dandischema/models.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,12 @@
3737
from pydantic_core import CoreSchema
3838
from zarr_checksum.checksum import InvalidZarrChecksum, ZarrDirectoryDigest
3939

40-
from dandischema.conf import get_instance_config
40+
from dandischema.conf import (
41+
DEFAULT_INSTANCE_NAME,
42+
UNVENDORED_DOI_PREFIX_PATTERN,
43+
UNVENDORED_ID_PATTERN,
44+
get_instance_config,
45+
)
4146

4247
from .consts import DANDI_SCHEMA_VERSION
4348
from .digests.dandietag import DandiETag
@@ -48,13 +53,17 @@
4853
_INSTANCE_CONFIG = get_instance_config()
4954

5055
# Regex pattern for the prefix of identifiers
51-
ID_PATTERN = _INSTANCE_CONFIG.instance_name
56+
ID_PATTERN = (
57+
_INSTANCE_CONFIG.instance_name
58+
if _INSTANCE_CONFIG.instance_name != DEFAULT_INSTANCE_NAME
59+
else UNVENDORED_ID_PATTERN
60+
)
5261

5362
# The pattern that a DOI prefix of a dandiset must conform to
5463
DOI_PREFIX_PATTERN = (
5564
re.escape(_INSTANCE_CONFIG.doi_prefix)
5665
if _INSTANCE_CONFIG.doi_prefix is not None
57-
else None
66+
else UNVENDORED_DOI_PREFIX_PATTERN
5867
)
5968

6069
# Use DJANGO_DANDI_WEB_APP_URL to point to a specific deployment.
@@ -74,10 +83,13 @@
7483
)
7584
ASSET_UUID_PATTERN = r"^dandiasset:" + UUID_PATTERN
7685
VERSION_PATTERN = r"\d{6}/\d+\.\d+\.\d+"
86+
_INNER_DANDI_DOI_PATTERN = (
87+
rf"{DOI_PREFIX_PATTERN}/{ID_PATTERN.lower()}\.{VERSION_PATTERN}"
88+
)
7789
DANDI_DOI_PATTERN = (
78-
rf"^{DOI_PREFIX_PATTERN}/{ID_PATTERN.lower()}\.{VERSION_PATTERN}$"
79-
if DOI_PREFIX_PATTERN is not None
80-
else None
90+
rf"^{_INNER_DANDI_DOI_PATTERN}$"
91+
if _INSTANCE_CONFIG.doi_prefix is not None
92+
else rf"^({_INNER_DANDI_DOI_PATTERN}|)$" # This matches an empty string as well
8193
)
8294
DANDI_PUBID_PATTERN = rf"^{ID_PATTERN}:{VERSION_PATTERN}$"
8395
DANDI_NSKEY = "dandi" # Namespace for DANDI ontology
@@ -1879,14 +1891,11 @@ class Publishable(DandiBaseModel):
18791891

18801892
_doi_field_kwargs: dict[str, Any] = {
18811893
"title": "DOI",
1894+
"pattern": DANDI_DOI_PATTERN,
18821895
"json_schema_extra": {"readOnly": True, "nskey": DANDI_NSKEY},
18831896
}
1884-
if DANDI_DOI_PATTERN is not None:
1885-
_doi_field_kwargs["pattern"] = DANDI_DOI_PATTERN
1886-
else:
1897+
if _INSTANCE_CONFIG.doi_prefix is None:
18871898
_doi_field_kwargs["default"] = ""
1888-
# restricting the value to empty string to indicate that there is no DOI
1889-
_doi_field_kwargs["pattern"] = r"^$"
18901899

18911900

18921901
class PublishedDandiset(Dandiset, Publishable):

dandischema/tests/test_models.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -793,17 +793,19 @@ def _get_field_pattern(
793793
# Without any environment variables set, dandischema is unvendorized.
794794
(
795795
{},
796-
"DANDI-ADHOC",
797-
None,
796+
r"[A-Z][-A-Z]*",
797+
r"10\.\d{4,}",
798798
{
799799
"dandiset_id": "DANDI-ADHOC:001350/draft",
800800
"dandiset_identifier": "DANDI-ADHOC:001350",
801801
"published_dandiset_id": "DANDI-ADHOC:001350/0.250511.1527",
802+
"published_dandiset_doi": "",
802803
},
803804
{
804805
"dandiset_id": "45:001350/draft", # Invalid id prefix
805806
"dandiset_identifier": "DANDI-ADHOC:001350",
806807
"published_dandiset_id": "DANDI-ADHOC:001350/0.250511.1527",
808+
"published_dandiset_doi": "",
807809
},
808810
),
809811
(
@@ -832,34 +834,39 @@ def _get_field_pattern(
832834
"instance_name": "DANDI",
833835
},
834836
"DANDI",
835-
None,
837+
r"10\.\d{4,}",
836838
{
837839
"dandiset_id": "DANDI:001425/draft",
838840
"dandiset_identifier": "DANDI:001425",
839841
"published_dandiset_id": "DANDI:001425/0.250514.0602",
842+
"published_dandiset_doi": "10.48324/dandi.001425/0.250514.0602",
840843
},
841844
{
842845
"dandiset_id": "DANDI:001425/draft",
843846
"dandiset_identifier": "DANDI:001425",
844847
# Not matching the `ID_PATTERN` regex
845848
"published_dandiset_id": "DANDI3:001425/0.250514.0602",
849+
"published_dandiset_doi": "10.48324/dandi.001425/0.250514.0602",
846850
},
847851
),
848852
# === EMBER DANDI instance test cases ===
849853
# Without any environment variables set, dandischema is unvendorized.
850854
(
851855
{},
852-
"DANDI-ADHOC",
853-
None,
856+
r"[A-Z][-A-Z]*",
857+
r"10\.\d{4,}",
854858
{
855859
"dandiset_id": "DANDI-ADHOC:000005/draft",
856-
"dandiset_identifier": "DANDI-ADHOC:000005",
860+
"dandiset_identifier": "ABC:000005",
857861
"published_dandiset_id": "DANDI-ADHOC:000005/0.250404.1839",
862+
"published_dandiset_doi": "10.60533/ember-dandi.000005/0.250404.1839",
858863
},
859864
{
860865
"dandiset_id": "DANDI-ADHOC:000005/draft",
861-
"dandiset_identifier": "-DANDI-ADHOC:000005", # Invalid id prefix
866+
"dandiset_identifier": "ABC:000005",
862867
"published_dandiset_id": "DANDI-ADHOC:000005/0.250404.1839",
868+
# Invalid registrant code in the DOI prefix
869+
"published_dandiset_doi": "10.605/ember-dandi.000005/0.250404.1839",
863870
},
864871
),
865872
(
@@ -889,7 +896,7 @@ def _get_field_pattern(
889896
def test_vendorization(
890897
clear_dandischema_modules_and_set_env_vars: None,
891898
exp_id_pattern: str,
892-
exp_doi_prefix_pattern: Optional[str],
899+
exp_doi_prefix_pattern: str,
893900
# Fields that are valid for the vendorization
894901
valid_vendored_fields: dict[str, str],
895902
# Fields that are invalid for the vendorization
@@ -915,12 +922,11 @@ class VendoredFieldModel(BaseModel):
915922
published_dandiset_id: str = Field(
916923
pattern=_get_field_pattern("id", models_.PublishedDandiset)
917924
)
918-
if exp_doi_prefix_pattern is not None:
919-
published_dandiset_doi: str = Field(
920-
pattern=_get_field_pattern("doi", models_.PublishedDandiset)
921-
)
925+
published_dandiset_doi: str = Field(
926+
pattern=_get_field_pattern("doi", models_.PublishedDandiset)
927+
)
922928

923-
model_config = ConfigDict(strict=True, extra="forbid")
929+
model_config = ConfigDict(strict=True)
924930

925931
# Validate the valid vendored fields against the vendored patterns
926932
VendoredFieldModel.model_validate(valid_vendored_fields)

0 commit comments

Comments
 (0)