Skip to content
This repository was archived by the owner on Mar 10, 2026. It is now read-only.

Commit 83cea02

Browse files
committed
fix: Fix error in field validation
1 parent 1a3d306 commit 83cea02

File tree

6 files changed

+43
-35
lines changed

6 files changed

+43
-35
lines changed

src/mdverse_scrapers/models/dataset.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
model_validator,
1313
)
1414

15-
from .enums import DatasetRepoProjectName
15+
from .date import DATETIME_FORMAT
16+
from .enums import DatasetSourceName
1617
from .simulation import SimulationMetadata
1718

1819
DOI = Annotated[
@@ -31,7 +32,7 @@ class DatasetCoreMetadata(BaseModel):
3132
This model captures essential information about the source repository
3233
"""
3334

34-
dataset_repository_name: DatasetRepoProjectName = Field(
35+
dataset_repository_name: DatasetSourceName = Field(
3536
...,
3637
description=(
3738
"Name of the source data repository. "
@@ -62,11 +63,11 @@ class DatasetMetadata(SimulationMetadata, DatasetCoreMetadata):
6263
# ------------------------------------------------------------------
6364
# Project metadata
6465
# ------------------------------------------------------------------
65-
dataset_project_name: DatasetRepoProjectName | None = Field(
66+
dataset_project_name: DatasetSourceName | None = Field(
6667
None,
6768
description=(
6869
"Name of the source data project. "
69-
"Allowed values in the DatasetRepoProjectName enum. "
70+
"Allowed values in the DatasetSourceName enum. "
7071
"Examples: ZENODO, FIGSHARE, NOMAD..."
7172
),
7273
)
@@ -116,7 +117,7 @@ def date_last_fetched(self) -> str:
116117
str
117118
The current date and time in ISO 8601 format.
118119
"""
119-
return datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
120+
return datetime.now().strftime(DATETIME_FORMAT)
120121

121122
# ------------------------------------------------------------------
122123
# Descriptive metadata
@@ -165,7 +166,8 @@ def date_last_fetched(self) -> str:
165166
# Validators
166167
# ------------------------------------------------------------------
167168
@field_validator("date_created", "date_last_updated", mode="before")
168-
def format_dates(cls, value: datetime | str | None) -> str | None: # noqa: N805
169+
@classmethod
170+
def format_dates(cls, value: datetime | str | None) -> str | None:
169171
"""Convert datetime objects or strings to '%Y-%m-%dT%H:%M:%S' format.
170172
171173
Parameters
@@ -183,8 +185,8 @@ def format_dates(cls, value: datetime | str | None) -> str | None: # noqa: N805
183185
if value is None:
184186
return None
185187
if isinstance(value, datetime):
186-
return value.strftime("%Y-%m-%dT%H:%M:%S")
187-
return datetime.fromisoformat(value).strftime("%Y-%m-%dT%H:%M:%S")
188+
return value.strftime(DATETIME_FORMAT)
189+
return datetime.fromisoformat(value).strftime(DATETIME_FORMAT)
188190

189191
@field_validator(
190192
"description",
@@ -194,7 +196,8 @@ def format_dates(cls, value: datetime | str | None) -> str | None: # noqa: N805
194196
"author_names",
195197
mode="before",
196198
)
197-
def empty_to_none(cls, value: list | str | None) -> list | str | None: # noqa: N805
199+
@classmethod
200+
def empty_to_none(cls, value: list | str | None) -> list | str | None:
198201
"""
199202
Normalize empty fields to None.
200203

src/mdverse_scrapers/models/enums.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ class DataType(StrEnum):
1010
FILES = "files"
1111

1212

13-
class DatasetRepoProjectName(StrEnum):
14-
"""Molecular dynamaics data repositories and projects."""
13+
class DatasetSourceName(StrEnum):
14+
"""Molecular dynamics sources: data repositories and projects."""
1515

1616
ZENODO = "zenodo"
1717
FIGSHARE = "figshare"

src/mdverse_scrapers/models/file.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ class FileMetadata(DatasetCoreMetadata):
4343
# Validators
4444
# ------------------------------------------------------------------
4545
@field_validator("file_size_in_bytes", mode="before")
46-
def normalize_byte_string(cls, value: str | None) -> str | None: # noqa: N805
46+
@classmethod
47+
def normalize_byte_string(cls, value: str | None) -> str | None:
4748
"""
4849
Replace the unit "Bytes" with "B" so that the value is accepted by ByteSize.
4950

src/mdverse_scrapers/models/simulation.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,9 @@ class SimulationMetadata(BaseModel):
8282
# Validators
8383
# ------------------------------------------------------------------
8484
@field_validator("simulation_timesteps_in_fs", "simulation_times", mode="before")
85+
@classmethod
8586
def validate_positive_simulation_values(
86-
self,
87+
cls,
8788
value: list[str | float] | None,
8889
) -> list[str | float] | None:
8990
"""Ensure simulation numeric parameters are strictly positive.
@@ -94,8 +95,8 @@ def validate_positive_simulation_values(
9495
9596
Parameters
9697
----------
97-
self: SimulationMetadata
98-
The Pydantic model instance being validated.
98+
cls : type[SimulationMetadata]
99+
The Pydantic model class being validated.
99100
value : list[str | float] | None
100101
Raw input simulation parameter value.
101102
@@ -134,8 +135,9 @@ def check_positive(value: str | float | int):
134135
return value
135136

136137
@field_validator("simulation_temperatures_in_kelvin", mode="before")
138+
@classmethod
137139
def normalize_temperatures(
138-
self,
140+
cls,
139141
temperatures: list[str] | None,
140142
) -> list[float] | None:
141143
"""
@@ -147,8 +149,8 @@ def normalize_temperatures(
147149
148150
Parameters
149151
----------
150-
self: SimulationMetadata
151-
The Pydantic model instance being validated.
152+
cls : type[SimulationMetadata]
153+
The Pydantic model class being validated.
152154
temperatures : list[str] | None
153155
Raw temperature values.
154156

tests/models/test_dataset.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
import pytest
66

77
from mdverse_scrapers.models.dataset import DatasetMetadata
8-
from mdverse_scrapers.models.enums import DatasetRepoProjectName
8+
from mdverse_scrapers.models.date import DATETIME_FORMAT
9+
from mdverse_scrapers.models.enums import DatasetSourceName
910

1011

1112
# --------------------------------------
@@ -14,12 +15,12 @@
1415
def test_dataset_metadata_minimal_required_fields():
1516
"""Test creating DatasetMetadata with only required fields."""
1617
metadata = DatasetMetadata(
17-
dataset_repository_name=DatasetRepoProjectName.ZENODO,
18+
dataset_repository_name=DatasetSourceName.ZENODO,
1819
dataset_id_in_repository="123",
1920
dataset_url_in_repository="https://zenodo.org/record/123",
2021
title="Test Dataset",
2122
)
22-
assert metadata.dataset_repository_name == DatasetRepoProjectName.ZENODO
23+
assert metadata.dataset_repository_name == DatasetSourceName.ZENODO
2324
assert metadata.dataset_id_in_repository == "123"
2425
assert metadata.dataset_url_in_repository == "https://zenodo.org/record/123"
2526
assert metadata.title == "Test Dataset"
@@ -43,7 +44,7 @@ def test_dataset_metadata_minimal_required_fields():
4344
def test_empty_to_none(field, value, expected):
4445
"""Test that empty strings/lists are converted to None."""
4546
data = {
46-
"dataset_repository_name": DatasetRepoProjectName.ZENODO,
47+
"dataset_repository_name": DatasetSourceName.ZENODO,
4748
"dataset_id_in_repository": "123",
4849
"dataset_url_in_repository": "https://zenodo.org/record/123",
4950
"title": "Test Dataset",
@@ -60,14 +61,14 @@ def test_format_dates_with_datetime_objects():
6061
"""Test that datetime objects are correctly converted to ISO string format."""
6162
now = datetime(2026, 1, 23, 12, 0, 0)
6263
metadata = DatasetMetadata(
63-
dataset_repository_name=DatasetRepoProjectName.ZENODO,
64+
dataset_repository_name=DatasetSourceName.ZENODO,
6465
dataset_id_in_repository="123",
6566
dataset_url_in_repository="https://zenodo.org/record/123",
6667
title="Test Dataset",
6768
date_created=now,
6869
date_last_updated=now,
6970
)
70-
formatted = now.strftime("%Y-%m-%dT%H:%M:%S")
71+
formatted = now.strftime(DATETIME_FORMAT)
7172
assert metadata.date_created == formatted
7273
assert metadata.date_last_updated == formatted
7374

@@ -76,7 +77,7 @@ def test_format_dates_with_iso_strings():
7677
"""Test that ISO string dates are normalized to '%Y-%m-%dT%H:%M:%S' format."""
7778
expected_date = "2026-01-23T00:00:00"
7879
metadata = DatasetMetadata(
79-
dataset_repository_name=DatasetRepoProjectName.ZENODO,
80+
dataset_repository_name=DatasetSourceName.ZENODO,
8081
dataset_id_in_repository="123",
8182
dataset_url_in_repository="https://zenodo.org/record/123",
8283
title="Test Dataset",
@@ -91,15 +92,15 @@ def test_format_dates_with_iso_strings():
9192
def test_fill_project_fields():
9293
"""Test that project fields are populated from repository fields when missing."""
9394
metadata = DatasetMetadata(
94-
dataset_repository_name=DatasetRepoProjectName.ZENODO,
95+
dataset_repository_name=DatasetSourceName.ZENODO,
9596
dataset_id_in_repository="repo_123",
9697
dataset_url_in_repository="https://zenodo.org/record/repo_123",
9798
dataset_project_name=None,
9899
dataset_id_in_project=None,
99100
dataset_url_in_project=None,
100101
title="Test Dataset",
101102
)
102-
assert metadata.dataset_project_name == DatasetRepoProjectName.ZENODO
103+
assert metadata.dataset_project_name == DatasetSourceName.ZENODO
103104
assert metadata.dataset_id_in_project == "repo_123"
104105
assert metadata.dataset_url_in_project == "https://zenodo.org/record/repo_123"
105106

@@ -108,7 +109,7 @@ def test_fill_project_fields_from_repository_invalid_mapping():
108109
"""Test that AttributeError is raised when repository cannot map to a project."""
109110
with pytest.raises(AttributeError, match="type object"):
110111
_ = DatasetMetadata(
111-
dataset_repository_name=DatasetRepoProjectName.REPO,
112+
dataset_repository_name=DatasetSourceName.DUMMY,
112113
dataset_id_in_repository="123",
113114
dataset_url_in_repository="https://example.com/123",
114115
title="Test Dataset",
@@ -119,7 +120,7 @@ def test_date_last_fetched_is_recent():
119120
"""Test that date_last_fetched is a recent datetime."""
120121
time_1 = datetime.now()
121122
metadata = DatasetMetadata(
122-
dataset_repository_name=DatasetRepoProjectName.ZENODO,
123+
dataset_repository_name=DatasetSourceName.ZENODO,
123124
dataset_id_in_repository="123",
124125
dataset_url_in_repository="https://zenodo.org/record/123",
125126
title="Test Dataset",
@@ -136,7 +137,7 @@ def test_date_last_fetched_is_recent():
136137
def test_dataset_metadata_full_scenario():
137138
"""Test a realistic scenario with mixed missing fields and validators."""
138139
metadata = DatasetMetadata(
139-
dataset_repository_name=DatasetRepoProjectName.FIGSHARE,
140+
dataset_repository_name=DatasetSourceName.FIGSHARE,
140141
dataset_id_in_repository="fig_456",
141142
dataset_url_in_repository="https://figshare.com/articles/fig_456",
142143
dataset_project_name=None,
@@ -156,7 +157,7 @@ def test_dataset_metadata_full_scenario():
156157
assert metadata.external_links is None
157158
assert metadata.license is None
158159
# Check project fields filled
159-
assert metadata.dataset_project_name == DatasetRepoProjectName.FIGSHARE
160+
assert metadata.dataset_project_name == DatasetSourceName.FIGSHARE
160161
assert metadata.dataset_id_in_project == "fig_456"
161162
assert metadata.dataset_url_in_project == "https://figshare.com/articles/fig_456"
162163
# Check date_last_fetched is filled

tests/models/test_file.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
import pytest
44
from pydantic import ByteSize
55

6-
from mdverse_scrapers.models.enums import DatasetRepoProjectName
6+
from mdverse_scrapers.models.enums import DatasetSourceName
77
from mdverse_scrapers.models.file import FileMetadata
88

99

1010
def test_file_metadata_basic_creation():
1111
"""Test creating a FileMetadata instance with minimal required fields."""
1212
file = FileMetadata(
13-
dataset_repository_name=DatasetRepoProjectName.NOMAD,
13+
dataset_repository_name=DatasetSourceName.NOMAD,
1414
dataset_id_in_repository="abc123",
1515
dataset_url_in_repository="https://example.com",
1616
file_url_in_repository="https://example.com/file.xtc",
@@ -41,7 +41,7 @@ def test_file_size_normalization(
4141
) -> None:
4242
"""Test file size normalization."""
4343
file = FileMetadata(
44-
dataset_repository_name=DatasetRepoProjectName.ZENODO,
44+
dataset_repository_name=DatasetSourceName.ZENODO,
4545
dataset_id_in_repository="abc123",
4646
dataset_url_in_repository="https://example.com",
4747
file_url_in_repository="https://example.com/file.xtc",
@@ -67,12 +67,13 @@ def test_file_size_normalization(
6767
("complex.name.with.many.dots.pdb", "pdb"),
6868
("complex/path/to/file.pdb", "pdb"),
6969
("very.complex/path/to/file.pdb", "pdb"),
70+
("long path/with/some spaces/to/this_file.txt", "txt"),
7071
],
7172
)
7273
def test_file_type_computed_correctly(file_name: str, expected_file_type: str) -> None:
7374
"""Test that file_type is computed correctly from the file_name."""
7475
file = FileMetadata(
75-
dataset_repository_name=DatasetRepoProjectName.NOMAD,
76+
dataset_repository_name=DatasetSourceName.NOMAD,
7677
dataset_id_in_repository="abc123",
7778
dataset_url_in_repository="https://example.com",
7879
file_url_in_repository="https://example.com/file",

0 commit comments

Comments
 (0)