Skip to content

Commit d9f1e8c

Browse files
committed
tst: add tests for metadata conversion
1 parent 337c0a1 commit d9f1e8c

File tree

1 file changed

+194
-0
lines changed

1 file changed

+194
-0
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import copy
2+
from typing import Dict, Any
3+
4+
import pytest
5+
6+
from dandischema.utils import google_dataset_metadata
7+
8+
9+
@pytest.fixture
def sample_dandiset_metadata() -> Dict[str, Any]:
    """Return a representative Dandiset metadata record for the tests.

    The record is assembled from named pieces (contributors, assets summary)
    so each part is easy to locate. No fixture scope is given, so pytest's
    default (function) scope applies and every test receives its own dict.
    """
    author = {
        "schemaKey": "Person",
        "name": "Doe, John",
        "roleName": ["dcite:Author", "dcite:ContactPerson"],
        "identifier": "0000-0001-2345-6789",
        "email": "john.doe@example.com",
        "includeInCitation": True,
    }
    sponsor = {
        "schemaKey": "Organization",
        "name": "Test Organization",
        "roleName": ["dcite:Sponsor"],
        "identifier": "https://ror.org/xxxxxxxxx",
        "includeInCitation": False,
    }
    assets_summary = {
        "schemaKey": "AssetsSummary",
        "numberOfBytes": 1000000,
        "numberOfFiles": 10,
        "dataStandard": [
            {
                "name": "Neurodata Without Borders (NWB)",
                "identifier": "RRID:SCR_015242",
            },
        ],
        "species": [
            {
                "name": "Homo sapiens",
                "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_9606",
            },
        ],
        "approach": [
            {
                "name": "electrophysiology",
                "identifier": "http://uri.interlex.org/base/ilx_0739363",
            },
        ],
        "measurementTechnique": [
            {
                "name": "multi-electrode extracellular electrophysiology",
                "identifier": "http://uri.interlex.org/base/ilx_0739400",
            },
        ],
    }
    return {
        "@context": "https://raw.githubusercontent.com/dandi/schema/master/releases/0.6.4/context.json",
        "schemaKey": "Dandiset",
        "identifier": "DANDI:000707",
        "name": "Test Dandiset",
        "description": "A test dandiset for testing Google Dataset Search compatibility",
        "contributor": [author, sponsor],
        "license": ["spdx:CC-BY-4.0"],
        "schemaVersion": "0.6.4",
        "assetsSummary": assets_summary,
    }
67+
68+
69+
def test_google_dataset_metadata_basic_transformation(sample_dandiset_metadata):
    """Check the core fields produced by ``google_dataset_metadata``.

    Covers: input left untouched, ``schemaKey``, ``creator``, ``license``,
    ``version``, ``identifier`` and ``keywords``.
    """
    # Snapshot the input first: comparing the input with the result only
    # shows that they differ, not that the input was left alone.
    snapshot = copy.deepcopy(sample_dandiset_metadata)

    result = google_dataset_metadata(sample_dandiset_metadata)

    # Check that the original metadata is not modified
    assert sample_dandiset_metadata == snapshot

    # Check that the result is a transformed record, not an echo of the input
    assert sample_dandiset_metadata != result

    # Check that schema:Dataset is added to schemaKey
    assert "schema:Dataset" in result["schemaKey"]

    # Check that creator is properly formatted
    assert "creator" in result
    assert isinstance(result["creator"], list)
    assert len(result["creator"]) > 0

    # Check first creator
    creator = result["creator"][0]
    assert creator["schemaKey"] == "schema:Person"
    assert "name" in creator

    # Check that license is properly formatted
    assert "license" in result
    assert isinstance(result["license"], list)
    assert "https://spdx.org/licenses/CC-BY-4.0" in result["license"]

    # Check that version is present
    assert "version" in result

    # Check that identifier is properly formatted
    assert "identifier" in result
    assert result["identifier"] == "https://identifiers.org/DANDI:000707"

    # Check that keywords exist
    assert "keywords" in result
    assert isinstance(result["keywords"], list)
    assert len(result["keywords"]) > 0
    assert "neuroscience" in result["keywords"]
    assert "DANDI" in result["keywords"]
107+
108+
109+
def test_google_dataset_metadata_preserves_original(sample_dandiset_metadata):
    """The conversion must leave the dict passed to it unchanged."""
    # Independent deep copy taken before the call, used as the reference.
    snapshot = copy.deepcopy(sample_dandiset_metadata)

    # The return value is irrelevant here; only the effect on the input matters.
    google_dataset_metadata(sample_dandiset_metadata)

    assert snapshot == sample_dandiset_metadata
116+
117+
118+
def test_google_dataset_metadata_with_existing_creator(sample_dandiset_metadata):
    """Test that a ``creator`` already present in the input is preserved."""
    existing_creator = [
        {
            "schemaKey": "Person",
            "name": "Jane Smith",
            "identifier": "https://orcid.org/0000-0002-3456-7890",
        }
    ]
    # Give the function its own copy so the expectation below does not depend
    # on anything the call might do to its argument. Reading the expected
    # value back from the input after the call could hide an in-place rewrite.
    sample_dandiset_metadata["creator"] = copy.deepcopy(existing_creator)

    result = google_dataset_metadata(sample_dandiset_metadata)

    # Check that the existing creator is preserved
    assert result["creator"] == existing_creator
133+
134+
135+
def test_google_dataset_metadata_with_existing_keywords(sample_dandiset_metadata):
    """Keywords supplied by the caller are kept, and the standard ones are added."""
    sample_dandiset_metadata["keywords"] = ["test", "example"]

    keywords = google_dataset_metadata(sample_dandiset_metadata)["keywords"]

    # Caller-supplied keywords must still be present.
    for supplied in ("test", "example"):
        assert supplied in keywords

    # Keywords contributed by the conversion itself.
    for added in ("neuroscience", "DANDI"):
        assert added in keywords
149+
150+
151+
def test_google_dataset_metadata_with_no_license(sample_dandiset_metadata):
    """A record without ``license`` must not gain one in the output."""
    # Work on a deep copy so the fixture dict itself is left as provided.
    metadata = copy.deepcopy(sample_dandiset_metadata)
    metadata.pop("license")

    converted = google_dataset_metadata(metadata)

    assert "license" not in converted
161+
162+
163+
def test_google_dataset_metadata_with_no_contributors(sample_dandiset_metadata):
    """A record without ``contributor`` must not produce a ``creator`` entry."""
    # Work on a deep copy so the fixture dict itself is left as provided.
    metadata = copy.deepcopy(sample_dandiset_metadata)
    metadata.pop("contributor")

    converted = google_dataset_metadata(metadata)

    assert "creator" not in converted
173+
174+
175+
def test_google_dataset_metadata_with_date_published(sample_dandiset_metadata):
    """An explicit ``datePublished`` value is carried through unchanged."""
    published = "2023-01-01T00:00:00Z"
    sample_dandiset_metadata["datePublished"] = published

    converted = google_dataset_metadata(sample_dandiset_metadata)

    assert converted["datePublished"] == published
184+
185+
186+
def test_google_dataset_metadata_with_date_created_fallback(sample_dandiset_metadata):
    """``datePublished`` is taken from ``dateCreated`` when only the latter is set."""
    created = "2022-01-01T00:00:00Z"
    # Only dateCreated is added here; no datePublished is set by this test.
    sample_dandiset_metadata["dateCreated"] = created

    converted = google_dataset_metadata(sample_dandiset_metadata)

    assert converted["datePublished"] == created

0 commit comments

Comments
 (0)