Skip to content

Commit 5f576ff

Browse files
feat: add license short titles
1 parent de06cfe commit 5f576ff

File tree

5 files changed

+99
-13
lines changed

5 files changed

+99
-13
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""add license_title_short column
2+
3+
Revision ID: ebb26242c904
4+
Revises: 8182d8c386f7
5+
Create Date: 2025-02-25 15:47:03.169950
6+
7+
"""
8+
9+
from typing import Sequence, Union
10+
11+
from alembic import op
12+
import sqlalchemy as sa
13+
14+
15+
# revision identifiers, used by Alembic.
16+
revision: str = "ebb26242c904"
17+
down_revision: Union[str, None] = "8182d8c386f7"
18+
branch_labels: Union[str, Sequence[str], None] = None
19+
depends_on: Union[str, Sequence[str], None] = None
20+
21+
22+
def upgrade() -> None:
    """Apply this revision to the ``dataset`` table.

    Adds the nullable string column ``license_title_short`` and marks the
    existing ``license_title`` column as nullable.
    """
    table_name = "dataset"

    # New column holding the short form of the license title.
    short_title_column = sa.Column("license_title_short", sa.String(), nullable=True)
    op.add_column(table_name, short_title_column)

    # Relax the constraint on the existing full-title column.
    op.alter_column(
        table_name,
        "license_title",
        existing_type=sa.VARCHAR(),
        nullable=True,
    )
31+
32+
33+
def downgrade() -> None:
    """Revert this revision on the ``dataset`` table.

    Restores the NOT NULL constraint on ``license_title`` and drops the
    ``license_title_short`` column added by ``upgrade()``.
    """
    # upgrade() makes license_title nullable, so rows written while this
    # revision was applied may hold NULL. Re-adding NOT NULL would fail on
    # those rows; backfill them with an empty string first.
    # NOTE(review): '' is an assumed placeholder - confirm it is acceptable
    # for consumers of this column.
    op.execute(
        sa.text("UPDATE dataset SET license_title = '' WHERE license_title IS NULL")
    )
    op.alter_column(
        "dataset", "license_title", existing_type=sa.VARCHAR(), nullable=False
    )
    op.drop_column("dataset", "license_title_short")

oc4ids_datastore_pipeline/database.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class Dataset(Base):
3030
publisher_name: Mapped[str] = mapped_column(String)
3131
license_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3232
license_title: Mapped[Optional[str]] = mapped_column(String, nullable=True)
33+
license_title_short: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3334
json_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3435
csv_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3536
xlsx_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)

oc4ids_datastore_pipeline/pipeline.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,16 @@ def save_dataset_metadata(
108108
try:
109109
publisher_name = json_data.get("publisher", {}).get("name", "")
110110
license_url = json_data.get("license", None)
111-
license_title = get_license_title_from_url(license_url) if license_url else None
111+
license_title, license_title_short = (
112+
get_license_title_from_url(license_url) if license_url else (None, None)
113+
)
112114
dataset = Dataset(
113115
dataset_id=dataset_id,
114116
source_url=source_url,
115117
publisher_name=publisher_name,
116118
license_url=license_url,
117119
license_title=license_title,
120+
license_title_short=license_title_short,
118121
json_url=json_url,
119122
csv_url=csv_url,
120123
xlsx_url=xlsx_url,

oc4ids_datastore_pipeline/registry.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,18 @@ def fetch_registered_datasets() -> dict[str, str]:
3131
return registered_datasets
3232

3333

34-
def fetch_license_mappings() -> dict[str, str]:
34+
def fetch_license_mappings() -> dict[str, dict[str, Optional[str]]]:
3535
logger.info("Fetching license mappings from registry")
3636
try:
3737
url = "https://opendataservices.github.io/oc4ids-registry/datatig/type/license/records_api.json" # noqa: E501
3838
r = requests.get(url)
3939
r.raise_for_status()
4040
json_data = r.json()
4141
return {
42-
urls["fields"]["url"]["value"]: license["fields"]["title"]["value"]
42+
urls["fields"]["url"]["value"]: {
43+
"title": license["fields"]["title"]["value"],
44+
"title_short": license["fields"]["title_short"]["value"],
45+
}
4346
for license in json_data["records"].values()
4447
for urls in license["fields"]["urls"]["values"]
4548
}
@@ -52,8 +55,9 @@ def fetch_license_mappings() -> dict[str, str]:
5255

5356
def get_license_title_from_url(
    url: str, force_refresh: Optional[bool] = False
) -> tuple[Optional[str], Optional[str]]:
    """Return the ``(title, title_short)`` pair registered for a license URL.

    The mapping is cached in the module-level ``_license_mappings``. It is
    populated by calling ``fetch_license_mappings()`` when the cache is still
    ``None`` or when ``force_refresh`` is truthy.

    Args:
        url: License URL to look up in the mapping.
        force_refresh: When truthy, re-fetch the mapping before the lookup.

    Returns:
        A 2-tuple ``(title, title_short)``. Each element is ``None`` when the
        URL is not in the mapping or its entry lacks that key.
    """
    global _license_mappings

    cache_is_empty = _license_mappings is None
    if cache_is_empty or force_refresh:
        _license_mappings = fetch_license_mappings()

    # An unknown URL falls back to an empty entry, so both lookups give None.
    entry = _license_mappings.get(url, {})
    title = entry.get("title")
    title_short = entry.get("title_short")
    return title, title_short

tests/test_registry.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def test_fetch_license_mappings(mocker: MockerFixture) -> None:
5959
"license_1": {
6060
"fields": {
6161
"title": {"value": "License 1"},
62+
"title_short": {"value": "L1"},
6263
"urls": {
6364
"values": [
6465
{
@@ -80,6 +81,7 @@ def test_fetch_license_mappings(mocker: MockerFixture) -> None:
8081
"license_2": {
8182
"fields": {
8283
"title": {"value": "License 2"},
84+
"title_short": {"value": "L2"},
8385
"urls": {
8486
"values": [
8587
{
@@ -99,9 +101,18 @@ def test_fetch_license_mappings(mocker: MockerFixture) -> None:
99101
result = fetch_license_mappings()
100102

101103
assert result == {
102-
"https://license_1.com/license": "License 1",
103-
"https://license_1.com/different_url": "License 1",
104-
"https://license_2.com/license": "License 2",
104+
"https://license_1.com/license": {
105+
"title": "License 1",
106+
"title_short": "L1",
107+
},
108+
"https://license_1.com/different_url": {
109+
"title": "License 1",
110+
"title_short": "L1",
111+
},
112+
"https://license_2.com/license": {
113+
"title": "License 2",
114+
"title_short": "L2",
115+
},
105116
}
106117

107118

@@ -121,27 +132,55 @@ def test_get_license_title_from_url(mocker: MockerFixture) -> None:
121132
"oc4ids_datastore_pipeline.registry.fetch_license_mappings"
122133
)
123134
patch_license_mappings.return_value = {
124-
"https://license_1.com/license": "License 1",
125-
"https://license_2.com/license": "License 2",
135+
"https://license_1.com/license": {
136+
"title": "License 1",
137+
"title_short": "L1",
138+
},
139+
"https://license_2.com/license": {
140+
"title": "License 2",
141+
"title_short": "L2",
142+
},
126143
}
127144

128145
license_title = get_license_title_from_url(
129146
"https://license_2.com/license", force_refresh=True
130147
)
131148

132-
assert license_title == "License 2"
149+
assert license_title == ("License 2", "L2")
133150

134151

135152
def test_get_license_title_from_url_not_in_mapping(mocker: MockerFixture) -> None:
    """A URL missing from the patched mappings yields ``(None, None)``."""
    # Only license_1 is registered; the lookup below asks for license_2.
    registered_mappings = {
        "https://license_1.com/license": {
            "title": "License 1",
            "title_short": "L1",
        },
    }
    fetch_mock = mocker.patch(
        "oc4ids_datastore_pipeline.registry.fetch_license_mappings"
    )
    fetch_mock.return_value = registered_mappings

    # force_refresh=True makes the lookup use the patched fetch result.
    titles = get_license_title_from_url(
        "https://license_2.com/license", force_refresh=True
    )

    assert titles == (None, None)
168+
169+
170+
def test_get_license_name_from_url_short_name_not_in_mapping(
    mocker: MockerFixture,
) -> None:
    """An entry without ``title_short`` yields ``None`` for the short title."""
    # The registered entry deliberately omits the "title_short" key.
    registered_mappings = {
        "https://license_2.com/license": {
            "title": "License 2",
        },
    }
    fetch_mock = mocker.patch(
        "oc4ids_datastore_pipeline.registry.fetch_license_mappings"
    )
    fetch_mock.return_value = registered_mappings

    # force_refresh=True makes the lookup use the patched fetch result.
    titles = get_license_title_from_url(
        "https://license_2.com/license", force_refresh=True
    )

    assert titles == ("License 2", None)

0 commit comments

Comments
 (0)