Skip to content

Commit 265d83b

Browse files
feat: save license metadata to database
1 parent 9f65a39 commit 265d83b

File tree

5 files changed

+54
-1
lines changed

5 files changed

+54
-1
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""add license columns to dataset table
2+
3+
Revision ID: 00238f37d113
4+
Revises: aaabf849b37f
5+
Create Date: 2025-02-04 17:49:14.122704
6+
7+
"""
8+
9+
from typing import Sequence, Union
10+
11+
from alembic import op
12+
import sqlalchemy as sa
13+
14+
15+
# revision identifiers, used by Alembic.
16+
# Identifier of this migration (the "Revision ID" in the module docstring).
revision: str = "00238f37d113"
17+
# Identifier of the migration this one revises, i.e. its parent revision.
down_revision: Union[str, None] = "aaabf849b37f"
18+
# None: this revision declares no branch labels.
branch_labels: Union[str, Sequence[str], None] = None
19+
# None: this revision declares no dependencies on other revisions.
depends_on: Union[str, Sequence[str], None] = None
20+
21+
22+
def upgrade() -> None:
    """Add the NOT NULL ``license_url`` and ``license_name`` columns to ``dataset``.

    A ``server_default`` of the empty string is supplied so the migration also
    succeeds when ``dataset`` already contains rows: adding a NOT NULL column
    without a default to a populated table is rejected by e.g. PostgreSQL and
    SQLite, because the existing rows would have to hold NULL. The empty
    string matches the fallback the pipeline stores when no license is known.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        "dataset",
        sa.Column("license_url", sa.String(), nullable=False, server_default=""),
    )
    op.add_column(
        "dataset",
        sa.Column("license_name", sa.String(), nullable=False, server_default=""),
    )
    # ### end Alembic commands ###
27+
28+
29+
def downgrade() -> None:
    """Drop the ``license_name`` and ``license_url`` columns from ``dataset``."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Columns are dropped in the reverse of the order in which upgrade() adds them.
    for column_name in ("license_name", "license_url"):
        op.drop_column("dataset", column_name)
    # ### end Alembic commands ###

oc4ids_datastore_pipeline/database.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ class Dataset(Base):
2525
dataset_id: Mapped[str] = mapped_column(String, primary_key=True)
2626
source_url: Mapped[str] = mapped_column(String)
2727
publisher_name: Mapped[str] = mapped_column(String)
28+
license_url: Mapped[str] = mapped_column(String)
29+
license_name: Mapped[str] = mapped_column(String)
2830
json_url: Mapped[str] = mapped_column(String)
2931
updated_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
3032

oc4ids_datastore_pipeline/pipeline.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
from libcoveoc4ids.api import oc4ids_json_output
99

1010
from oc4ids_datastore_pipeline.database import Dataset, save_dataset
11-
from oc4ids_datastore_pipeline.registry import fetch_registered_datasets
11+
from oc4ids_datastore_pipeline.registry import (
12+
fetch_registered_datasets,
13+
get_license_name_from_url,
14+
)
1215

1316
logger = logging.getLogger(__name__)
1417

@@ -54,10 +57,14 @@ def save_dataset_metadata(
5457
) -> None:
5558
logger.info(f"Saving metadata for dataset {dataset_name}")
5659
publisher_name = json_data.get("publisher", {}).get("name", "")
60+
license_url = json_data.get("license", "")
61+
license_name = get_license_name_from_url(license_url)
5762
dataset = Dataset(
5863
dataset_id=dataset_name,
5964
source_url=source_url,
6065
publisher_name=publisher_name,
66+
license_url=license_url,
67+
license_name=license_name,
6168
json_url=json_url,
6269
updated_at=datetime.datetime.now(datetime.UTC),
6370
)

oc4ids_datastore_pipeline/registry.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from functools import lru_cache
12
import logging
23

34
import requests
@@ -23,6 +24,7 @@ def fetch_registered_datasets() -> dict[str, str]:
2324
raise Exception("Failed to fetch datasets list from registry", e)
2425

2526

27+
@lru_cache(maxsize=None)
2628
def fetch_license_mappings() -> dict[str, str]:
2729
logger.info("Fetching license mappings from registry")
2830
try:
@@ -40,3 +42,8 @@ def fetch_license_mappings() -> dict[str, str]:
4042
"Failed to fetch license mappings from registry, with error: " + str(e),
4143
)
4244
return {}
45+
46+
47+
def get_license_name_from_url(url: str) -> str:
    """Return the license name mapped to ``url`` by the registry.

    Args:
        url: License URL to look up in the mappings returned by
            ``fetch_license_mappings``.

    Returns:
        The mapped license name, or ``""`` when ``url`` has no mapping.
    """
    return fetch_license_mappings().get(url, "")

tests/test_registry.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,7 @@ def test_fetch_license_mappings_catches_exception(
9999
result = fetch_license_mappings()
100100

101101
assert result == {}
102+
103+
104+
def test_get_license_name_from_url() -> None:
    """Known URLs resolve to their license name; unknown URLs resolve to ``""``."""
    # Imported locally so the test does not depend on the module's import block.
    from unittest.mock import patch

    from oc4ids_datastore_pipeline.registry import get_license_name_from_url

    mappings = {
        "https://license_1.com/license": "License 1",
        "https://license_2.com/license": "License 2",
    }
    # Replace the lru_cache-decorated registry fetch so the lookup runs against
    # a fixed mapping instead of whatever the real fetch (or its cache) returns.
    with patch(
        "oc4ids_datastore_pipeline.registry.fetch_license_mappings",
        return_value=mappings,
    ):
        assert (
            get_license_name_from_url("https://license_2.com/license") == "License 2"
        )
        assert get_license_name_from_url("https://unknown.com/license") == ""

0 commit comments

Comments
 (0)