Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions alembic/versions/79280fd201ed_add_metadata_urls_field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""add metadata_urls field.

Revision ID: 79280fd201ed
Revises: ddf161fdce37
Create Date: 2025-12-24 10:38:39.791341

"""

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as dialect_postgresql

from alembic import op

# revision identifiers, used by Alembic.
revision = "79280fd201ed"
down_revision = "ddf161fdce37"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the ``metadata_urls`` column to the ``resources`` table.

    The column is declared as a PostgreSQL array of strings.
    """
    urls_type = dialect_postgresql.ARRAY(sa.String)
    urls_column = sa.Column("metadata_urls", urls_type)
    op.add_column("resources", urls_column)


def downgrade() -> None:
    """Drop the ``metadata_urls`` column from the ``resources`` table."""
    table, column = "resources", "metadata_urls"
    op.drop_column(table, column)
1 change: 1 addition & 0 deletions cads_catalogue/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ class Resource(BaseModel):
update_frequency = sa.Column(sa.String)
variables: Any = sa.Column(dialect_postgresql.JSONB)
content_size = sa.Column(sa.Float)
metadata_urls = sa.Column(dialect_postgresql.ARRAY(sa.String))

# FAIR
fair_timestamp = sa.Column(sa.DateTime(timezone=True), default=None, nullable=True)
Expand Down
1 change: 1 addition & 0 deletions cads_catalogue/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ def load_resource_metadata_file(folder_path: str | pathlib.Path) -> dict[str, An
metadata["use_limitation"] = data.get("use_limitation")
metadata["content_size"] = data.get("content_size")
metadata["update_frequency"] = data.get("update_frequency")
metadata["metadata_urls"] = data.get("isBasedOn", []) or []
return metadata


Expand Down
8 changes: 8 additions & 0 deletions cads_catalogue/validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ def validate_metadata_json(dataset_folder):
"use_limitation",
"content_size",
"update_frequency",
"is_based_on",
]
# suggest to insert a value for missing/None fields:
for optional_field in optional_fields:
Expand Down Expand Up @@ -376,6 +377,13 @@ def validate_metadata_json(dataset_folder):
if not utils.is_url(keyword_url):
logger.error(f"keyword_url {keyword_url} is not an url")

# validate is_based_on as list of urls
is_based_on = data.get("is_based_on")
if is_based_on:
for item in is_based_on:
if not utils.is_url(item):
logger.error(f"is_based_on entry {item} is not an url")

# validate field that is a list of strings
for field in ["licences", "qos_tags", "related_resources_keywords"]:
field_value = data.get(field)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
],
"popularity": 500,
"qos_tags": ["tag1", "tag2", "tag3"],
"isBasedOn": ["http://cfconventions.org/documents.html"],
"api_enforce_constraints": true,
"title": "ERA5-Land hourly data from 1950 to present",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"keywords_urls": [],
"popularity": 500,
"qos_tags": ["tag1", "tag2", "tag3"],
"isBasedOn": ["http://cfconventions.org/documents.html"],
"api_enforce_constraints": true,
"title": "ERA5-Land hourly data from 1950 to present",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past."
Expand Down
16 changes: 10 additions & 6 deletions tests/data/dumped_resources1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"begin_date": "1950-01-01",
"end_date": "2023-02-11",
"publication_date": "2019-07-12",
"record_update": "2025-08-27 07:45:50.510200+03:00",
"record_update": "2025-12-24 10:11:04.217153+01:00",
"resource_update": "2023-02-17",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
"citation": null,
Expand Down Expand Up @@ -58,12 +58,15 @@
"update_frequency": null,
"variables": [],
"content_size": null,
"metadata_urls": [
"http://cfconventions.org/documents.html"
],
"fair_timestamp": null,
"fulltext": null,
"high_priority_terms": "reanalysis ERA5 land",
"popularity": 500,
"search_field": "'1950':11A 'accur':92B 'across':65B 'back':87B 'climat':56B,96B 'combin':59B 'compar':38B 'complet':71B 'compon':51B 'consist':23B,73B 'data':9A,61B,82B 'dataset':20B,74B 'decad':33B,86B 'descript':93B 'ecmwf':54B 'enhanc':36B 'era5':3A,6A,15B,40B,42B,55B,101 'era5-land':5A,14B,41B 'evolut':27B 'global':70B 'goe':84B 'hour':8A 'land':4A,7A,16B,29B,43B,50B,102 'law':77B 'model':60B 'observ':63B 'past':99B 'physic':79B 'present':13A 'produc':46B,81B 'provid':21B,90B 'reanalysi':2A,19B,57B,58B,80B,100 'reanalysis-era5-land':1A 'replay':48B 'resolut':37B 'sever':32B,85B 'time':89B 'use':75B 'variabl':30B 'view':24B 'world':67B",
"fts": "'era5':2 'land':3 'reanalysi':1",
"fair_timestamp": null
"fts": "'era5':2 'land':3 'reanalysi':1"
},
{
"resource_id": 2,
Expand All @@ -89,7 +92,7 @@
"begin_date": "1950-01-01",
"end_date": "2022-12-01",
"publication_date": "2019-06-23",
"record_update": "2025-08-27 07:45:50.531565+03:00",
"record_update": "2025-12-24 10:11:04.257466+01:00",
"resource_update": "2023-02-17",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
"citation": null,
Expand Down Expand Up @@ -122,11 +125,12 @@
"update_frequency": null,
"variables": [],
"content_size": null,
"metadata_urls": [],
"fair_timestamp": null,
"fulltext": "climate reanalysis past land era5 hydrology physics biosphere copernicus c3s conditions variables monthly means",
"high_priority_terms": "",
"popularity": 1,
"search_field": "'1950':14A 'accur':95B 'across':68B 'averag':11A 'back':90B 'biospher':110C 'c3s':112C 'climat':59B,99B,103C 'combin':62B 'compar':41B 'complet':74B 'compon':54B 'condit':113C 'consist':26B,76B 'copernicus':111C 'data':12A,64B,85B 'dataset':23B,77B 'decad':36B,89B 'descript':96B 'ecmwf':57B 'enhanc':39B 'era5':3A,8A,18B,43B,45B,58B,107C 'era5-land':7A,17B,44B 'evolut':30B 'global':73B 'goe':87B 'hydrolog':108C 'land':4A,9A,19B,32B,46B,53B,106C 'law':80B 'mean':6A,116C 'model':63B 'month':5A,10A,115C 'observ':66B 'past':102B,105C 'physic':82B,109C 'present':16A 'produc':49B,84B 'provid':24B,93B 'reanalysi':2A,22B,60B,61B,83B,104C 'reanalysis-era5-land-monthly-means':1A 'replay':51B 'resolut':40B 'sever':35B,88B 'time':92B 'use':78B 'variabl':33B,114C 'view':27B 'world':70B",
"fts": "",
"fair_timestamp": null
"fts": ""
}
]
16 changes: 10 additions & 6 deletions tests/data/dumped_resources2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"begin_date": "1950-01-01",
"end_date": "2023-02-11",
"publication_date": "2019-07-12",
"record_update": "2025-08-27 07:45:50.510200+03:00",
"record_update": "2025-12-24 10:11:04.217153+01:00",
"resource_update": "2023-02-17",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
"citation": null,
Expand Down Expand Up @@ -58,12 +58,15 @@
"update_frequency": null,
"variables": [],
"content_size": null,
"metadata_urls": [
"http://cfconventions.org/documents.html"
],
"fair_timestamp": null,
"fulltext": null,
"high_priority_terms": "reanalysis ERA5 land",
"popularity": 500,
"search_field": "'1950':11A 'accur':92B 'across':65B 'back':87B 'climat':56B,96B 'combin':59B 'compar':38B 'complet':71B 'compon':51B 'consist':23B,73B 'data':9A,61B,82B 'dataset':20B,74B 'decad':33B,86B 'descript':93B 'ecmwf':54B 'enhanc':36B 'era5':3A,6A,15B,40B,42B,55B,101 'era5-land':5A,14B,41B 'evolut':27B 'global':70B 'goe':84B 'hour':8A 'land':4A,7A,16B,29B,43B,50B,102 'law':77B 'model':60B 'observ':63B 'past':99B 'physic':79B 'present':13A 'produc':46B,81B 'provid':21B,90B 'reanalysi':2A,19B,57B,58B,80B,100 'reanalysis-era5-land':1A 'replay':48B 'resolut':37B 'sever':32B,85B 'time':89B 'use':75B 'variabl':30B 'view':24B 'world':67B",
"fts": "'era5':2 'land':3 'reanalysi':1",
"fair_timestamp": null
"fts": "'era5':2 'land':3 'reanalysi':1"
},
{
"resource_id": 2,
Expand All @@ -89,7 +92,7 @@
"begin_date": "1950-01-01",
"end_date": "2022-12-01",
"publication_date": "2019-06-23",
"record_update": "2025-08-27 07:45:50.531565+03:00",
"record_update": "2025-12-24 10:11:04.257466+01:00",
"resource_update": "2023-02-17",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
"citation": null,
Expand Down Expand Up @@ -122,11 +125,12 @@
"update_frequency": null,
"variables": [],
"content_size": null,
"metadata_urls": [],
"fair_timestamp": null,
"fulltext": "climate reanalysis past land era5 hydrology physics biosphere copernicus c3s conditions variables monthly means",
"high_priority_terms": "",
"popularity": 1,
"search_field": "'1950':14A 'accur':95B 'across':68B 'averag':11A 'back':90B 'biospher':110C 'c3s':112C 'climat':59B,99B,103C 'combin':62B 'compar':41B 'complet':74B 'compon':54B 'condit':113C 'consist':26B,76B 'copernicus':111C 'data':12A,64B,85B 'dataset':23B,77B 'decad':36B,89B 'descript':96B 'ecmwf':57B 'enhanc':39B 'era5':3A,8A,18B,43B,45B,58B,107C 'era5-land':7A,17B,44B 'evolut':30B 'global':73B 'goe':87B 'hydrolog':108C 'land':4A,9A,19B,32B,46B,53B,106C 'law':80B 'mean':6A,116C 'model':63B 'month':5A,10A,115C 'observ':66B 'past':102B,105C 'physic':82B,109C 'present':16A 'produc':49B,84B 'provid':24B,93B 'reanalysi':2A,22B,60B,61B,83B,104C 'reanalysis-era5-land-monthly-means':1A 'replay':51B 'resolut':40B 'sever':35B,88B 'time':92B 'use':78B 'variabl':33B,114C 'view':27B 'world':70B",
"fts": "",
"fair_timestamp": null
"fts": ""
}
]
11 changes: 7 additions & 4 deletions tests/data/dumped_resources3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"adaptor_properties_hash": "3cfceb3cb45d4fb56d6dc29575c36af4",
"api_enforce_constraints": true,
"disabled_reason": null,
"sources_hash": "3cbb4e51e9993fac651033ea7334f7a5",
"sources_hash": "29a88c88049cd4dbf385dd1a2584140f",
"related_resources_keywords": [],
"sanity_check": null,
"sanity_check_conf": null,
Expand All @@ -23,7 +23,7 @@
"begin_date": "1950-01-01",
"end_date": "2023-02-11",
"publication_date": "2019-07-12",
"record_update": "2025-08-27 07:45:51.253762+03:00",
"record_update": "2025-12-24 10:11:05.982515+01:00",
"resource_update": "2023-02-17",
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
"citation": null,
Expand Down Expand Up @@ -58,11 +58,14 @@
"update_frequency": null,
"variables": [],
"content_size": null,
"metadata_urls": [
"http://cfconventions.org/documents.html"
],
"fair_timestamp": null,
"fulltext": null,
"high_priority_terms": "reanalysis ERA5 land",
"popularity": 500,
"search_field": "'1950':11A 'accur':92B 'across':65B 'back':87B 'climat':56B,96B 'combin':59B 'compar':38B 'complet':71B 'compon':51B 'consist':23B,73B 'data':9A,61B,82B 'dataset':20B,74B 'decad':33B,86B 'descript':93B 'ecmwf':54B 'enhanc':36B 'era5':3A,6A,15B,40B,42B,55B,101 'era5-land':5A,14B,41B 'evolut':27B 'global':70B 'goe':84B 'hour':8A 'land':4A,7A,16B,29B,43B,50B,102 'law':77B 'model':60B 'observ':63B 'past':99B 'physic':79B 'present':13A 'produc':46B,81B 'provid':21B,90B 'reanalysi':2A,19B,57B,58B,80B,100 'reanalysis-era5-land':1A 'replay':48B 'resolut':37B 'sever':32B,85B 'time':89B 'use':75B 'variabl':30B 'view':24B 'world':67B",
"fts": "'era5':2 'land':3 'reanalysi':1",
"fair_timestamp": null
"fts": "'era5':2 'land':3 'reanalysi':1"
}
]
Loading
Loading