Skip to content

Commit 3bb81e9

Browse files
alfredotoselli and keul authored
feat: new metadata_urls field (#171)
* feat: new metadata_urls field
* isBasedOn to snake case

---------

Co-authored-by: Luca Fabbri <l.fabbri@bopen.eu>
1 parent 780ac71 commit 3bb81e9

File tree

15 files changed

+207
-115
lines changed

15 files changed

+207
-115
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""add metadata_urls field.
2+
3+
Revision ID: 79280fd201ed
4+
Revises: ddf161fdce37
5+
Create Date: 2025-12-24 10:38:39.791341
6+
7+
"""
8+
9+
import sqlalchemy as sa
10+
from sqlalchemy.dialects import postgresql as dialect_postgresql
11+
12+
from alembic import op
13+
14+
# revision identifiers, used by Alembic.
15+
revision = "79280fd201ed"
16+
down_revision = "ddf161fdce37"
17+
branch_labels = None
18+
depends_on = None
19+
20+
21+
def upgrade() -> None:
    """Add the ``metadata_urls`` column (PostgreSQL array of strings) to ``resources``."""
    metadata_urls_column = sa.Column(
        "metadata_urls",
        dialect_postgresql.ARRAY(sa.String),
    )
    op.add_column("resources", metadata_urls_column)
26+
27+
28+
def downgrade() -> None:
    """Revert the upgrade by dropping ``metadata_urls`` from ``resources``."""
    table, column = "resources", "metadata_urls"
    op.drop_column(table, column)

cads_catalogue/database.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ class Resource(BaseModel):
332332
update_frequency = sa.Column(sa.String)
333333
variables: Any = sa.Column(dialect_postgresql.JSONB)
334334
content_size = sa.Column(sa.Float)
335+
metadata_urls = sa.Column(dialect_postgresql.ARRAY(sa.String))
335336

336337
# FAIR
337338
fair_timestamp = sa.Column(sa.DateTime(timezone=True), default=None, nullable=True)

cads_catalogue/manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ def load_resource_metadata_file(folder_path: str | pathlib.Path) -> dict[str, An
350350
metadata["use_limitation"] = data.get("use_limitation")
351351
metadata["content_size"] = data.get("content_size")
352352
metadata["update_frequency"] = data.get("update_frequency")
353+
metadata["metadata_urls"] = data.get("isBasedOn", []) or []
353354
return metadata
354355

355356

cads_catalogue/validations.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ def validate_metadata_json(dataset_folder):
297297
"use_limitation",
298298
"content_size",
299299
"update_frequency",
300+
"is_based_on",
300301
]
301302
# suggest to insert a value for missing/None fields:
302303
for optional_field in optional_fields:
@@ -376,6 +377,13 @@ def validate_metadata_json(dataset_folder):
376377
if not utils.is_url(keyword_url):
377378
logger.error(f"keyword_url {keyword_url} is not an url")
378379

380+
# validate is_based_on as list of urls
381+
is_based_on = data.get("is_based_on")
382+
if is_based_on:
383+
for item in is_based_on:
384+
if not utils.is_url(item):
385+
logger.error(f"is_based_on entry {item} is not an url")
386+
379387
# validate field that is a list of strings
380388
for field in ["licences", "qos_tags", "related_resources_keywords"]:
381389
field_value = data.get(field)

tests/data/cads-forms-json/legacy/reanalysis-era5-land/metadata.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
],
5050
"popularity": 500,
5151
"qos_tags": ["tag1", "tag2", "tag3"],
52+
"isBasedOn": ["http://cfconventions.org/documents.html"],
5253
"api_enforce_constraints": true,
5354
"title": "ERA5-Land hourly data from 1950 to present",
5455
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past."

tests/data/cads-forms-json/reanalysis-era5-land/metadata.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
"keywords_urls": [],
5151
"popularity": 500,
5252
"qos_tags": ["tag1", "tag2", "tag3"],
53+
"isBasedOn": ["http://cfconventions.org/documents.html"],
5354
"api_enforce_constraints": true,
5455
"title": "ERA5-Land hourly data from 1950 to present",
5556
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past."

tests/data/dumped_resources1.txt

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
"begin_date": "1950-01-01",
2424
"end_date": "2023-02-11",
2525
"publication_date": "2019-07-12",
26-
"record_update": "2025-08-27 07:45:50.510200+03:00",
26+
"record_update": "2025-12-24 10:11:04.217153+01:00",
2727
"resource_update": "2023-02-17",
2828
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
2929
"citation": null,
@@ -58,12 +58,15 @@
5858
"update_frequency": null,
5959
"variables": [],
6060
"content_size": null,
61+
"metadata_urls": [
62+
"http://cfconventions.org/documents.html"
63+
],
64+
"fair_timestamp": null,
6165
"fulltext": null,
6266
"high_priority_terms": "reanalysis ERA5 land",
6367
"popularity": 500,
6468
"search_field": "'1950':11A 'accur':92B 'across':65B 'back':87B 'climat':56B,96B 'combin':59B 'compar':38B 'complet':71B 'compon':51B 'consist':23B,73B 'data':9A,61B,82B 'dataset':20B,74B 'decad':33B,86B 'descript':93B 'ecmwf':54B 'enhanc':36B 'era5':3A,6A,15B,40B,42B,55B,101 'era5-land':5A,14B,41B 'evolut':27B 'global':70B 'goe':84B 'hour':8A 'land':4A,7A,16B,29B,43B,50B,102 'law':77B 'model':60B 'observ':63B 'past':99B 'physic':79B 'present':13A 'produc':46B,81B 'provid':21B,90B 'reanalysi':2A,19B,57B,58B,80B,100 'reanalysis-era5-land':1A 'replay':48B 'resolut':37B 'sever':32B,85B 'time':89B 'use':75B 'variabl':30B 'view':24B 'world':67B",
65-
"fts": "'era5':2 'land':3 'reanalysi':1",
66-
"fair_timestamp": null
69+
"fts": "'era5':2 'land':3 'reanalysi':1"
6770
},
6871
{
6972
"resource_id": 2,
@@ -89,7 +92,7 @@
8992
"begin_date": "1950-01-01",
9093
"end_date": "2022-12-01",
9194
"publication_date": "2019-06-23",
92-
"record_update": "2025-08-27 07:45:50.531565+03:00",
95+
"record_update": "2025-12-24 10:11:04.257466+01:00",
9396
"resource_update": "2023-02-17",
9497
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
9598
"citation": null,
@@ -122,11 +125,12 @@
122125
"update_frequency": null,
123126
"variables": [],
124127
"content_size": null,
128+
"metadata_urls": [],
129+
"fair_timestamp": null,
125130
"fulltext": "climate reanalysis past land era5 hydrology physics biosphere copernicus c3s conditions variables monthly means",
126131
"high_priority_terms": "",
127132
"popularity": 1,
128133
"search_field": "'1950':14A 'accur':95B 'across':68B 'averag':11A 'back':90B 'biospher':110C 'c3s':112C 'climat':59B,99B,103C 'combin':62B 'compar':41B 'complet':74B 'compon':54B 'condit':113C 'consist':26B,76B 'copernicus':111C 'data':12A,64B,85B 'dataset':23B,77B 'decad':36B,89B 'descript':96B 'ecmwf':57B 'enhanc':39B 'era5':3A,8A,18B,43B,45B,58B,107C 'era5-land':7A,17B,44B 'evolut':30B 'global':73B 'goe':87B 'hydrolog':108C 'land':4A,9A,19B,32B,46B,53B,106C 'law':80B 'mean':6A,116C 'model':63B 'month':5A,10A,115C 'observ':66B 'past':102B,105C 'physic':82B,109C 'present':16A 'produc':49B,84B 'provid':24B,93B 'reanalysi':2A,22B,60B,61B,83B,104C 'reanalysis-era5-land-monthly-means':1A 'replay':51B 'resolut':40B 'sever':35B,88B 'time':92B 'use':78B 'variabl':33B,114C 'view':27B 'world':70B",
129-
"fts": "",
130-
"fair_timestamp": null
134+
"fts": ""
131135
}
132136
]

tests/data/dumped_resources2.txt

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
"begin_date": "1950-01-01",
2424
"end_date": "2023-02-11",
2525
"publication_date": "2019-07-12",
26-
"record_update": "2025-08-27 07:45:50.510200+03:00",
26+
"record_update": "2025-12-24 10:11:04.217153+01:00",
2727
"resource_update": "2023-02-17",
2828
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
2929
"citation": null,
@@ -58,12 +58,15 @@
5858
"update_frequency": null,
5959
"variables": [],
6060
"content_size": null,
61+
"metadata_urls": [
62+
"http://cfconventions.org/documents.html"
63+
],
64+
"fair_timestamp": null,
6165
"fulltext": null,
6266
"high_priority_terms": "reanalysis ERA5 land",
6367
"popularity": 500,
6468
"search_field": "'1950':11A 'accur':92B 'across':65B 'back':87B 'climat':56B,96B 'combin':59B 'compar':38B 'complet':71B 'compon':51B 'consist':23B,73B 'data':9A,61B,82B 'dataset':20B,74B 'decad':33B,86B 'descript':93B 'ecmwf':54B 'enhanc':36B 'era5':3A,6A,15B,40B,42B,55B,101 'era5-land':5A,14B,41B 'evolut':27B 'global':70B 'goe':84B 'hour':8A 'land':4A,7A,16B,29B,43B,50B,102 'law':77B 'model':60B 'observ':63B 'past':99B 'physic':79B 'present':13A 'produc':46B,81B 'provid':21B,90B 'reanalysi':2A,19B,57B,58B,80B,100 'reanalysis-era5-land':1A 'replay':48B 'resolut':37B 'sever':32B,85B 'time':89B 'use':75B 'variabl':30B 'view':24B 'world':67B",
65-
"fts": "'era5':2 'land':3 'reanalysi':1",
66-
"fair_timestamp": null
69+
"fts": "'era5':2 'land':3 'reanalysi':1"
6770
},
6871
{
6972
"resource_id": 2,
@@ -89,7 +92,7 @@
8992
"begin_date": "1950-01-01",
9093
"end_date": "2022-12-01",
9194
"publication_date": "2019-06-23",
92-
"record_update": "2025-08-27 07:45:50.531565+03:00",
95+
"record_update": "2025-12-24 10:11:04.257466+01:00",
9396
"resource_update": "2023-02-17",
9497
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
9598
"citation": null,
@@ -122,11 +125,12 @@
122125
"update_frequency": null,
123126
"variables": [],
124127
"content_size": null,
128+
"metadata_urls": [],
129+
"fair_timestamp": null,
125130
"fulltext": "climate reanalysis past land era5 hydrology physics biosphere copernicus c3s conditions variables monthly means",
126131
"high_priority_terms": "",
127132
"popularity": 1,
128133
"search_field": "'1950':14A 'accur':95B 'across':68B 'averag':11A 'back':90B 'biospher':110C 'c3s':112C 'climat':59B,99B,103C 'combin':62B 'compar':41B 'complet':74B 'compon':54B 'condit':113C 'consist':26B,76B 'copernicus':111C 'data':12A,64B,85B 'dataset':23B,77B 'decad':36B,89B 'descript':96B 'ecmwf':57B 'enhanc':39B 'era5':3A,8A,18B,43B,45B,58B,107C 'era5-land':7A,17B,44B 'evolut':30B 'global':73B 'goe':87B 'hydrolog':108C 'land':4A,9A,19B,32B,46B,53B,106C 'law':80B 'mean':6A,116C 'model':63B 'month':5A,10A,115C 'observ':66B 'past':102B,105C 'physic':82B,109C 'present':16A 'produc':49B,84B 'provid':24B,93B 'reanalysi':2A,22B,60B,61B,83B,104C 'reanalysis-era5-land-monthly-means':1A 'replay':51B 'resolut':40B 'sever':35B,88B 'time':92B 'use':78B 'variabl':33B,114C 'view':27B 'world':70B",
129-
"fts": "",
130-
"fair_timestamp": null
134+
"fts": ""
131135
}
132136
]

tests/data/dumped_resources3.txt

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"adaptor_properties_hash": "3cfceb3cb45d4fb56d6dc29575c36af4",
1111
"api_enforce_constraints": true,
1212
"disabled_reason": null,
13-
"sources_hash": "3cbb4e51e9993fac651033ea7334f7a5",
13+
"sources_hash": "29a88c88049cd4dbf385dd1a2584140f",
1414
"related_resources_keywords": [],
1515
"sanity_check": null,
1616
"sanity_check_conf": null,
@@ -23,7 +23,7 @@
2323
"begin_date": "1950-01-01",
2424
"end_date": "2023-02-11",
2525
"publication_date": "2019-07-12",
26-
"record_update": "2025-08-27 07:45:51.253762+03:00",
26+
"record_update": "2025-12-24 10:11:05.982515+01:00",
2727
"resource_update": "2023-02-17",
2828
"abstract": "ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis. Reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past.",
2929
"citation": null,
@@ -58,11 +58,14 @@
5858
"update_frequency": null,
5959
"variables": [],
6060
"content_size": null,
61+
"metadata_urls": [
62+
"http://cfconventions.org/documents.html"
63+
],
64+
"fair_timestamp": null,
6165
"fulltext": null,
6266
"high_priority_terms": "reanalysis ERA5 land",
6367
"popularity": 500,
6468
"search_field": "'1950':11A 'accur':92B 'across':65B 'back':87B 'climat':56B,96B 'combin':59B 'compar':38B 'complet':71B 'compon':51B 'consist':23B,73B 'data':9A,61B,82B 'dataset':20B,74B 'decad':33B,86B 'descript':93B 'ecmwf':54B 'enhanc':36B 'era5':3A,6A,15B,40B,42B,55B,101 'era5-land':5A,14B,41B 'evolut':27B 'global':70B 'goe':84B 'hour':8A 'land':4A,7A,16B,29B,43B,50B,102 'law':77B 'model':60B 'observ':63B 'past':99B 'physic':79B 'present':13A 'produc':46B,81B 'provid':21B,90B 'reanalysi':2A,19B,57B,58B,80B,100 'reanalysis-era5-land':1A 'replay':48B 'resolut':37B 'sever':32B,85B 'time':89B 'use':75B 'variabl':30B 'view':24B 'world':67B",
65-
"fts": "'era5':2 'land':3 'reanalysi':1",
66-
"fair_timestamp": null
69+
"fts": "'era5':2 'land':3 'reanalysi':1"
6770
}
6871
]

0 commit comments

Comments
 (0)