Skip to content

Commit c17550d

Browse files
committed
Updated the format_string method to generate titles in a standard format
1 parent 3eaadd4 commit c17550d

File tree

2 files changed

+48
-10
lines changed

2 files changed

+48
-10
lines changed

deep_code/tests/utils/test_dataset_stac_generator.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,34 @@ def test_open_dataset_failure(self, mock_logger, mock_new_data_store):
217217
)
218218
self.assertIn("Public store, Authenticated store", str(context.exception))
219219
self.assertEqual(mock_new_data_store.call_count, 2)
220+
221+
class TestFormatString(unittest.TestCase):
222+
def test_single_word(self):
223+
self.assertEqual(OscDatasetStacGenerator.format_string("temperature"), "Temperature")
224+
self.assertEqual(OscDatasetStacGenerator.format_string("temp"), "Temp")
225+
self.assertEqual(OscDatasetStacGenerator.format_string("hello"), "Hello")
226+
227+
def test_multiple_words_with_spaces(self):
228+
self.assertEqual(OscDatasetStacGenerator.format_string("surface temp"), "Surface Temp")
229+
self.assertEqual(OscDatasetStacGenerator.format_string("this is a test"), "This Is A Test")
230+
231+
def test_multiple_words_with_underscores(self):
232+
self.assertEqual(OscDatasetStacGenerator.format_string("surface_temp"), "Surface Temp")
233+
self.assertEqual(OscDatasetStacGenerator.format_string("this_is_a_test"), "This Is A Test")
234+
235+
def test_mixed_spaces_and_underscores(self):
236+
self.assertEqual(OscDatasetStacGenerator.format_string("surface_temp and_more"), "Surface Temp And More")
237+
self.assertEqual(OscDatasetStacGenerator.format_string("mixed_case_with_underscores_and spaces"), "Mixed Case With Underscores And Spaces")
238+
239+
def test_edge_cases(self):
240+
# Empty string
241+
self.assertEqual(OscDatasetStacGenerator.format_string(""), "")
242+
# Single word with trailing underscore
243+
self.assertEqual(OscDatasetStacGenerator.format_string("temperature_"), "Temperature")
244+
# Single word with leading underscore
245+
self.assertEqual(OscDatasetStacGenerator.format_string("_temp"), "Temp")
246+
# Single word with leading/trailing spaces
247+
self.assertEqual(OscDatasetStacGenerator.format_string(" hello "), "Hello")
248+
# Multiple spaces or underscores
249+
self.assertEqual(OscDatasetStacGenerator.format_string("too___many___underscores"), "Too Many Underscores")
250+
self.assertEqual(OscDatasetStacGenerator.format_string("too many spaces"), "Too Many Spaces")

deep_code/utils/dataset_stac_generator.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,10 @@ def _get_temporal_extent(self) -> TemporalExtent:
175175

176176
@staticmethod
177177
def _normalize_name(name: str | None) -> str | None:
178-
return name.replace(" ", "-").lower() if name else None
178+
if name:
179+
return (name.replace(" ", "-").
180+
replace("_", "-").lower())
181+
return None
179182

180183
def _get_general_metadata(self) -> dict:
181184
return {
@@ -200,8 +203,10 @@ def extract_metadata_for_variable(self, variable_data) -> dict:
200203
def get_variable_ids(self) -> list[str]:
201204
"""Get variable IDs for all variables in the dataset."""
202205
variable_ids = list(self.variables_metadata.keys())
203-
# Remove 'crs' and 'spatial_ref' from the list if they exist
204-
return [var_id for var_id in variable_ids if var_id not in ["crs", "spatial_ref"]]
206+
# Remove 'crs' and 'spatial_ref' from the list if they exist. Note that
207+
# 'spatial_ref' is normalized to 'spatial-ref' in variable_ids, so that form is skipped.
208+
return [var_id for var_id in variable_ids if var_id not in ["crs",
209+
"spatial-ref"]]
205210

206211
def get_variables_metadata(self) -> dict[str, dict]:
207212
"""Extract metadata for all variables in the dataset."""
@@ -263,7 +268,7 @@ def build_variable_catalog(self, var_metadata) -> Catalog:
263268
var_catalog = Catalog(
264269
id=var_id,
265270
description=var_metadata.get("description"),
266-
title=var_id,
271+
title=self.format_string(var_id),
267272
stac_extensions=[
268273
"https://stac-extensions.github.io/themes/v1.0.0/schema.json"
269274
],
@@ -335,8 +340,7 @@ def update_product_base_catalog(self, product_catalog_path) -> Catalog:
335340
product_base_catalog.set_self_href(PRODUCT_BASE_CATALOG_SELF_HREF)
336341
return product_base_catalog
337342

338-
@staticmethod
339-
def update_variable_base_catalog(variable_base_catalog_path, variable_ids) \
343+
def update_variable_base_catalog(self, variable_base_catalog_path, variable_ids) \
340344
-> (
341345
Catalog):
342346
"""Link variables to the base variable catalog"""
@@ -347,7 +351,7 @@ def update_variable_base_catalog(variable_base_catalog_path, variable_ids) \
347351
rel="child",
348352
target=f"./{var_id}/catalog.json",
349353
media_type="application/json",
350-
title=var_id,
354+
title=self.format_string(var_id),
351355
)
352356
)
353357
# 'self' link: the direct URL where this JSON is hosted
@@ -414,8 +418,11 @@ def update_existing_variable_catalog(self, var_file_path, var_id) -> Catalog:
414418
return existing_catalog
415419

416420
@staticmethod
417-
def format_string(s):
418-
return s.capitalize()
421+
def format_string(s: str) -> str:
422+
# Strip leading/trailing spaces/underscores, replace underscores and hyphens with spaces, then split into words
423+
words = s.strip(" _").replace("_", " ").replace("-", " ").split()
424+
# Capitalize each word and join them with a space
425+
return " ".join(word.capitalize() for word in words)
419426

420427
@staticmethod
421428
def build_theme(osc_themes: list[str]) -> Theme:
@@ -496,7 +503,7 @@ def build_dataset_stac_collection(self) -> Collection:
496503
rel="related",
497504
target=f"../../variables/{var}/catalog.json",
498505
media_type="application/json",
499-
title="Variable: " + var,
506+
title="Variable: " + self.format_string(var),
500507
)
501508
)
502509

0 commit comments

Comments
 (0)