Skip to content

Commit e45de1c

Browse files
committed
Add string sanitisation for plate names
1 parent 2aada92 commit e45de1c

File tree

3 files changed

+39
-5
lines changed

3 files changed

+39
-5
lines changed

fractal_tasks_core/cellvoyager/metadata.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import fnmatch
1616
import logging
1717
import math
18+
import string
1819
from pathlib import Path
1920
from typing import Optional
2021
from typing import Union
@@ -477,3 +478,31 @@ def check_group_consistency(grouped_df: pd.DataFrame, message: str = ""):
477478
f"{message}\n"
478479
f"Difference dataframe: \n{diff_df}"
479480
)
481+
482+
483+
# Characters that `sanitize_string` replaces with an underscore: every ASCII
# punctuation character and every ASCII whitespace character.
__SPECIAL_CHARACTERS__ = f"{string.punctuation}{string.whitespace}"

# Translation table built once at import time, so that `sanitize_string` can
# replace all special characters in a single `str.translate` pass instead of
# one `str.replace` call per special character.
_SANITIZE_TRANSLATION_TABLE = str.maketrans(
    dict.fromkeys(__SPECIAL_CHARACTERS__, "_")
)


def sanitize_string(value: str) -> str:
    r"""
    Make string safe to be used in file/folder names.

    Make the string lower-case, and replace any special character with an
    underscore, where special characters are:

        >>> string.punctuation
        '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
        >>> string.whitespace
        ' \t\n\r\x0b\x0c'

    Each special character is replaced individually (consecutive special
    characters are not collapsed), so the output always has the same length
    as the lower-cased input. Characters that are not listed above are only
    lower-cased.

    Args:
        value: Input string

    Returns:
        Sanitized value
    """
    # Lower-case first, then map every special character to "_" in one pass.
    return value.lower().translate(_SANITIZE_TRANSLATION_TABLE)

fractal_tasks_core/tasks/cellvoyager_to_ome_zarr_init.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from fractal_tasks_core.cellvoyager.metadata import (
2828
parse_yokogawa_metadata,
2929
)
30+
from fractal_tasks_core.cellvoyager.metadata import sanitize_string
3031
from fractal_tasks_core.cellvoyager.wells import generate_row_col_split
3132
from fractal_tasks_core.cellvoyager.wells import get_filename_well_id
3233
from fractal_tasks_core.channels import check_unique_wavelength_ids
@@ -241,8 +242,9 @@ def cellvoyager_to_ome_zarr_init(
241242
parallelization_list = []
242243

243244
for plate in plates:
245+
plate_name = sanitize_string(plate)
244246
# Define plate zarr
245-
relative_zarrurl = f"{plate}.zarr"
247+
relative_zarrurl = f"{plate_name}.zarr"
246248
in_path = dict_plate_paths[plate]
247249
logger.info(f"Creating {relative_zarrurl}")
248250
# Call zarr.open_group wrapper, which handles overwrite=True/False
@@ -337,7 +339,7 @@ def cellvoyager_to_ome_zarr_init(
337339
well_wavelength_ids = sorted(list(set(well_wavelength_ids)))
338340
if well_wavelength_ids != actual_wavelength_ids:
339341
raise ValueError(
340-
f"ERROR: well {well} in plate {plate} (prefix: "
342+
f"ERROR: well {well} in plate {plate_name} (prefix: "
341343
f"{plate_prefix}) has missing channels.\n"
342344
f"Expected: {actual_channels}\n"
343345
f"Found: {well_wavelength_ids}.\n"
@@ -355,7 +357,7 @@ def cellvoyager_to_ome_zarr_init(
355357
col_list = sorted(list(set(col_list)))
356358

357359
plate_attrs = {
358-
"acquisitions": [{"id": 0, "name": plate}],
360+
"acquisitions": [{"id": 0, "name": plate_name}],
359361
"columns": [{"name": col} for col in col_list],
360362
"rows": [{"name": row} for row in row_list],
361363
"version": __OME_NGFF_VERSION__,
@@ -377,7 +379,9 @@ def cellvoyager_to_ome_zarr_init(
377379
for row, column in well_rows_columns:
378380
parallelization_list.append(
379381
{
380-
"zarr_url": f"{zarr_dir}/{plate}.zarr/{row}/{column}/0",
382+
"zarr_url": (
383+
f"{zarr_dir}/{plate_name}.zarr/{row}/{column}/0"
384+
),
381385
"init_args": InitArgsCellVoyager(
382386
image_dir=in_path,
383387
plate_prefix=plate_prefix,

fractal_tasks_core/tasks/cellvoyager_to_ome_zarr_init_multiplex.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from fractal_tasks_core.cellvoyager.metadata import (
3030
parse_yokogawa_metadata,
3131
)
32+
from fractal_tasks_core.cellvoyager.metadata import sanitize_string
3233
from fractal_tasks_core.cellvoyager.wells import generate_row_col_split
3334
from fractal_tasks_core.cellvoyager.wells import get_filename_well_id
3435
from fractal_tasks_core.channels import check_unique_wavelength_ids
@@ -254,7 +255,7 @@ def cellvoyager_to_ome_zarr_init_multiplex(
254255
current_plates = [item["plate"] for item in dict_acquisitions.values()]
255256
if len(set(current_plates)) > 1:
256257
raise ValueError(f"{current_plates=}")
257-
plate = current_plates[0]
258+
plate = sanitize_string(current_plates[0])
258259

259260
zarrurl = dict_acquisitions[acquisitions_sorted[0]]["plate"] + ".zarr"
260261
full_zarrurl = str(Path(zarr_dir) / zarrurl)

0 commit comments

Comments
 (0)