Skip to content

Commit 4e3171b

Browse files
authored
Process TIFF files for cryo-CLEM workflow (#295)
Added a function, an API endpoint, a command-line entry point, and a cluster submission script for creating image stacks from individual TIFF files.
1 parent 7b7bc4c commit 4e3171b

File tree

14 files changed

+1422
-664
lines changed

14 files changed

+1422
-664
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ classifiers = [
3030
"Programming Language :: Python :: 3.12",
3131
]
3232
dependencies = [
33+
"defusedxml", # For safely parsing XML files
3334
"pydantic<2", # Pip hops between installing v2.7 or v1.10 depending on which of the additional dependencies are requested
3435
"requests",
3536
"rich",
@@ -85,9 +86,11 @@ murfey = "murfey.client:run"
8586
"murfey.simulate" = "murfey.cli.dummy:run"
8687
"murfey.spa_inject" = "murfey.cli.inject_spa_processing:run"
8788
"murfey.spa_ispyb_entries" = "murfey.cli.spa_ispyb_messages:run"
89+
"murfey.tiff_to_stack" = "murfey.cli.tiff_to_stack:run"
8890
"murfey.transfer" = "murfey.cli.transfer:run"
8991
[project.entry-points."murfey.workflows"]
9092
"lif_to_tiff" = "murfey.workflows.lif_to_tiff:zocalo_cluster_request"
93+
"tiff_to_stack" = "murfey.workflows.tiff_to_stack:zocalo_cluster_request"
9194

9295
[tool.setuptools]
9396
package-dir = {"" = "src"}

src/murfey/cli/lif_to_tiff.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
import argparse
22
from pathlib import Path
33

4-
from murfey.util.lif import convert_lif_to_tiff
4+
from murfey.util.clem import convert_lif_to_tiff
55

66

77
def run():
8-
parser = argparse.ArgumentParser(description="Convert LIF to TIFF")
8+
parser = argparse.ArgumentParser(
9+
description="Convert LIF files into TIFF image stacks"
10+
)
911

1012
parser.add_argument(
11-
nargs=1, dest="lif_path", help="Path to LIF file for conversion"
13+
dest="lif_path",
14+
type=str,
15+
help="Path to LIF file for conversion",
1216
)
1317
parser.add_argument(
1418
"--root-dir",
1519
default="images",
1620
type=str,
17-
help="Top subdirectory that LIF files are stored in. Used to determine destination of TIFFs",
21+
help="Top subdirectory that LIF files are stored in. Used to determine destination of the created TIFF image stacks",
1822
)
1923
parser.add_argument(
2024
"-n", "--num-procs", default=1, type=int, help="Number of processes"
@@ -23,7 +27,7 @@ def run():
2327
args = parser.parse_args()
2428

2529
convert_lif_to_tiff(
26-
Path(args.lif_path),
30+
file=Path(args.lif_path),
2731
root_folder=args.root_dir,
2832
number_of_processes=args.num_procs,
2933
)

src/murfey/cli/tiff_to_stack.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import argparse
2+
from pathlib import Path
3+
4+
from murfey.util.clem import convert_tiff_to_stack
5+
6+
7+
def run():
    """
    Command-line entry point: build image stacks from a series of TIFF files.

    Parses the command-line arguments, collects every TIFF file in the same
    directory that belongs to the same series as the file provided, sorts
    them, and passes the list on to ``convert_tiff_to_stack``.

    Arguments read from the command line:
        tiff_path   Path to any one TIFF file of the series (mandatory).
        --root-dir  Top subdirectory the raw TIFF files are stored in
                    (default: "images").
        --metadata  Path to the XLIF file of the series (optional).
    """
    # Create an argument parser
    parser = argparse.ArgumentParser(
        description="Convert individual TIFF files into image stacks"
    )
    # Path to single TIFF file from series (Mandatory)
    parser.add_argument(
        dest="tiff_path",
        type=str,
        help="Path to any one of the TIFF files from the series to be processed",
    )
    # Root directory (Optional)
    parser.add_argument(
        "--root-dir",
        default="images",
        type=str,
        help="Top subdirectory that raw TIFF files are stored in. Used to determine destination of the created image stacks",
    )
    # Path to metadata file (Optional)
    parser.add_argument(
        "--metadata",
        default=None,
        type=str,
        help="Path to the XLIF file associated with this dataset. If not provided, the script will use relative file paths to find what it thinks is the appropriate file",
    )
    # Parse the arguments
    args = parser.parse_args()

    # Convert to correct object types
    tiff_file = Path(args.tiff_path)
    # File names are split on "--"; the first field is the series name
    series_name = tiff_file.stem.split("--")[0]

    # Generate list from the single file provided
    # - The series name must match exactly: a prefix match would wrongly
    #   pull "Position10--..." into the "Position1" series
    # - Names with fewer than three "--"-separated fields are skipped, as
    #   they cannot be sorted by the second and third fields below
    series_files = []
    for candidate in tiff_file.parent.glob("./*"):
        if candidate.suffix not in {".tif", ".tiff"}:
            continue
        name_parts = candidate.stem.split("--")
        if len(name_parts) < 3 or name_parts[0] != series_name:
            continue
        series_files.append((name_parts, candidate))

    # Sort by series, then channel, then frame
    # (i.e. first field, then third field, then second field of the name)
    series_files.sort(key=lambda entry: (entry[0][0], entry[0][2], entry[0][1]))
    tiff_list = [candidate.resolve() for _, candidate in series_files]

    # Resolve for metadata argument
    metadata = Path(args.metadata) if args.metadata else None

    convert_tiff_to_stack(
        tiff_list=tiff_list,
        root_folder=args.root_dir,
        metadata_file=metadata,
    )

src/murfey/client/contexts/clem.py

Lines changed: 136 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010
import logging
1111
from datetime import datetime
1212
from pathlib import Path
13-
from typing import Optional
13+
from typing import Dict, List, Optional
14+
15+
from defusedxml.ElementTree import parse
1416

1517
from murfey.client.context import Context
1618
from murfey.client.instance_environment import MurfeyInstanceEnvironment
17-
from murfey.util import capture_post, get_machine_config
19+
from murfey.util import capture_post, get_machine_config, sanitise
20+
from murfey.util.clem import xml
1821

1922
# Create logger object
2023
logger = logging.getLogger("murfey.client.contexts.clem")
@@ -50,10 +53,20 @@ def _get_source(
5053
return None
5154

5255

56+
# WORK IN PROGRESS
57+
# Will need to add context for TIFF files associated with CLEM
5358
class CLEMContext(Context):
5459
def __init__(self, acquisition_software: str, basepath: Path):
5560
super().__init__("CLEM", acquisition_software)
5661
self._basepath = basepath
62+
# CLEM contexts for "auto-save" acquisition mode
63+
self._tiff_series: Dict[str, List[str]] = {} # Series name : List of TIFF paths
64+
self._tiff_timestamps: Dict[str, List[float]] = {} # Series name: Timestamps
65+
self._tiff_sizes: Dict[str, List[int]] = {} # Series name: File sizes
66+
self._series_metadata: Dict[str, str] = {} # Series name: Metadata file path
67+
self._metadata_timestamp: Dict[str, float] = {} # Series name: Timestamp
68+
self._metadata_size: Dict[str, int] = {} # Series name: File size
69+
self._files_in_series: Dict[str, int] = {} # Series name : Total TIFFs
5770

5871
def post_transfer(
5972
self,
@@ -65,7 +78,126 @@ def post_transfer(
6578
super().post_transfer(
6679
transferred_file, role=role, environment=environment, **kwargs
6780
)
68-
# Check if file is a LIF file
81+
82+
# Process files generated by "auto-save" acquisition mode
83+
# These include TIF/TIFF and XLIF files
84+
if transferred_file.suffix in (".tif", ".tiff", ".xlif"):
85+
# Type checking to satisfy MyPy
86+
if not environment:
87+
logger.warning("No environment passed in")
88+
return True
89+
90+
# Location of the file on the client PC
91+
source = _get_source(transferred_file, environment)
92+
# Type checking to satisfy MyPy
93+
if not source:
94+
logger.warning(f"No source found for file {transferred_file}")
95+
return True
96+
97+
# Get the Path on the DLS file system
98+
file_path = _file_transferred_to(
99+
environment=environment,
100+
source=source,
101+
file_path=transferred_file,
102+
)
103+
if not file_path:
104+
logger.warning(
105+
f"File associated with {sanitise(str(transferred_file))} not found on the storage system"
106+
)
107+
return False
108+
109+
# Process TIF/TIFF files
110+
if any(transferred_file.suffix == s for s in [".tif", ".tiff"]):
111+
# Files should be named "PositionX--ZXX--CXX.tif" by default
112+
if not len(transferred_file.stem.split("--")) == 3:
113+
logger.warning(
114+
"This TIFF file is likely not part of the CLEM workflow"
115+
)
116+
return False # Not sure if None, False, or True is most appropriate
117+
118+
# Get series name from file name
119+
series_name = "/".join(
120+
[*file_path.parent.parts[-2:], file_path.stem.split("--")[0]]
121+
) # The previous 2 parent directories should be unique enough
122+
123+
# Create key-value pairs containing empty list if not already present
124+
if series_name not in self._tiff_series.keys():
125+
self._tiff_series[series_name] = []
126+
if series_name not in self._tiff_sizes.keys():
127+
self._tiff_sizes[series_name] = []
128+
if series_name not in self._tiff_timestamps.keys():
129+
self._tiff_timestamps[series_name] = []
130+
# Append information to list
131+
self._tiff_series[series_name].append(str(file_path))
132+
self._tiff_sizes[series_name].append(transferred_file.stat().st_size)
133+
self._tiff_timestamps[series_name].append(
134+
transferred_file.stat().st_ctime
135+
)
136+
137+
# Process XLIF files
138+
if transferred_file.suffix == ".xlif":
139+
140+
# XLIF files don't have the "--ZXX--CXX" additions in the file name
141+
# But they have "/Metadata/" as the immediate parent
142+
series_name = "/".join(
143+
[*file_path.parent.parent.parts[-2:], file_path.stem]
144+
) # The previous 2 parent directories should be unique enough
145+
146+
# Extract metadata to get the expected size of the series
147+
metadata = parse(file_path).getroot()
148+
metadata = xml.get_image_elements(metadata)[0]
149+
150+
# Get channel and dimension information
151+
channels = metadata.findall(
152+
"Data/Image/ImageDescription/Channels/ChannelDescription"
153+
)
154+
dimensions = metadata.findall(
155+
"Data/Image/ImageDescription/Dimensions/DimensionDescription"
156+
)
157+
158+
# Calculate expected number of files for this series
159+
num_channels = len(channels)
160+
num_frames = (
161+
int(dimensions[2].attrib["NumberOfElements"])
162+
if len(dimensions) > 2
163+
else 1
164+
)
165+
num_files = num_channels * num_frames
166+
167+
# Update dictionary entries
168+
self._files_in_series[series_name] = num_files
169+
self._series_metadata[series_name] = str(file_path)
170+
self._metadata_size[series_name] = transferred_file.stat().st_size
171+
self._metadata_timestamp[series_name] = transferred_file.stat().st_ctime
172+
173+
# Post message if all files for the associated series have been collected
174+
if (
175+
len(self._tiff_series[series_name])
176+
== self._files_in_series[series_name]
177+
):
178+
179+
# Construct URL for Murfey server to communicate with
180+
url = f"{str(environment.url.geturl())}/sessions/{environment.murfey_session}/tiff_to_stack"
181+
if not url:
182+
logger.warning("No URL found for the environment")
183+
return True
184+
185+
# Post the message and log any errors that arise
186+
capture_post(
187+
url,
188+
json={
189+
"series_name": series_name,
190+
"tiff_files": self._tiff_series[series_name],
191+
"tiff_sizes": self._tiff_sizes[series_name],
192+
"tiff_timestamps": self._tiff_timestamps[series_name],
193+
"series_metadata": self._series_metadata[series_name],
194+
"metadata_size": self._metadata_size[series_name],
195+
"metadata_timestamp": self._metadata_timestamp[series_name],
196+
"description": "",
197+
},
198+
)
199+
200+
# Process LIF files
69201
if transferred_file.suffix == ".lif":
70202
# Type checking to satisfy MyPy
71203
if not environment:
@@ -83,7 +215,7 @@ def post_transfer(
83215
url = f"{str(environment.url.geturl())}/sessions/{environment.murfey_session}/lif_to_tiff"
84216
# Type checking to satisfy MyPy
85217
if not url:
86-
logger.warning("No url found for the environment")
218+
logger.warning("No URL found for the environment")
87219
return True
88220

89221
# Get the Path on the DLS file system

src/murfey/server/clem/api.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@
55
from fastapi import APIRouter
66

77
from murfey.server import _transport_object
8-
from murfey.util.lif import convert_lif_to_tiff
9-
from murfey.util.models import LifFileInfo
8+
from murfey.util.clem import convert_lif_to_tiff, convert_tiff_to_stack
9+
from murfey.util.models import LifFileInfo, TiffSeriesInfo
1010

1111
# Create APIRouter class object
1212
router = APIRouter()
1313

1414

15-
# Allow function to be seen as an endpoint by the router
16-
@router.post("/sessions/{session_id}/lif_to_tiff")
15+
@router.post("/sessions/{session_id}/lif_to_tiff") # API posts to this URL
1716
def lif_to_tiff(
1817
session_id: int, # Used by the decorator
1918
lif_info: LifFileInfo,
@@ -23,10 +22,38 @@ def lif_to_tiff(
2322
)
2423
if murfey_workflows:
2524
murfey_workflows[0].load()(
26-
file=lif_info.name, root_folder="images", messenger=_transport_object
25+
# Match the arguments found in murfey.workflows.lif_to_tiff
26+
file=lif_info.name,
27+
root_folder="images",
28+
messenger=_transport_object,
2729
)
2830
else:
2931
convert_lif_to_tiff(
3032
file=lif_info.name,
3133
root_folder="images",
3234
)
35+
36+
37+
# WORK IN PROGRESS
38+
@router.post("/sessions/{session_id}/tiff_to_stack")
def tiff_to_stack(
    session_id: int,  # Used by the decorator
    tiff_info: TiffSeriesInfo,
):
    """
    Endpoint that turns a posted series of TIFF files into image stacks.

    Looks for an entry point named "tiff_to_stack" in the "murfey.workflows"
    group. If one is registered, the first match is loaded and called with
    the TIFF file list, the metadata file, and the transport object;
    otherwise, convert_tiff_to_stack is called directly in this process.
    """
    workflow_entry_points = importlib.metadata.entry_points().select(
        group="murfey.workflows", name="tiff_to_stack"
    )

    # No workflow registered: do the conversion directly
    if not workflow_entry_points:
        convert_tiff_to_stack(
            tiff_list=tiff_info.tiff_files,
            root_folder="images",
            metadata_file=tiff_info.series_metadata,
        )
        return

    # Load the first matching workflow and hand the request over to it
    # NOTE(review): keyword names are presumed to match the function in
    # murfey.workflows.tiff_to_stack; confirm against that module
    workflow = workflow_entry_points[0].load()
    workflow(
        file=tiff_info.tiff_files,
        root_folder="images",
        metadata=tiff_info.series_metadata,
        messenger=_transport_object,
    )

0 commit comments

Comments
 (0)