Skip to content

Commit 432c50b

Browse files
Remove serialem code and revise context determination (#632)
The SerialEM code has never been used, and if we want to support SerialEM in the future it would be better to rewrite it from scratch, so this commit removes all code relating to it. It also improves, and adds tests for, the way the analyser determines the context. Distinguishing data from metadata remains potentially difficult: the biggest problem is mdoc files, which appear with the same name in both the data and the metadata folders. --------- Co-authored-by: Eu Pin Tien <[email protected]>
1 parent 6660cdf commit 432c50b

File tree

5 files changed

+156
-92
lines changed

5 files changed

+156
-92
lines changed

src/murfey/client/analyser.py

Lines changed: 35 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,6 @@ def _find_context(self, file_path: Path) -> bool:
118118
in the Context classes themselves.
119119
"""
120120
logger.debug(f"Finding context using file {str(file_path)!r}")
121-
if "atlas" in file_path.parts:
122-
self._context = SPAMetadataContext("epu", self._basepath)
123-
return True
124121

125122
# CLEM workflow checks
126123
# Look for LIF and XLIF files
@@ -129,75 +126,60 @@ def _find_context(self, file_path: Path) -> bool:
129126
return True
130127
# Look for TIFF files associated with CLEM workflow
131128
# Leica's autosave mode seems to name the TIFFs in the format
132-
# PostionXX--ZXX-CXX.tif
133-
if (
134-
"--" in file_path.name
135-
and file_path.suffix in (".tiff", ".tif")
136-
and self._environment
137-
):
138-
created_directories = set(
139-
get_machine_config_client(
140-
str(self._environment.url.geturl()),
141-
instrument_name=self._environment.instrument_name,
142-
demo=self._environment.demo,
143-
).get("analyse_created_directories", [])
144-
)
145-
if created_directories.intersection(set(file_path.parts)):
146-
self._context = CLEMContext("leica", self._basepath)
147-
return True
129+
# PositionXX--ZXX--CXX.tif
130+
if all(
131+
pattern in file_path.name for pattern in ("--Z", "--C")
132+
) and file_path.suffix in (".tiff", ".tif"):
133+
self._context = CLEMContext("leica", self._basepath)
134+
return True
148135

149136
# Tomography and SPA workflow checks
150-
split_file_name = file_path.name.split("_")
151-
if split_file_name:
152-
# Skip context for gain files
153-
if "gain" in split_file_name[-1]:
137+
if "atlas" in file_path.parts:
138+
self._context = SPAMetadataContext("epu", self._basepath)
139+
return True
140+
141+
if "Metadata" in file_path.parts or file_path.name == "EpuSession.dm":
142+
self._context = SPAMetadataContext("epu", self._basepath)
143+
return True
144+
elif (
145+
"Batch" in file_path.parts
146+
or "SearchMaps" in file_path.parts
147+
or "Thumbnails" in file_path.parts
148+
or file_path.name == "Session.dm"
149+
):
150+
self._context = TomographyMetadataContext("tomo", self._basepath)
151+
return True
152+
153+
split_file_stem = file_path.stem.split("_")
154+
if split_file_stem:
155+
if split_file_stem[-1] == "gain":
154156
return False
155157

156158
# Files starting with "FoilHole" and ending in "Fractions", "fractions" or "EER" belong to the SPA workflow
157-
if split_file_name[0].startswith("FoilHole"):
159+
if split_file_stem[0].startswith("FoilHole") and split_file_stem[-1] in [
160+
"Fractions",
161+
"fractions",
162+
"EER",
163+
]:
158164
if not self._context:
159165
logger.info("Acquisition software: EPU")
160166
self._context = SPAModularContext("epu", self._basepath)
161167
self.parameters_model = ProcessingParametersSPA
162168
return True
163169

164170
# Files starting with "Position" belong to the standard tomography workflow
171+
# NOTE: not completely reliable; mdoc files can also appear in the tomography metadata
165172
if (
166-
split_file_name[0] == "Position"
173+
split_file_stem[0] == "Position"
167174
or "[" in file_path.name
168-
or "Fractions" in split_file_name[-1]
169-
or "fractions" in split_file_name[-1]
170-
or "EER" in split_file_name[-1]
175+
or split_file_stem[-1] in ["Fractions", "fractions", "EER"]
176+
or file_path.suffix == ".mdoc"
171177
):
172178
if not self._context:
173179
logger.info("Acquisition software: tomo")
174180
self._context = TomographyContext("tomo", self._basepath)
175181
self.parameters_model = ProcessingParametersTomo
176182
return True
177-
178-
# Files with these suffixes belong to the serial EM tomography workflow
179-
if file_path.suffix in (".mrc", ".tiff", ".tif", ".eer"):
180-
# Ignore batch files and search maps
181-
if any(p in file_path.parts for p in ("Batch", "SearchMaps")):
182-
return False
183-
# Ignore JPG files
184-
if file_path.with_suffix(".jpg").is_file():
185-
return False
186-
# Ignore the averaged movies written out by the Falcon
187-
if (
188-
len(
189-
list(
190-
file_path.parent.glob(
191-
f"{file_path.name}*{file_path.suffix}"
192-
)
193-
)
194-
)
195-
> 1
196-
):
197-
return False
198-
self._context = TomographyContext("serialem", self._basepath)
199-
self.parameters_model = ProcessingParametersTomo
200-
return True
201183
return False
202184

203185
def post_transfer(self, transferred_file: Path):
@@ -288,12 +270,7 @@ def _analyse(self):
288270
if not dc_metadata:
289271
try:
290272
dc_metadata = self._context.gather_metadata(
291-
(
292-
transferred_file.with_suffix(".mdoc")
293-
if self._context._acquisition_software
294-
== "serialem"
295-
else self._xml_file(transferred_file)
296-
),
273+
self._xml_file(transferred_file),
297274
environment=self._environment,
298275
)
299276
except NotImplementedError:

src/murfey/client/contexts/tomo.py

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -400,33 +400,6 @@ def _add_tomo_tilt(
400400
required_strings=required_strings,
401401
)
402402

403-
def _add_serialem_tilt(
404-
self, file_path: Path, environment: MurfeyInstanceEnvironment | None = None
405-
) -> List[str]:
406-
delimiters = ("_", "-")
407-
for d in delimiters:
408-
if file_path.name.count(d) > 1:
409-
delimiter = d
410-
break
411-
else:
412-
delimiter = delimiters[0]
413-
414-
def _extract_tilt_series(p: Path) -> str:
415-
split = p.name.split(delimiter)
416-
for s in split:
417-
if s.isdigit():
418-
return s
419-
raise ValueError(
420-
f"No digits found in {p.name} after splitting on {delimiter}"
421-
)
422-
423-
return self._add_tilt(
424-
file_path,
425-
lambda x: ".".join(x.name.split(delimiter)[-1].split(".")[:-1]),
426-
environment=environment,
427-
required_strings=[],
428-
)
429-
430403
def post_transfer(
431404
self,
432405
transferred_file: Path,
@@ -464,10 +437,8 @@ def post_transfer(
464437
required_strings=kwargs.get("required_strings")
465438
or required_strings,
466439
)
467-
elif self._acquisition_software == "serialem":
468-
completed_tilts = self._add_serialem_tilt(
469-
transferred_file, environment=environment
470-
)
440+
else:
441+
logger.warning(f"Unknown data file {transferred_file}")
471442
if transferred_file.suffix == ".mdoc":
472443
with open(transferred_file, "r") as md:
473444
tilt_series = transferred_file.stem

src/murfey/client/multigrid_control.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from murfey.util.api import url_path_for
2424
from murfey.util.client import capture_delete, capture_post, get_machine_config_client
2525

26-
log = logging.getLogger("murfey.client.mutligrid_control")
26+
log = logging.getLogger("murfey.client.multigrid_control")
2727

2828

2929
@dataclass

src/murfey/util/dummy_setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def initialise(dummy_location: Path) -> Path:
2424
yaml.dump(
2525
{
2626
"m12": {
27-
"acquisition_software": ["epu", "tomo", "serialem"],
27+
"acquisition_software": ["epu", "tomo"],
2828
"data_directories": [str(detector_dir)],
2929
"rsync_basepath": str(dummy_location),
3030
"calibrations": {"dummy": 0},

tests/client/test_analyser.py

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,122 @@
11
from __future__ import annotations
22

3+
import pytest
4+
35
from murfey.client.analyser import Analyser
6+
from murfey.client.contexts.clem import CLEMContext
7+
from murfey.client.contexts.spa import SPAModularContext
8+
from murfey.client.contexts.spa_metadata import SPAMetadataContext
9+
from murfey.client.contexts.tomo import TomographyContext
10+
from murfey.client.contexts.tomo_metadata import TomographyMetadataContext
11+
from murfey.util.models import ProcessingParametersSPA, ProcessingParametersTomo
12+
13+
example_files = [
14+
# Tomography
15+
["visit/Position_1_001_0.0_20250715_012434_fractions.tiff", TomographyContext],
16+
["visit/Position_1_2_002_3.0_20250715_012434_Fractions.mrc", TomographyContext],
17+
["visit/Position_1_2_003_6.0_20250715_012434_EER.eer", TomographyContext],
18+
["visit/name1_004_9.0_20250715_012434_fractions.tiff", TomographyContext],
19+
["visit/Position_1_[30.0].tiff", TomographyContext],
20+
["visit/Position_1.mdoc", TomographyContext],
21+
["visit/name1_2.mdoc", TomographyContext],
22+
# Tomography metadata
23+
["visit/Session.dm", TomographyMetadataContext],
24+
["visit/SearchMaps/SearchMap.xml", TomographyMetadataContext],
25+
["visit/Batch/BatchPositionsList.xml", TomographyMetadataContext],
26+
["visit/Thumbnails/file.mrc", TomographyMetadataContext],
27+
# SPA
28+
["visit/FoilHole_01234_fractions.tiff", SPAModularContext],
29+
["visit/FoilHole_01234_EER.eer", SPAModularContext],
30+
# SPA metadata
31+
["atlas/atlas.mrc", SPAMetadataContext],
32+
["visit/EpuSession.dm", SPAMetadataContext],
33+
["visit/Metadata/GridSquare.dm", SPAMetadataContext],
34+
# CLEM LIF file
35+
["visit/images/test_file.lif", CLEMContext],
36+
# CLEM TIFF files
37+
[
38+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12--Z02--C01.tif",
39+
CLEMContext,
40+
],
41+
[
42+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12_Lng_LVCC--Z02--C01.tif",
43+
CLEMContext,
44+
],
45+
[
46+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Series001--Z00--C00.tif",
47+
CLEMContext,
48+
],
49+
[
50+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Series001_Lng_LVCC--Z00--C00.tif",
51+
CLEMContext,
52+
],
53+
# CLEM TIFF file accompanying metadata
54+
[
55+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12.xlif",
56+
CLEMContext,
57+
],
58+
[
59+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12_Lng_LVCC.xlif",
60+
CLEMContext,
61+
],
62+
[
63+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_histo.xlif",
64+
CLEMContext,
65+
],
66+
[
67+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_Lng_LVCC_histo.xlif",
68+
CLEMContext,
69+
],
70+
[
71+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001.xlif",
72+
CLEMContext,
73+
],
74+
[
75+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001_Lng_LVCC.xlif",
76+
CLEMContext,
77+
],
78+
]
79+
80+
81+
@pytest.mark.parametrize("file_and_context", example_files)
82+
def test_find_context(file_and_context, tmp_path):
83+
# Unpack parametrised variables
84+
file_name, context = file_and_context
85+
86+
# Pass the file to the Analyser; add environment as needed
87+
analyser = Analyser(basepath_local=tmp_path)
88+
89+
# Check that the results are as expected
90+
assert analyser._find_context(tmp_path / file_name)
91+
assert isinstance(analyser._context, context)
92+
93+
# Checks for the specific workflow contexts
94+
if isinstance(analyser._context, TomographyContext):
95+
assert analyser.parameters_model == ProcessingParametersTomo
96+
if isinstance(analyser._context, SPAModularContext):
97+
assert analyser.parameters_model == ProcessingParametersSPA
98+
99+
100+
contextless_files = [
101+
"visit/Position_1_gain.tiff",
102+
"visit/FoilHole_01234_gain.tiff",
103+
"visit/file_1.mrc",
104+
"visit/FoilHole_01234.mrc",
105+
"visit/FoilHole_01234.jpg",
106+
"visit/FoilHole_01234.xml",
107+
"visit/images/test_file.lifext",
108+
"visit/images/2024_03_14_12_34_56--Project001/Project001.xlef",
109+
"visit/images/2024_03_14_12_34_56--Project001/Project001.xlef.lock",
110+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Position 12_histo.lof",
111+
"visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Series001_histo.lof",
112+
]
113+
114+
115+
@pytest.mark.parametrize("bad_file", contextless_files)
116+
def test_ignore_contextless_files(bad_file, tmp_path):
117+
analyser = Analyser(tmp_path)
118+
assert not analyser._find_context(tmp_path / bad_file)
119+
assert not analyser._context
4120

5121

6122
def test_analyser_setup_and_stopping(tmp_path):
@@ -23,7 +139,7 @@ def test_analyser_tomo_determination(tmp_path):
23139

24140

25141
def test_analyser_epu_determination(tmp_path):
26-
tomo_file = tmp_path / "FoilHole_12345_Data_6789.tiff"
142+
tomo_file = tmp_path / "FoilHole_12345_Data_6789_Fractions.tiff"
27143
analyser = Analyser(tmp_path)
28144
analyser.start()
29145
analyser.queue.put(tomo_file)

0 commit comments

Comments
 (0)