Skip to content

Commit c5a5767

Browse files
Remove the assumption of a raw directory for processed outputs (#600)
Resolves one of the doppio-live issues, where we assumed a raw directory for SPA. This is now a configurable option instead: the raw sub-directory (the first directory below the visit) is added to the processed path only if process_multiple_datasets is True, so that in cases such as eBIC, where we send multiple datasets to one location, they are still divided up. Also makes the path construction consistent between SPA and tomo, which will need checking.
1 parent 2d3e030 commit c5a5767

File tree

4 files changed

+44
-69
lines changed

4 files changed

+44
-69
lines changed

src/murfey/server/api/workflow.py

Lines changed: 7 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
TiltSeries,
5959
)
6060
from murfey.util.models import ProcessingParametersSPA, ProcessingParametersTomo
61-
from murfey.util.processing_params import default_spa_parameters
61+
from murfey.util.processing_params import default_spa_parameters, motion_corrected_mrc
6262
from murfey.util.tomo import midpoint
6363

6464
logger = getLogger("murfey.server.api.workflow")
@@ -365,29 +365,7 @@ async def request_spa_preprocessing(
365365
machine_config = get_machine_config(instrument_name=instrument_name)[
366366
instrument_name
367367
]
368-
parts = [secure_filename(p) for p in Path(proc_file.path).parts]
369-
visit_idx = parts.index(visit_name)
370-
core = Path("/") / Path(*parts[: visit_idx + 1])
371-
ppath = Path("/") / Path(*parts)
372-
sub_dataset = ppath.relative_to(core).parts[0]
373-
extra_path = machine_config.processed_extra_directory
374-
for i, p in enumerate(ppath.parts):
375-
if p.startswith("raw"):
376-
movies_path_index = i
377-
break
378-
else:
379-
raise ValueError(f"{proc_file.path} does not contain a raw directory")
380-
mrc_out = (
381-
core
382-
/ machine_config.processed_directory_name
383-
/ sub_dataset
384-
/ extra_path
385-
/ "MotionCorr"
386-
/ "job002"
387-
/ "Movies"
388-
/ "/".join(ppath.parts[movies_path_index + 1 : -1])
389-
/ str(ppath.stem + "_motion_corrected.mrc")
390-
)
368+
mrc_out = motion_corrected_mrc(Path(proc_file.path), visit_name, machine_config)
391369
try:
392370
collected_ids = db.exec(
393371
select(DataCollectionGroup, DataCollection, ProcessingJob, AutoProcProgram)
@@ -491,7 +469,8 @@ async def request_spa_preprocessing(
491469
_transport_object.send("processing_recipe", zocalo_message)
492470
else:
493471
logger.error(
494-
f"Pe-processing was requested for {sanitise(ppath.name)} but no Zocalo transport object was found"
472+
f"Pre-processing was requested for {sanitise(Path(proc_file.path).name)} "
473+
"but no Zocalo transport object was found"
495474
)
496475
return proc_file
497476

@@ -646,22 +625,7 @@ async def request_tomography_preprocessing(
646625
machine_config = get_machine_config(instrument_name=instrument_name)[
647626
instrument_name
648627
]
649-
visit_idx = Path(proc_file.path).parts.index(visit_name)
650-
core = Path(*Path(proc_file.path).parts[: visit_idx + 1])
651-
ppath = Path("/".join(secure_filename(p) for p in Path(proc_file.path).parts))
652-
sub_dataset = "/".join(ppath.relative_to(core).parts[:-1])
653-
extra_path = machine_config.processed_extra_directory
654-
mrc_out = (
655-
core
656-
/ machine_config.processed_directory_name
657-
/ sub_dataset
658-
/ extra_path
659-
/ "MotionCorr"
660-
/ "job002"
661-
/ "Movies"
662-
/ str(ppath.stem + "_motion_corrected.mrc")
663-
)
664-
mrc_out = Path("/".join(secure_filename(p) for p in mrc_out.parts))
628+
mrc_out = motion_corrected_mrc(Path(proc_file.path), visit_name, machine_config)
665629

666630
recipe_name = machine_config.recipes.get("em-tomo-preprocess", "em-tomo-preprocess")
667631

@@ -732,7 +696,8 @@ async def request_tomography_preprocessing(
732696
_transport_object.send("processing_recipe", zocalo_message)
733697
else:
734698
logger.error(
735-
f"Pe-processing was requested for {sanitise(ppath.name)} but no Zocalo transport object was found"
699+
f"Pre-processing was requested for {sanitise(Path(proc_file.path).name)} "
700+
f"but no Zocalo transport object was found"
736701
)
737702
return proc_file
738703
else:

src/murfey/server/feedback.py

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
get_microscope,
5252
get_security_config,
5353
)
54-
from murfey.util.processing_params import default_spa_parameters
54+
from murfey.util.processing_params import default_spa_parameters, motion_corrected_mrc
5555
from murfey.util.tomo import midpoint
5656

5757
logger = logging.getLogger("murfey.server.feedback")
@@ -106,37 +106,16 @@ def get_all_tilts(tilt_series_id: int) -> List[str]:
106106
).all()
107107
if not complete_results:
108108
return []
109+
visit_name = complete_results[0][2].visit
109110
instrument_name = complete_results[0][2].instrument_name
110111
results = [r[0] for r in complete_results]
111112
machine_config = get_machine_config(instrument_name=instrument_name)[
112113
instrument_name
113114
]
114-
115-
def _mc_path(mov_path: Path) -> str:
116-
for p in mov_path.parts:
117-
if "-" in p and p.startswith(("bi", "nr", "nt", "cm", "sw")):
118-
visit_name = p
119-
break
120-
else:
121-
raise ValueError(f"No visit found in {mov_path}")
122-
visit_idx = Path(mov_path).parts.index(visit_name)
123-
core = Path(*Path(mov_path).parts[: visit_idx + 1])
124-
ppath = Path(mov_path)
125-
sub_dataset = "/".join(ppath.relative_to(core).parts[:-1])
126-
extra_path = machine_config.processed_extra_directory
127-
mrc_out = (
128-
core
129-
/ machine_config.processed_directory_name
130-
/ sub_dataset
131-
/ extra_path
132-
/ "MotionCorr"
133-
/ "job002"
134-
/ "Movies"
135-
/ str(ppath.stem + "_motion_corrected.mrc")
136-
)
137-
return str(mrc_out)
138-
139-
return [_mc_path(Path(r.movie_path)) for r in results]
115+
return [
116+
str(motion_corrected_mrc(Path(r.movie_path), visit_name, machine_config))
117+
for r in results
118+
]
140119

141120

142121
def get_job_ids(tilt_series_id: int, appid: int) -> JobIDs:

src/murfey/util/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class MachineConfig(BaseModel): # type: ignore
5757
processing_enabled: bool = True
5858
process_by_default: bool = True
5959
gain_directory_name: str = "processing"
60+
process_multiple_datasets: bool = True
6061
processed_directory_name: str = "processed"
6162
processed_extra_directory: str = ""
6263
recipes: dict[str, str] = {

src/murfey/util/processing_params.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,36 @@
1+
from pathlib import Path
12
from typing import Literal, Optional
23

34
from pydantic import BaseModel
5+
from werkzeug.utils import secure_filename
6+
7+
from murfey.util.config import MachineConfig
8+
9+
10+
def motion_corrected_mrc(
    input_movie: Path, visit_name: str, machine_config: MachineConfig
) -> Path:
    """
    Build the output path of the motion corrected micrograph for a movie.

    Every component of ``input_movie`` is passed through ``secure_filename``
    and the result is treated as an absolute path. The returned path is laid
    out as::

        <visit dir>/<processed_directory_name>/<sub dataset>/
            <processed_extra_directory>/MotionCorr/job002/Movies/
            <remaining directories>/<movie stem>_motion_corrected.mrc

    where ``<visit dir>`` is the path up to and including ``visit_name``.
    When ``machine_config.process_multiple_datasets`` is true, the first
    directory below the visit is used as ``<sub dataset>`` and is left out of
    ``<remaining directories>``; otherwise ``<sub dataset>`` is empty and all
    directories between the visit and the movie are kept below ``Movies``.

    Args:
        input_movie: Path of the movie file; must contain ``visit_name`` as
            one of its components.
        visit_name: Name of the visit directory to look for in the path.
        machine_config: Supplies ``process_multiple_datasets``,
            ``processed_directory_name`` and ``processed_extra_directory``.

    Returns:
        The sanitised path of the motion corrected MRC file.

    Raises:
        ValueError: If ``visit_name`` is not a component of the sanitised
            path, or if ``process_multiple_datasets`` is set and the movie is
            not inside a sub-directory of the visit.
    """
    parts = [secure_filename(p) for p in input_movie.parts]
    # Only the sanitised form of the path is used in error messages.
    ppath = Path("/") / Path(*parts)
    try:
        visit_idx = parts.index(visit_name)
    except ValueError:
        raise ValueError(
            f"Visit {visit_name!r} was not found in the path {str(ppath)!r}"
        ) from None
    core = Path("/") / Path(*parts[: visit_idx + 1])

    if machine_config.process_multiple_datasets:
        below_visit = ppath.relative_to(core).parts
        # At least one directory plus the file name is needed; otherwise the
        # file name itself would be mistaken for the sub-dataset directory.
        if len(below_visit) < 2:
            raise ValueError(
                f"{str(ppath)!r} is not inside a sub-directory of the visit "
                f"{visit_name!r}, which is required when processing multiple "
                "datasets"
            )
        sub_dataset = below_visit[0]
    else:
        sub_dataset = ""

    extra_path = machine_config.processed_extra_directory
    mrc_out = (
        core
        / machine_config.processed_directory_name
        / sub_dataset
        / extra_path
        / "MotionCorr"
        / "job002"
        / "Movies"
        # Mirror the directory structure between the sub-dataset (or the
        # visit, when sub_dataset is empty) and the movie itself.
        / ppath.parent.relative_to(core / sub_dataset)
        / str(ppath.stem + "_motion_corrected.mrc")
    )
    # Sanitise again so that components coming from the machine configuration
    # are cleaned in the same way as those from the input path.
    return Path("/".join(secure_filename(p) for p in mrc_out.parts))
434

535

636
class CLEMAlignAndMergeParameters(BaseModel):

0 commit comments

Comments
 (0)