Skip to content

Commit 8b4803a

Browse files
Match data collection groups for atlases and processing (#696)
Creates a common function for data collection group creation by the client. The data collection groups are first made as atlases, then updated to be SPA or tomography groups if data collection happens. The diff looks bigger than it really is because a test file was moved.
1 parent 6aa3f3f commit 8b4803a

File tree

18 files changed

+1150
-534
lines changed

18 files changed

+1150
-534
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ GitHub = "https://github.com/DiamondLightSource/python-murfey"
108108
"clem.register_preprocessing_result" = "murfey.workflows.clem.register_preprocessing_results:run"
109109
"data_collection" = "murfey.workflows.register_data_collection:run"
110110
"data_collection_group" = "murfey.workflows.register_data_collection_group:run"
111+
"experiment_type_update" = "murfey.workflows.register_experiment_type_update:run"
111112
"pato" = "murfey.workflows.notifications:notification_setup"
112113
"picked_particles" = "murfey.workflows.spa.picking:particles_picked"
113114
"picked_tomogram" = "murfey.workflows.tomo.picking:picked_tomogram"

src/murfey/client/context.py

Lines changed: 145 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,156 @@
33
import logging
44
from importlib.metadata import entry_points
55
from pathlib import Path
6-
from typing import Any, Dict, List, NamedTuple
6+
from typing import Any, List, NamedTuple
77

8-
from murfey.client.instance_environment import MurfeyInstanceEnvironment
8+
import xmltodict
9+
10+
from murfey.client.instance_environment import MurfeyInstanceEnvironment, SampleInfo
11+
from murfey.util.client import capture_post, get_machine_config_client
912

1013
logger = logging.getLogger("murfey.client.context")
1114

1215

13-
class FutureRequest(NamedTuple):
14-
url: str
15-
message: Dict[str, Any]
16+
def _atlas_destination(
    environment: MurfeyInstanceEnvironment, source: Path, token: str
) -> Path:
    """
    Resolve the server-side destination directory for atlas images.

    Looks up the machine configuration to find the rsync base path, then maps
    the client's default destination for *source* onto it, truncated at the
    visit directory.  If the visit name is not a component of the destination
    path it is appended instead.

    Args:
        environment: Client instance environment (server URL, visit name,
            default destination mapping).
        source: The local source directory whose destination is being mapped.
        token: Authentication token for the machine-config request.

    Returns:
        Absolute path under the rsync base path ending at the visit level.
    """
    machine_config = get_machine_config_client(
        str(environment.url.geturl()),
        token,
        instrument_name=environment.instrument_name,
        demo=environment.demo,
    )
    basepath = Path(machine_config.get("rsync_basepath", ""))
    destination = Path(environment.default_destinations[source])
    for depth, part in enumerate(destination.parts):
        if part == environment.visit:
            # Keep the destination's parent components up to (and including)
            # this depth, rooted at the rsync base path
            return basepath / "/".join(destination.parent.parts[: depth + 1])
    # Visit not present in the destination path: append it explicitly
    return basepath / destination.parent / environment.visit
37+
38+
39+
def ensure_dcg_exists(
    collection_type: str,
    metadata_source: Path,
    environment: MurfeyInstanceEnvironment,
    token: str,
) -> str | None:
    """
    Create a data collection group for a SPA or tomography session.

    Reads the acquisition-software session file found under *metadata_source*
    to locate the atlas metadata, records the sample on *environment*, then
    posts a ``register_dc_group`` request to the Murfey server.  If the
    session file is absent, a minimal group (tag only, no atlas information)
    is still registered.

    Args:
        collection_type: Either ``"tomo"`` or ``"spa"``; any other value is
            rejected with an error log.
        metadata_source: Directory containing the session metadata files.
        environment: Client instance environment (visit name, session id,
            sample registry, server URL).
        token: Authentication token for posts to the Murfey server.

    Returns:
        The tag of the registered data collection group, or ``None`` if the
        group could not be registered.
    """
    if collection_type == "tomo":
        experiment_type_id = 36
        session_file = metadata_source / "Session.dm"
    elif collection_type == "spa":
        experiment_type_id = 37
        session_file = metadata_source / "EpuSession.dm"
        # Best effort: give any installed hooks a chance to harvest extra
        # EPU session metadata; hook failures are logged but not fatal
        for h in entry_points(group="murfey.hooks"):
            try:
                if h.name == "get_epu_session_metadata":
                    h.load()(session_file, environment=environment)
            except Exception as e:
                logger.warning(f"Get EPU session hook failed: {e}")
    else:
        logger.error(f"Unknown collection type {collection_type}")
        return None

    if not session_file.is_file():
        # No session metadata yet: register a bare group without atlas info
        logger.warning(f"Cannot find session file {str(session_file)}")
        dcg_tag = (
            str(metadata_source).replace(f"/{environment.visit}", "").replace("//", "/")
        )
        dcg_data = {
            "experiment_type_id": experiment_type_id,
            "tag": dcg_tag,
        }
    else:
        with open(session_file, "r") as session_xml:
            session_data = xmltodict.parse(session_xml.read())

        # The session file records a Windows path to the atlas acquisition
        if collection_type == "tomo":
            windows_path = session_data["TomographySession"]["AtlasId"]
        else:
            windows_path = session_data["EpuSessionXml"]["Samples"]["_items"][
                "SampleXml"
            ][0]["AtlasId"]["#text"]

        logger.info(f"Windows path to atlas metadata found: {windows_path}")
        if not windows_path:
            logger.warning("No atlas metadata path found")
            return None
        windows_path_parts = windows_path.split("\\")
        if environment.visit not in windows_path_parts:
            # Guard: .index() below would raise an uncaught ValueError if the
            # visit name is not a component of the recorded Windows path
            logger.warning(
                f"Visit {environment.visit} not found in atlas path {windows_path}"
            )
            return None
        visit_index = windows_path_parts.index(environment.visit)
        partial_path = "/".join(windows_path_parts[visit_index + 1 :])
        logger.info("Partial Linux path successfully constructed from Windows path")

        source_visit_dir = metadata_source.parent
        logger.info(
            f"Looking for atlas XML file in metadata directory {str((source_visit_dir / partial_path).parent)}"
        )
        atlas_xml_paths = list(
            (source_visit_dir / partial_path).parent.glob("Atlas_*.xml")
        )
        if not atlas_xml_paths:
            # Guard: an unguarded [0] here would raise IndexError when the
            # atlas XML has not been transferred yet
            logger.warning(
                f"No atlas XML file found in {str((source_visit_dir / partial_path).parent)}"
            )
            return None
        atlas_xml_path = atlas_xml_paths[0]
        logger.info(f"Atlas XML path {str(atlas_xml_path)} found")
        with open(atlas_xml_path, "rb") as atlas_xml:
            atlas_xml_data = xmltodict.parse(atlas_xml)
        atlas_original_pixel_size = float(
            atlas_xml_data["MicroscopeImage"]["SpatialScale"]["pixelSize"]["x"][
                "numericValue"
            ]
        )
        # need to calculate the pixel size of the downscaled image
        atlas_pixel_size = atlas_original_pixel_size * 7.8
        logger.info(f"Atlas image pixel size determined to be {atlas_pixel_size}")

        # Sample number is encoded as a "Sample<N>" component of the path
        for p in partial_path.split("/"):
            if p.startswith("Sample"):
                sample = int(p.replace("Sample", ""))
                break
        else:
            logger.warning(f"Sample could not be identified for {metadata_source}")
            return None
        environment.samples[metadata_source] = SampleInfo(
            atlas=Path(partial_path), sample=sample
        )

        # Group tag: visit-less source path for tomo; newest Images-Disc*
        # directory for SPA
        dcg_search_dir = (
            str(metadata_source).replace(f"/{environment.visit}", "").replace("//", "/")
        )
        if collection_type == "tomo":
            dcg_tag = dcg_search_dir
        else:
            dcg_images_dirs = sorted(
                Path(dcg_search_dir).glob("Images-Disc*"),
                key=lambda x: x.stat().st_ctime,
            )
            if not dcg_images_dirs:
                logger.warning(f"Cannot find Images-Disc* in {dcg_search_dir}")
                return None
            dcg_tag = str(dcg_images_dirs[-1])

        dcg_data = {
            "experiment_type_id": experiment_type_id,
            "tag": dcg_tag,
            "atlas": str(
                _atlas_destination(environment, metadata_source, token)
                / environment.samples[metadata_source].atlas.parent
                / atlas_xml_path.with_suffix(".jpg").name
            ).replace("//", "/"),
            "sample": environment.samples[metadata_source].sample,
            "atlas_pixel_size": atlas_pixel_size,
        }
    capture_post(
        base_url=str(environment.url.geturl()),
        router_name="workflow.router",
        function_name="register_dc_group",
        token=token,
        visit_name=environment.visit,
        session_id=environment.murfey_session,
        data=dcg_data,
    )
    return dcg_tag
16156

17157

18158
class ProcessingParameter(NamedTuple):

src/murfey/client/contexts/atlas.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44

55
import xmltodict
66

7-
from murfey.client.context import Context
7+
from murfey.client.context import Context, _atlas_destination
88
from murfey.client.contexts.spa import _get_source
9-
from murfey.client.contexts.spa_metadata import _atlas_destination
109
from murfey.client.instance_environment import MurfeyInstanceEnvironment
1110
from murfey.util.client import capture_post
1211

@@ -38,7 +37,7 @@ def post_transfer(
3837
source = _get_source(transferred_file, environment)
3938
if source:
4039
transferred_atlas_name = _atlas_destination(
41-
environment, source, transferred_file, self._token
40+
environment, source, self._token
4241
) / transferred_file.relative_to(source.parent)
4342
capture_post(
4443
base_url=str(environment.url.geturl()),
@@ -60,7 +59,7 @@ def post_transfer(
6059
if source:
6160
atlas_mrc = transferred_file.with_suffix(".mrc")
6261
transferred_atlas_jpg = _atlas_destination(
63-
environment, source, atlas_mrc, self._token
62+
environment, source, self._token
6463
) / atlas_mrc.relative_to(source.parent).with_suffix(".jpg")
6564

6665
with open(transferred_file, "rb") as atlas_xml:

src/murfey/client/contexts/spa_metadata.py

Lines changed: 16 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44

55
import xmltodict
66

7-
from murfey.client.context import Context
7+
from murfey.client.context import Context, ensure_dcg_exists
88
from murfey.client.contexts.spa import _file_transferred_to, _get_source
9-
from murfey.client.instance_environment import MurfeyInstanceEnvironment, SampleInfo
10-
from murfey.util.client import capture_post, get_machine_config_client
9+
from murfey.client.instance_environment import MurfeyInstanceEnvironment
10+
from murfey.util.client import capture_post
1111
from murfey.util.spa_metadata import (
1212
FoilHoleInfo,
1313
get_grid_square_atlas_positions,
@@ -69,29 +69,6 @@ def _foil_hole_positions(xml_path: Path, grid_square: int) -> Dict[str, FoilHole
6969
return foil_holes
7070

7171

72-
def _atlas_destination(
73-
environment: MurfeyInstanceEnvironment, source: Path, file_path: Path, token: str
74-
) -> Path:
75-
machine_config = get_machine_config_client(
76-
str(environment.url.geturl()),
77-
token,
78-
instrument_name=environment.instrument_name,
79-
demo=environment.demo,
80-
)
81-
for i, destination_part in enumerate(
82-
Path(environment.default_destinations[source]).parts
83-
):
84-
if destination_part == environment.visit:
85-
return Path(machine_config.get("rsync_basepath", "")) / "/".join(
86-
Path(environment.default_destinations[source]).parent.parts[: i + 1]
87-
)
88-
return (
89-
Path(machine_config.get("rsync_basepath", ""))
90-
/ Path(environment.default_destinations[source]).parent
91-
/ environment.visit
92-
)
93-
94-
9572
class SPAMetadataContext(Context):
9673
def __init__(self, acquisition_software: str, basepath: Path, token: str):
9774
super().__init__("SPA_metadata", acquisition_software, token)
@@ -124,82 +101,19 @@ def post_transfer(
124101
source = _get_source(transferred_file, environment)
125102
if not source:
126103
logger.warning(
127-
f"Source could not be indentified for {str(transferred_file)}"
104+
f"Source could not be identified for {str(transferred_file)}"
128105
)
129106
return
130107

131-
source_visit_dir = source.parent
132-
133-
logger.info(
134-
f"Looking for atlas XML file in metadata directory {str((source_visit_dir / partial_path).parent)}"
135-
)
136-
atlas_xml_path = list(
137-
(source_visit_dir / partial_path).parent.glob("Atlas_*.xml")
138-
)[0]
139-
logger.info(f"Atlas XML path {str(atlas_xml_path)} found")
140-
with open(atlas_xml_path, "rb") as atlas_xml:
141-
atlas_xml_data = xmltodict.parse(atlas_xml)
142-
atlas_original_pixel_size = float(
143-
atlas_xml_data["MicroscopeImage"]["SpatialScale"]["pixelSize"]["x"][
144-
"numericValue"
145-
]
146-
)
147-
148-
# need to calculate the pixel size of the downscaled image
149-
atlas_pixel_size = atlas_original_pixel_size * 7.8
150-
logger.info(f"Atlas image pixel size determined to be {atlas_pixel_size}")
151-
152-
for p in partial_path.split("/"):
153-
if p.startswith("Sample"):
154-
sample = int(p.replace("Sample", ""))
155-
break
156-
else:
157-
logger.warning(f"Sample could not be identified for {transferred_file}")
158-
return
159108
if source:
160-
environment.samples[source] = SampleInfo(
161-
atlas=Path(partial_path), sample=sample
162-
)
163-
dcg_search_dir = "/".join(
164-
p for p in transferred_file.parent.parts if p != environment.visit
165-
)
166-
dcg_search_dir = (
167-
dcg_search_dir[1:]
168-
if dcg_search_dir.startswith("//")
169-
else dcg_search_dir
170-
)
171-
dcg_images_dirs = sorted(
172-
Path(dcg_search_dir).glob("Images-Disc*"),
173-
key=lambda x: x.stat().st_ctime,
174-
)
175-
if not dcg_images_dirs:
176-
logger.warning(f"Cannot find Images-Disc* in {dcg_search_dir}")
177-
return
178-
dcg_tag = str(dcg_images_dirs[-1])
179-
dcg_data = {
180-
"experiment_type_id": 37, # Single particle
181-
"tag": dcg_tag,
182-
"atlas": str(
183-
_atlas_destination(
184-
environment, source, transferred_file, self._token
185-
)
186-
/ environment.samples[source].atlas.parent
187-
/ atlas_xml_path.with_suffix(".jpg").name
188-
),
189-
"sample": environment.samples[source].sample,
190-
"atlas_pixel_size": atlas_pixel_size,
191-
}
192-
capture_post(
193-
base_url=str(environment.url.geturl()),
194-
router_name="workflow.router",
195-
function_name="register_dc_group",
109+
dcg_tag = ensure_dcg_exists(
110+
collection_type="spa",
111+
metadata_source=source,
112+
environment=environment,
196113
token=self._token,
197-
visit_name=environment.visit,
198-
session_id=environment.murfey_session,
199-
data=dcg_data,
200114
)
201115
gs_pix_positions = get_grid_square_atlas_positions(
202-
source_visit_dir / partial_path
116+
source.parent / partial_path
203117
)
204118
for gs, pos_data in gs_pix_positions.items():
205119
if pos_data:
@@ -228,46 +142,21 @@ def post_transfer(
228142
and environment
229143
):
230144
# Make sure we have a data collection group before trying to register grid square
231-
dcg_search_dir = "/".join(
232-
p
233-
for p in transferred_file.parent.parent.parts
234-
if p != environment.visit
235-
)
236-
dcg_search_dir = (
237-
dcg_search_dir[1:]
238-
if dcg_search_dir.startswith("//")
239-
else dcg_search_dir
240-
)
241-
dcg_images_dirs = sorted(
242-
Path(dcg_search_dir).glob("Images-Disc*"),
243-
key=lambda x: x.stat().st_ctime,
244-
)
245-
if not dcg_images_dirs:
246-
logger.warning(f"Cannot find Images-Disc* in {dcg_search_dir}")
247-
return
248-
dcg_tag = str(dcg_images_dirs[-1])
249-
dcg_data = {
250-
"experiment_type_id": 37, # Single particle
251-
"tag": dcg_tag,
252-
}
253-
capture_post(
254-
base_url=str(environment.url.geturl()),
255-
router_name="workflow.router",
256-
function_name="register_dc_group",
145+
source = _get_source(transferred_file, environment=environment)
146+
if source is None:
147+
return None
148+
ensure_dcg_exists(
149+
collection_type="spa",
150+
metadata_source=source,
151+
environment=environment,
257152
token=self._token,
258-
visit_name=environment.visit,
259-
session_id=environment.murfey_session,
260-
data=dcg_data,
261153
)
262154

263155
gs_name = int(transferred_file.stem.split("_")[1])
264156
logger.info(
265157
f"Collecting foil hole positions for {str(transferred_file)} and grid square {gs_name}"
266158
)
267159
fh_positions = _foil_hole_positions(transferred_file, gs_name)
268-
source = _get_source(transferred_file, environment=environment)
269-
if source is None:
270-
return None
271160
visitless_source_search_dir = str(source).replace(
272161
f"/{environment.visit}", ""
273162
)

0 commit comments

Comments
 (0)