Skip to content

Commit 663361c

Browse files
committed
Common function for dcg registration
1 parent 6e08588 commit 663361c

File tree

5 files changed

+167
-220
lines changed

5 files changed

+167
-220
lines changed

src/murfey/client/context.py

Lines changed: 139 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,150 @@
33
import logging
44
from importlib.metadata import entry_points
55
from pathlib import Path
6-
from typing import Any, Dict, List, NamedTuple
6+
from typing import Any, List, NamedTuple
77

8-
from murfey.client.instance_environment import MurfeyInstanceEnvironment
8+
import xmltodict
9+
10+
from murfey.client.instance_environment import MurfeyInstanceEnvironment, SampleInfo
11+
from murfey.util.client import capture_post, get_machine_config_client
912

1013
logger = logging.getLogger("murfey.client.context")
1114

1215

13-
class FutureRequest(NamedTuple):
14-
url: str
15-
message: Dict[str, Any]
16+
def _atlas_destination(
    environment: MurfeyInstanceEnvironment, source: Path, token: str
) -> Path:
    """Build the destination-side path, down to the visit directory, for ``source``.

    The machine configuration is fetched from the server to read
    ``rsync_basepath``. The default destination registered for ``source`` is
    then truncated just after the component equal to ``environment.visit``
    and joined onto that base path. If no component matches the visit, the
    visit name is appended to the destination's parent instead.

    Args:
        environment: Client environment. Its ``url``, ``instrument_name``,
            ``demo``, ``default_destinations`` and ``visit`` attributes are read.
        source: Key used to look up ``environment.default_destinations``.
        token: Passed through to ``get_machine_config_client``.

    Returns:
        ``rsync_basepath`` (empty if not configured) joined with the
        destination path up to and including the visit directory.
    """
    machine_config = get_machine_config_client(
        str(environment.url.geturl()),
        token,
        instrument_name=environment.instrument_name,
        demo=environment.demo,
    )
    base_path = Path(machine_config.get("rsync_basepath", ""))
    destination = Path(environment.default_destinations[source])

    for i, destination_part in enumerate(destination.parts):
        if destination_part == environment.visit:
            # Slice the full destination, not its parent: slicing the parent
            # silently dropped the visit when it was the final component,
            # which disagreed with the fallback below that always ends in
            # the visit directory.
            return base_path / "/".join(destination.parts[: i + 1])

    # Visit not present anywhere in the destination: append it explicitly.
    return base_path / destination.parent / environment.visit
37+
38+
39+
def ensure_dcg_exists(
    collection_type: str,
    metadata_source: Path,
    environment: MurfeyInstanceEnvironment,
    token: str,
):
    """Register a data collection group (DCG) with the server.

    The session file in ``metadata_source`` (``Session.dm`` for ``"tomo"``,
    ``EpuSession.dm`` for ``"spa"``) is parsed to locate the atlas metadata.
    When the session file is missing, a minimal DCG (experiment type and tag
    only) is registered. Otherwise the atlas XML is read to obtain the pixel
    size, the sample number is taken from the atlas path, the result is
    stored in ``environment.samples[metadata_source]`` and a DCG including
    atlas information is registered.

    Args:
        collection_type: ``"tomo"`` (experiment type 36) or ``"spa"``
            (experiment type 37, single particle).
        metadata_source: Directory expected to contain the session file.
        environment: Client environment. ``visit``, ``url``,
            ``murfey_session`` and ``samples`` are used here.
        token: Authentication token forwarded to the server calls.

    Returns:
        The DCG tag (a string) once the registration request has been
        posted, or ``None`` when nothing was registered. ``None`` is
        returned for an unknown collection type, a missing atlas path in the
        session file, a visit name absent from that path, no ``Atlas_*.xml``
        on disk, no ``Sample*`` component in the atlas path, or (SPA only)
        no ``Images-Disc*`` directory.
    """
    if collection_type == "tomo":
        experiment_type_id = 36
        session_file = metadata_source / "Session.dm"
    elif collection_type == "spa":
        experiment_type_id = 37
        session_file = metadata_source / "EpuSession.dm"
    else:
        logger.error(f"Unknown collection type {collection_type}")
        return None

    # The metadata source with the "/<visit>" component stripped out. Used
    # directly as the tag, or as the directory searched for Images-Disc*.
    visitless_source = (
        str(metadata_source).replace(f"/{environment.visit}", "").replace("//", "/")
    )

    if not session_file.is_file():
        # No session metadata yet: register the group without atlas details.
        logger.warning(f"Cannot find session file {str(session_file)}")
        dcg_tag = visitless_source
        dcg_data = {
            "experiment_type_id": experiment_type_id,
            "tag": dcg_tag,
        }
    else:
        with open(session_file, "r") as session_xml:
            session_data = xmltodict.parse(session_xml.read())

        if collection_type == "tomo":
            windows_path = session_data["TomographySession"]["AtlasId"]
        else:
            sample_xml = session_data["EpuSessionXml"]["Samples"]["_items"][
                "SampleXml"
            ]
            # xmltodict only produces a list for repeated elements; a session
            # with a single sample yields a plain dict here.
            if isinstance(sample_xml, list):
                sample_xml = sample_xml[0]
            windows_path = sample_xml["AtlasId"]["#text"]

        if not windows_path:
            logger.warning("No atlas metadata path found")
            return None
        logger.info(f"Windows path to atlas metadata found: {windows_path}")

        windows_parts = windows_path.split("\\")
        if environment.visit not in windows_parts:
            logger.warning(
                f"Visit {environment.visit} not found in atlas metadata path {windows_path}"
            )
            return None
        visit_index = windows_parts.index(environment.visit)
        partial_path = "/".join(windows_parts[visit_index + 1 :])
        logger.info("Partial Linux path successfully constructed from Windows path")

        source_visit_dir = metadata_source.parent
        atlas_dir = (source_visit_dir / partial_path).parent
        logger.info(
            f"Looking for atlas XML file in metadata directory {str(atlas_dir)}"
        )
        # First match in glob order, or None if the atlas XML is not on disk.
        atlas_xml_path = next(atlas_dir.glob("Atlas_*.xml"), None)
        if atlas_xml_path is None:
            logger.warning(f"Cannot find Atlas_*.xml in {str(atlas_dir)}")
            return None
        logger.info(f"Atlas XML path {str(atlas_xml_path)} found")

        with open(atlas_xml_path, "rb") as atlas_xml:
            atlas_xml_data = xmltodict.parse(atlas_xml)
        atlas_original_pixel_size = float(
            atlas_xml_data["MicroscopeImage"]["SpatialScale"]["pixelSize"]["x"][
                "numericValue"
            ]
        )
        # need to calculate the pixel size of the downscaled image
        # NOTE(review): 7.8 is presumably the atlas downscaling factor - confirm.
        atlas_pixel_size = atlas_original_pixel_size * 7.8
        logger.info(f"Atlas image pixel size determined to be {atlas_pixel_size}")

        # The sample number comes from the first "Sample<N>" path component.
        for p in partial_path.split("/"):
            if p.startswith("Sample"):
                sample = int(p.replace("Sample", ""))
                break
        else:
            logger.warning(f"Sample could not be identified for {metadata_source}")
            return None
        sample_info = SampleInfo(atlas=Path(partial_path), sample=sample)
        environment.samples[metadata_source] = sample_info

        if collection_type == "tomo":
            dcg_tag = visitless_source
        else:
            # SPA groups are tagged with the most recently created
            # Images-Disc* directory under the visit-less source path.
            dcg_images_dirs = sorted(
                Path(visitless_source).glob("Images-Disc*"),
                key=lambda x: x.stat().st_ctime,
            )
            if not dcg_images_dirs:
                logger.warning(f"Cannot find Images-Disc* in {visitless_source}")
                return None
            dcg_tag = str(dcg_images_dirs[-1])

        dcg_data = {
            "experiment_type_id": experiment_type_id,
            "tag": dcg_tag,
            "atlas": str(
                _atlas_destination(environment, metadata_source, token)
                / sample_info.atlas.parent
                / atlas_xml_path.with_suffix(".jpg").name
            ),
            "sample": sample_info.sample,
            "atlas_pixel_size": atlas_pixel_size,
        }

    capture_post(
        base_url=str(environment.url.geturl()),
        router_name="workflow.router",
        function_name="register_dc_group",
        token=token,
        visit_name=environment.visit,
        session_id=environment.murfey_session,
        data=dcg_data,
    )
    return dcg_tag
16150

17151

18152
class ProcessingParameter(NamedTuple):

src/murfey/client/contexts/atlas.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
from pathlib import Path
33
from typing import Optional
44

5-
from murfey.client.context import Context
5+
from murfey.client.context import Context, _atlas_destination
66
from murfey.client.contexts.spa import _get_source
7-
from murfey.client.contexts.spa_metadata import _atlas_destination
87
from murfey.client.instance_environment import MurfeyInstanceEnvironment
98
from murfey.util.client import capture_post
109

@@ -36,7 +35,7 @@ def post_transfer(
3635
source = _get_source(transferred_file, environment)
3736
if source:
3837
transferred_atlas_name = _atlas_destination(
39-
environment, source, transferred_file, self._token
38+
environment, source, self._token
4039
) / transferred_file.relative_to(source.parent)
4140
capture_post(
4241
base_url=str(environment.url.geturl()),

src/murfey/client/contexts/spa_metadata.py

Lines changed: 10 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44

55
import xmltodict
66

7-
from murfey.client.context import Context
7+
from murfey.client.context import Context, ensure_dcg_exists
88
from murfey.client.contexts.spa import _file_transferred_to, _get_source
9-
from murfey.client.instance_environment import MurfeyInstanceEnvironment, SampleInfo
10-
from murfey.util.client import capture_post, get_machine_config_client
9+
from murfey.client.instance_environment import MurfeyInstanceEnvironment
10+
from murfey.util.client import capture_post
1111
from murfey.util.spa_metadata import (
1212
FoilHoleInfo,
1313
get_grid_square_atlas_positions,
@@ -69,29 +69,6 @@ def _foil_hole_positions(xml_path: Path, grid_square: int) -> Dict[str, FoilHole
6969
return foil_holes
7070

7171

72-
def _atlas_destination(
    environment: MurfeyInstanceEnvironment, source: Path, file_path: Path, token: str
) -> Path:
    """Work out where atlas files for ``source`` live on the destination side.

    The server's machine configuration supplies ``rsync_basepath``; the
    default destination registered for ``source`` supplies the rest. If one
    of the destination's components equals the visit name, the parent of the
    destination is cut off just after that index. Otherwise the visit name
    is appended to the destination's parent.

    ``file_path`` is accepted but not used by this function.
    """
    config = get_machine_config_client(
        str(environment.url.geturl()),
        token,
        instrument_name=environment.instrument_name,
        demo=environment.demo,
    )
    destination = Path(environment.default_destinations[source])

    # Index of the first destination component matching the visit, if any.
    visit_position = next(
        (
            position
            for position, component in enumerate(destination.parts)
            if component == environment.visit
        ),
        None,
    )

    if visit_position is None:
        return (
            Path(config.get("rsync_basepath", ""))
            / destination.parent
            / environment.visit
        )

    # NOTE(review): the index comes from the full destination but the slice
    # is taken from its parent, so a destination whose last component is the
    # visit loses that component here - confirm this is intended.
    kept_parts = destination.parent.parts[: visit_position + 1]
    return Path(config.get("rsync_basepath", "")) / "/".join(kept_parts)
93-
94-
9572
class SPAMetadataContext(Context):
9673
def __init__(self, acquisition_software: str, basepath: Path, token: str):
9774
super().__init__("SPA_metadata", acquisition_software, token)
@@ -124,82 +101,14 @@ def post_transfer(
124101
source = _get_source(transferred_file, environment)
125102
if not source:
126103
logger.warning(
127-
f"Source could not be indentified for {str(transferred_file)}"
104+
f"Source could not be identified for {str(transferred_file)}"
128105
)
129106
return
130107

131-
source_visit_dir = source.parent
132-
133-
logger.info(
134-
f"Looking for atlas XML file in metadata directory {str((source_visit_dir / partial_path).parent)}"
135-
)
136-
atlas_xml_path = list(
137-
(source_visit_dir / partial_path).parent.glob("Atlas_*.xml")
138-
)[0]
139-
logger.info(f"Atlas XML path {str(atlas_xml_path)} found")
140-
with open(atlas_xml_path, "rb") as atlas_xml:
141-
atlas_xml_data = xmltodict.parse(atlas_xml)
142-
atlas_original_pixel_size = float(
143-
atlas_xml_data["MicroscopeImage"]["SpatialScale"]["pixelSize"]["x"][
144-
"numericValue"
145-
]
146-
)
147-
148-
# need to calculate the pixel size of the downscaled image
149-
atlas_pixel_size = atlas_original_pixel_size * 7.8
150-
logger.info(f"Atlas image pixel size determined to be {atlas_pixel_size}")
151-
152-
for p in partial_path.split("/"):
153-
if p.startswith("Sample"):
154-
sample = int(p.replace("Sample", ""))
155-
break
156-
else:
157-
logger.warning(f"Sample could not be identified for {transferred_file}")
158-
return
159108
if source:
160-
environment.samples[source] = SampleInfo(
161-
atlas=Path(partial_path), sample=sample
162-
)
163-
dcg_search_dir = "/".join(
164-
p for p in transferred_file.parent.parts if p != environment.visit
165-
)
166-
dcg_search_dir = (
167-
dcg_search_dir[1:]
168-
if dcg_search_dir.startswith("//")
169-
else dcg_search_dir
170-
)
171-
dcg_images_dirs = sorted(
172-
Path(dcg_search_dir).glob("Images-Disc*"),
173-
key=lambda x: x.stat().st_ctime,
174-
)
175-
if not dcg_images_dirs:
176-
logger.warning(f"Cannot find Images-Disc* in {dcg_search_dir}")
177-
return
178-
dcg_tag = str(dcg_images_dirs[-1])
179-
dcg_data = {
180-
"experiment_type_id": 37, # Single particle
181-
"tag": dcg_tag,
182-
"atlas": str(
183-
_atlas_destination(
184-
environment, source, transferred_file, self._token
185-
)
186-
/ environment.samples[source].atlas.parent
187-
/ atlas_xml_path.with_suffix(".jpg").name
188-
),
189-
"sample": environment.samples[source].sample,
190-
"atlas_pixel_size": atlas_pixel_size,
191-
}
192-
capture_post(
193-
base_url=str(environment.url.geturl()),
194-
router_name="workflow.router",
195-
function_name="register_dc_group",
196-
token=self._token,
197-
visit_name=environment.visit,
198-
session_id=environment.murfey_session,
199-
data=dcg_data,
200-
)
109+
dcg_tag = ensure_dcg_exists("spa", source, environment, self._token)
201110
gs_pix_positions = get_grid_square_atlas_positions(
202-
source_visit_dir / partial_path
111+
source.parent / partial_path
203112
)
204113
for gs, pos_data in gs_pix_positions.items():
205114
if pos_data:
@@ -228,46 +137,16 @@ def post_transfer(
228137
and environment
229138
):
230139
# Make sure we have a data collection group before trying to register grid square
231-
dcg_search_dir = "/".join(
232-
p
233-
for p in transferred_file.parent.parent.parts
234-
if p != environment.visit
235-
)
236-
dcg_search_dir = (
237-
dcg_search_dir[1:]
238-
if dcg_search_dir.startswith("//")
239-
else dcg_search_dir
240-
)
241-
dcg_images_dirs = sorted(
242-
Path(dcg_search_dir).glob("Images-Disc*"),
243-
key=lambda x: x.stat().st_ctime,
244-
)
245-
if not dcg_images_dirs:
246-
logger.warning(f"Cannot find Images-Disc* in {dcg_search_dir}")
247-
return
248-
dcg_tag = str(dcg_images_dirs[-1])
249-
dcg_data = {
250-
"experiment_type_id": 37, # Single particle
251-
"tag": dcg_tag,
252-
}
253-
capture_post(
254-
base_url=str(environment.url.geturl()),
255-
router_name="workflow.router",
256-
function_name="register_dc_group",
257-
token=self._token,
258-
visit_name=environment.visit,
259-
session_id=environment.murfey_session,
260-
data=dcg_data,
261-
)
140+
source = _get_source(transferred_file, environment=environment)
141+
if source is None:
142+
return None
143+
ensure_dcg_exists("spa", source, environment, self._token)
262144

263145
gs_name = int(transferred_file.stem.split("_")[1])
264146
logger.info(
265147
f"Collecting foil hole positions for {str(transferred_file)} and grid square {gs_name}"
266148
)
267149
fh_positions = _foil_hole_positions(transferred_file, gs_name)
268-
source = _get_source(transferred_file, environment=environment)
269-
if source is None:
270-
return None
271150
visitless_source_search_dir = str(source).replace(
272151
f"/{environment.visit}", ""
273152
)

0 commit comments

Comments
 (0)