Skip to content

Commit 7529c54

Browse files
committed
Fixes and tests
1 parent a0d512f commit 7529c54

File tree

6 files changed

+197
-56
lines changed

6 files changed

+197
-56
lines changed

src/murfey/client/analyser.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from murfey.client.contexts.spa_metadata import SPAMetadataContext
2222
from murfey.client.contexts.tomo import TomographyContext
2323
from murfey.client.contexts.tomo_metadata import TomographyMetadataContext
24+
from murfey.client.destinations import find_longest_data_directory
2425
from murfey.client.instance_environment import MurfeyInstanceEnvironment
2526
from murfey.client.rsync import RSyncerUpdate, TransferResult
2627
from murfey.util.client import Observer, get_machine_config_client
@@ -394,12 +395,8 @@ def _xml_file(self, data_file: Path) -> Path:
394395
return data_file.with_suffix(".xml")
395396
file_name = f"{'_'.join(p for p in data_file.stem.split('_')[:-1])}.xml"
396397
data_directories = self._murfey_config.get("data_directories", [])
397-
for dd in data_directories:
398-
if str(data_file).startswith(dd):
399-
base_dir = Path(dd).absolute()
400-
mid_dir = data_file.relative_to(base_dir).parent
401-
break
402-
else:
398+
base_dir, mid_dir = find_longest_data_directory(data_file, data_directories)
399+
if not base_dir:
403400
return data_file.with_suffix(".xml")
404401
return base_dir / self._environment.visit / mid_dir / file_name
405402

src/murfey/client/contexts/spa.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import xmltodict
99

1010
from murfey.client.context import Context, ProcessingParameter
11+
from murfey.client.destinations import find_longest_data_directory
1112
from murfey.client.instance_environment import (
1213
MovieTracker,
1314
MurfeyID,
@@ -51,12 +52,8 @@ def _file_transferred_to(
5152
def _grid_square_metadata_file(
5253
f: Path, data_directories: List[Path], visit: str, grid_square: int
5354
) -> Path:
54-
for dd in data_directories:
55-
if str(f).startswith(str(dd)):
56-
base_dir = dd.absolute()
57-
mid_dir = f.relative_to(base_dir).parent
58-
break
59-
else:
55+
base_dir, mid_dir = find_longest_data_directory(f, data_directories)
56+
if not base_dir:
6057
raise ValueError(f"Could not determine grid square metadata path for {f}")
6158
metadata_file = (
6259
base_dir

src/murfey/client/destinations.py

Lines changed: 58 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,29 @@
77
MurfeyInstanceEnvironment,
88
global_env_lock,
99
)
10+
from murfey.util import secure_path
1011
from murfey.util.client import capture_get, capture_post
1112

1213
logger = logging.getLogger("murfey.client.destinations")
1314

1415

16+
def find_longest_data_directory(
17+
match_path: Path, data_directories: list[str] | list[Path]
18+
):
19+
"""
20+
Determine the longest path in the data_directories list
21+
which the match path is relative to
22+
"""
23+
base_dir = ""
24+
mid_dir = Path("")
25+
for dd in data_directories:
26+
dd_base = str(Path(dd).absolute())
27+
if str(match_path).startswith(str(dd)) and len(dd_base) > len(base_dir):
28+
base_dir = dd_base
29+
mid_dir = match_path.absolute().relative_to(Path(base_dir)).parent
30+
return base_dir, mid_dir
31+
32+
1533
def determine_default_destination(
1634
visit: str,
1735
source: Path,
@@ -20,54 +38,56 @@ def determine_default_destination(
2038
token: str,
2139
touch: bool = False,
2240
extra_directory: str = "",
23-
include_mid_path: bool = True,
2441
use_suggested_path: bool = True,
2542
) -> str:
43+
if not destination or not visit:
44+
raise ValueError(f"No destination ({destination}) or visit ({visit}) supplied")
2645
machine_data = capture_get(
2746
base_url=str(environment.url.geturl()),
2847
router_name="session_control.router",
2948
function_name="machine_info_by_instrument",
3049
token=token,
3150
instrument_name=environment.instrument_name,
3251
).json()
33-
_default = f"{destination}/{visit}"
34-
for data_dir in machine_data.get("data_directories", []):
35-
if source.absolute() == Path(data_dir).absolute():
36-
_default = f"{destination}/{visit}"
37-
break
38-
else:
39-
mid_path = source.absolute().relative_to(Path(data_dir).absolute())
40-
if use_suggested_path:
41-
with global_env_lock:
42-
if source.name == "Images-Disc1":
43-
source_name = source.parent.name
44-
elif source.name.startswith("Sample"):
45-
source_name = f"{source.parent.name}_{source.name}"
46-
else:
47-
source_name = source.name
48-
if environment.destination_registry.get(source_name):
49-
_default = environment.destination_registry[source_name]
50-
else:
51-
suggested_path_response = capture_post(
52-
base_url=str(environment.url.geturl()),
53-
router_name="file_io_instrument.router",
54-
function_name="suggest_path",
55-
token=token,
56-
visit_name=visit,
57-
session_id=environment.murfey_session,
58-
data={
59-
"base_path": f"{destination}/{visit}/{mid_path.parent if include_mid_path else ''}/raw",
60-
"touch": touch,
61-
"extra_directory": extra_directory,
62-
},
63-
)
64-
if suggested_path_response is None:
65-
raise RuntimeError("Murfey server is unreachable")
66-
_default = suggested_path_response.json().get("suggested_path")
67-
environment.destination_registry[source_name] = _default
52+
base_path, mid_path = find_longest_data_directory(
53+
source, machine_data.get("data_directories", [])
54+
)
55+
if not base_path:
56+
raise ValueError(f"No data directory found for {source}")
57+
if source.absolute() == Path(base_path).absolute():
58+
raise ValueError(
59+
f"Source is the same as the base path {secure_path(source.absolute())}"
60+
)
61+
62+
_default = f"{destination}/{visit}/{source.name}"
63+
if use_suggested_path:
64+
with global_env_lock:
65+
if source.name == "Images-Disc1":
66+
source_name = source.parent.name
67+
elif source.name.startswith("Sample"):
68+
source_name = f"{source.parent.name}_{source.name}"
69+
else:
70+
source_name = source.name
71+
if environment.destination_registry.get(source_name):
72+
_default = environment.destination_registry[source_name]
6873
else:
69-
_default = f"{destination}/{visit}/{mid_path if include_mid_path else source.name}"
70-
break
74+
suggested_path_response = capture_post(
75+
base_url=str(environment.url.geturl()),
76+
router_name="file_io_instrument.router",
77+
function_name="suggest_path",
78+
token=token,
79+
visit_name=visit,
80+
session_id=environment.murfey_session,
81+
data={
82+
"base_path": f"{destination}/{visit}/raw",
83+
"touch": touch,
84+
"extra_directory": extra_directory,
85+
},
86+
)
87+
if suggested_path_response is None:
88+
raise RuntimeError("Murfey server is unreachable")
89+
_default = suggested_path_response.json().get("suggested_path")
90+
environment.destination_registry[source_name] = _default
7191
return (
7292
_default + f"/{extra_directory}"
7393
if not _default.endswith("/")

src/murfey/client/multigrid_control.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,6 @@ def _start_rsyncer_multigrid(
235235
self,
236236
source: Path,
237237
extra_directory: str = "",
238-
include_mid_path: bool = True,
239238
use_suggested_path: bool = True,
240239
destination_overrides: Optional[Dict[Path, str]] = None,
241240
remove_files: bool = False,
@@ -276,7 +275,6 @@ def _start_rsyncer_multigrid(
276275
self.token,
277276
touch=True,
278277
extra_directory=extra_directory,
279-
include_mid_path=include_mid_path,
280278
use_suggested_path=use_suggested_path,
281279
)
282280
self._environment.sources.append(source)

src/murfey/client/watchdir_multigrid.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ def _handle_metadata(self, directory: Path, extra_directory: str):
5555
self.notify(
5656
directory,
5757
extra_directory=extra_directory,
58-
include_mid_path=False,
5958
analyse=self._analyse,
6059
limited=True,
6160
tag="metadata",
@@ -71,7 +70,6 @@ def _handle_fractions(self, directory: Path, first_loop: bool):
7170
# This allows you to avoid triggering processing again if Murfey is restarted
7271
self.notify(
7372
d02,
74-
include_mid_path=False,
7573
remove_files=True,
7674
analyse=(
7775
not (first_loop and self._skip_existing_processing)
@@ -90,7 +88,6 @@ def _handle_fractions(self, directory: Path, first_loop: bool):
9088
):
9189
self.notify(
9290
directory,
93-
include_mid_path=False,
9491
analyse=(
9592
not (first_loop and self._skip_existing_processing)
9693
if self._analyse
@@ -108,7 +105,6 @@ def _process(self):
108105
if d.is_dir() and d not in self._seen_dirs:
109106
self.notify(
110107
d,
111-
include_mid_path=False,
112108
use_suggested_path=False,
113109
analyse=(
114110
(

tests/client/test_destinations.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
from pathlib import Path
2+
from unittest import mock
3+
4+
import pytest
5+
6+
from murfey.client.destinations import determine_default_destination
7+
8+
source_list = [
9+
["X:/DoseFractions/cm12345-6/Supervisor", "Supervisor", True, "extra_name"],
10+
["X:/DoseFractions/Supervisor/Images-Disc1", "Supervisor", False, ""],
11+
[
12+
"X:/DoseFractions/DATA/Supervisor/Sample1",
13+
"Supervisor_Sample1",
14+
False,
15+
"extra_name",
16+
],
17+
]
18+
19+
20+
@mock.patch("murfey.client.destinations.capture_get")
21+
@mock.patch("murfey.client.destinations.capture_post")
22+
@pytest.mark.parametrize("sources", source_list)
23+
def test_determine_default_destinations_suggested_path(mock_post, mock_get, sources):
24+
mock_environment = mock.Mock()
25+
mock_environment.murfey_session = 2
26+
mock_environment.instrument_name = "m01"
27+
mock_environment.destination_registry = {}
28+
29+
source, source_name, touch, extra_directory = sources
30+
31+
mock_get().json.return_value = {
32+
"data_directories": ["X:/DoseFractions", "X:/DoseFractions/DATA"]
33+
}
34+
mock_post().json.return_value = {"suggested_path": "/base_path/2025/cm12345-6/raw"}
35+
36+
destination = determine_default_destination(
37+
visit="cm12345-6",
38+
source=Path(source),
39+
destination="2025",
40+
environment=mock_environment,
41+
token="token",
42+
touch=touch,
43+
extra_directory=extra_directory,
44+
use_suggested_path=True,
45+
)
46+
mock_get.assert_any_call(
47+
base_url=str(mock_environment.url.geturl()),
48+
router_name="session_control.router",
49+
function_name="machine_info_by_instrument",
50+
token="token",
51+
instrument_name="m01",
52+
)
53+
mock_post.assert_any_call(
54+
base_url=str(mock_environment.url.geturl()),
55+
router_name="file_io_instrument.router",
56+
function_name="suggest_path",
57+
token="token",
58+
visit_name="cm12345-6",
59+
session_id=2,
60+
data={
61+
"base_path": "2025/cm12345-6/raw",
62+
"touch": touch,
63+
"extra_directory": extra_directory,
64+
},
65+
)
66+
67+
assert destination == f"/base_path/2025/cm12345-6/raw/{extra_directory}"
68+
assert (
69+
mock_environment.destination_registry.get(source_name)
70+
== "/base_path/2025/cm12345-6/raw"
71+
)
72+
73+
74+
@mock.patch("murfey.client.destinations.capture_get")
75+
@pytest.mark.parametrize("sources", source_list)
76+
def test_determine_default_destinations_skip_suggested(mock_get, sources):
77+
mock_environment = mock.Mock()
78+
mock_environment.murfey_session = 2
79+
mock_environment.instrument_name = "m01"
80+
mock_environment.destination_registry = {}
81+
82+
source, source_name, touch, extra_directory = sources
83+
84+
mock_get().json.return_value = {
85+
"data_directories": ["X:/DoseFractions", "X:/DoseFractions/DATA"]
86+
}
87+
88+
destination = determine_default_destination(
89+
visit="cm12345-6",
90+
source=Path(source),
91+
destination="2025",
92+
environment=mock_environment,
93+
token="token",
94+
touch=touch,
95+
extra_directory=extra_directory,
96+
use_suggested_path=False,
97+
)
98+
mock_get.assert_any_call(
99+
base_url=str(mock_environment.url.geturl()),
100+
router_name="session_control.router",
101+
function_name="machine_info_by_instrument",
102+
token="token",
103+
instrument_name="m01",
104+
)
105+
106+
assert destination == f"2025/cm12345-6/{Path(source).name}/{extra_directory}"
107+
108+
109+
parameter_list_fail_cases = [
110+
["X:/DoseFractions/cm12345-6", "", "2025", "X:/DoseFractions"],
111+
["X:/DoseFractions/cm12345-6", "cm12345-6", "", "X:/DoseFractions"],
112+
["X:/DoseFractions", "cm12345-6", "2025", "X:/DoseFractions"],
113+
["X:/cm12345-6", "cm12345-6", "2025", "X:/DoseFractions"],
114+
]
115+
116+
117+
@mock.patch("murfey.client.destinations.capture_get")
118+
@pytest.mark.parametrize("destination_params", parameter_list_fail_cases)
119+
def test_determine_default_destinations_failures(mock_get, destination_params):
120+
"""
121+
Test failure of the following cases:
122+
No visit, no destination, source = default, source not in default
123+
"""
124+
source, visit, destination, default_dests = destination_params
125+
mock_environment = mock.Mock()
126+
with pytest.raises(ValueError):
127+
determine_default_destination(
128+
visit=visit,
129+
source=Path(source),
130+
destination=destination,
131+
environment=mock_environment,
132+
token="token",
133+
)

0 commit comments

Comments
 (0)