Skip to content

Commit 1d96699

Browse files
authored
Added logic to account for year rolling over during data collection session (#425)
* Added logic to the 'suggest_path' API endpoint to check for the current visit under the previous year, in case the year rolls over during a data collection session
* Added similar year-finding logic to the 'process_gain' API endpoint
1 parent 208f4fb commit 1d96699

File tree

2 files changed

+82
-4
lines changed

2 files changed

+82
-4
lines changed

src/murfey/server/api/__init__.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,7 +1285,31 @@ def suggest_path(
12851285
raise ValueError(
12861286
"No machine configuration set when suggesting destination path"
12871287
)
1288+
1289+
# Construct the full path to where the dataset is to be saved
12881290
check_path = machine_config.rsync_basepath / base_path
1291+
1292+
# Check previous year to account for the year rolling over during data collection
1293+
if not check_path.exists():
1294+
base_path_parts = base_path.split("/")
1295+
for part in base_path_parts:
1296+
# Find the path part corresponding to the year
1297+
if len(part) == 4 and part.isdigit():
1298+
year_idx = base_path_parts.index(part)
1299+
base_path_parts[year_idx] = str(int(part) - 1)
1300+
base_path = "/".join(base_path_parts)
1301+
check_path_prev = check_path
1302+
check_path = machine_config.rsync_basepath / base_path
1303+
1304+
# If it's not in the previous year either, it's a genuine error
1305+
if not check_path.exists():
1306+
log_message = (
1307+
"Unable to find current visit folder under "
1308+
f"{str(check_path_prev)!r} or {str(check_path)!r}"
1309+
)
1310+
log.error(log_message)
1311+
raise FileNotFoundError(log_message)
1312+
12891313
check_path_name = check_path.name
12901314
while check_path.exists():
12911315
count = count + 1 if count else 2
@@ -1478,6 +1502,26 @@ async def process_gain(
14781502
/ secure_filename(visit_name)
14791503
/ machine_config.gain_directory_name
14801504
)
1505+
1506+
# Check under previous year if the folder doesn't exist
1507+
if not filepath.exists():
1508+
filepath_prev = filepath
1509+
filepath = (
1510+
Path(machine_config.rsync_basepath)
1511+
/ (machine_config.rsync_module or "data")
1512+
/ str(datetime.datetime.now().year - 1)
1513+
/ secure_filename(visit_name)
1514+
/ machine_config.gain_directory_name
1515+
)
1516+
# If it's not in the previous year, it's a genuine error
1517+
if not filepath.exists():
1518+
log_message = (
1519+
"Unable to find gain reference directory under "
1520+
f"{str(filepath_prev)!r} or {str(filepath)}"
1521+
)
1522+
log.error(log_message)
1523+
raise FileNotFoundError(log_message)
1524+
14811525
if gain_reference_params.eer:
14821526
new_gain_ref, new_gain_ref_superres = await prepare_eer_gain(
14831527
filepath / safe_path_name,

src/murfey/server/demo_api.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ class Settings(BaseSettings):
110110

111111
settings = Settings()
112112

113-
machine_config: dict = {}
113+
machine_config: dict[str, MachineConfig] = {}
114114
if settings.murfey_machine_configuration:
115115
microscope = get_microscope()
116116
machine_config = from_file(Path(settings.murfey_machine_configuration), microscope)
@@ -1290,12 +1290,46 @@ def suggest_path(
12901290
instrument_name = (
12911291
db.exec(select(Session).where(Session.id == session_id)).one().instrument_name
12921292
)
1293-
check_path = (
1294-
machine_config[instrument_name].rsync_basepath / params.base_path
1293+
rsync_basepath = (
1294+
machine_config[instrument_name].rsync_basepath
12951295
if machine_config
1296-
else Path(f"/dls/{get_microscope()}") / params.base_path
1296+
else Path(f"/dls/{get_microscope()}")
12971297
)
1298+
check_path = rsync_basepath / params.base_path
12981299
check_path = check_path.parent / f"{check_path.stem}{count}{check_path.suffix}"
1300+
check_path = check_path.resolve()
1301+
1302+
# Check for path traversal attempt
1303+
if not str(check_path).startswith(str(rsync_basepath)):
1304+
raise Exception(f"Path traversal attempt detected: {str(check_path)!r}")
1305+
1306+
# Check previous year to account for the year rolling over during data collection
1307+
if not sanitise_path(check_path).exists():
1308+
base_path_parts = list(params.base_path.parts)
1309+
for part in base_path_parts:
1310+
# Find the path part corresponding to the year
1311+
if len(part) == 4 and part.isdigit():
1312+
year_idx = base_path_parts.index(part)
1313+
base_path_parts[year_idx] = str(int(part) - 1)
1314+
base_path = "/".join(base_path_parts)
1315+
check_path_prev = check_path
1316+
check_path = rsync_basepath / base_path
1317+
check_path = check_path.parent / f"{check_path.stem}{count}{check_path.suffix}"
1318+
check_path = check_path.resolve()
1319+
1320+
# Check for path traversal attempt
1321+
if not str(check_path).startswith(str(rsync_basepath)):
1322+
raise Exception(f"Path traversal attempt detected: {str(check_path)!r}")
1323+
1324+
# If visit is not in the previous year either, it's a genuine error
1325+
if not check_path.exists():
1326+
log_message = sanitise(
1327+
"Unable to find current visit folder under "
1328+
f"{str(check_path_prev)!r} or {str(check_path)!r}"
1329+
)
1330+
log.error(log_message)
1331+
raise FileNotFoundError(log_message)
1332+
12991333
check_path_name = check_path.name
13001334
while sanitise_path(check_path).exists():
13011335
count = count + 1 if count else 2

0 commit comments

Comments
 (0)