Skip to content

Commit f483d4b

Browse files
committed
Moved CLEM database functions to __init__, since they will be shared with other sections of this workflow
1 parent 6ec4f38 commit f483d4b

File tree

2 files changed

+188
-165
lines changed

2 files changed

+188
-165
lines changed
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import re
5+
from pathlib import Path
6+
from typing import Optional, Type, Union
7+
8+
from sqlalchemy.exc import NoResultFound
9+
from sqlmodel import Session, select
10+
11+
from murfey.util.config import get_machine_config
12+
from murfey.util.db import (
13+
CLEMImageMetadata,
14+
CLEMImageSeries,
15+
CLEMImageStack,
16+
CLEMLIFFile,
17+
CLEMTIFFFile,
18+
)
19+
from murfey.util.db import Session as MurfeySession
20+
21+
logger = logging.getLogger("murfey.workflows.clem")
22+
23+
24+
"""
25+
HELPER FUNCTIONS FOR CLEM DATABASE
26+
"""
27+
28+
29+
def _validate_and_sanitise(
    file: Path,
    session_id: int,
    db: Session,
) -> Path:
    """
    Validate an incoming file path and return it in fully resolved form.

    The path is resolved (symlinks and relative components collapsed) and then
    checked, in order, for: forbidden characters, location underneath the
    permitted base directory for the session's instrument, being an existing
    file, and having a permitted file extension.

    NOTE: Due to the instrument name query, 'db' needs to be passed as an
    explicit variable to this function from within a FastAPI endpoint, as using
    the instance that was imported directly won't load it in the correct state.

    :param file: The file path to validate.
    :param session_id: ID of the Murfey session; used to look up the instrument
        name, which in turn selects the machine configuration.
    :param db: Database session used to run the session lookup.
    :returns: The resolved file path as a Path object.
    :raises ValueError: If the path contains characters that are not allowed,
        lies outside the permitted base directory, is not a file, or has a
        file extension that is not permitted.
    :raises Exception: If the base path cannot be derived from the machine
        configuration for an unexpected reason. Errors raised by the session
        lookup itself (``.one()``) are not caught here and propagate as-is.
    """

    valid_file_types = (
        ".lif",
        ".tif",
        ".tiff",
        ".xlif",
        ".xml",
    )

    # Resolve symlinks and directory changes to get full file path
    full_path = Path(file).resolve()

    # Use the machine configuration of the session's instrument to work out
    # which base path files are allowed to be read from
    instrument_name = (
        db.exec(select(MurfeySession).where(MurfeySession.id == session_id))
        .one()
        .instrument_name
    )
    machine_config = get_machine_config(instrument_name=instrument_name)[
        instrument_name
    ]
    rsync_basepath = machine_config.rsync_basepath
    try:
        # Second-to-last parent, i.e. the first directory below the root
        base_path = list(rsync_basepath.parents)[-2].as_posix()
    except IndexError:
        logger.warning(f"Base path {rsync_basepath!r} is too short")
        base_path = rsync_basepath.as_posix()
    except Exception as e:
        # Keep the original exception attached as the cause for debugging
        raise Exception(
            f"Unexpected exception encountered when loading the file base path: {e}"
        ) from e

    # Check that full file path doesn't contain unallowed characters
    # Currently allows only:
    # - words (alphanumerics and "_"; \w),
    # - spaces (\s),
    # - periods,
    # - dashes,
    # - forward slashes ("/")
    if not re.fullmatch(r"^[\w\s\.\-/]+$", str(full_path)):
        raise ValueError(f"Unallowed characters present in {file}")

    # Check that it's not accessing somewhere it's not allowed.
    # Compare by path components rather than by raw string prefix, so that a
    # sibling directory which merely shares a name prefix with the base path
    # (e.g. "/data_other" vs "/data") is rejected.
    try:
        full_path.relative_to(base_path)
    except ValueError:
        raise ValueError(
            f"{file} points to a directory that is not permitted"
        ) from None

    # Check that it's a file, not a directory
    if not full_path.is_file():
        raise ValueError(f"{file} is not a file")

    # Check that it is of a permitted file type
    if full_path.suffix not in valid_file_types:
        raise ValueError(f"{full_path.suffix} is not a permitted file format")

    return full_path
101+
102+
103+
def get_db_entry(
    db: Session,
    # With the database search function having been moved out of the FastAPI
    # endpoint, the database now has to be explicitly passed within the FastAPI
    # endpoint function in order for it to be loaded in the correct state.
    table: Type[
        Union[
            CLEMImageMetadata,
            CLEMImageSeries,
            CLEMImageStack,
            CLEMLIFFile,
            CLEMTIFFFile,
        ]
    ],
    session_id: int,
    file_path: Optional[Path] = None,
    series_name: Optional[str] = None,
) -> Union[
    CLEMImageMetadata,
    CLEMImageSeries,
    CLEMImageStack,
    CLEMLIFFile,
    CLEMTIFFFile,
]:
    """
    Searches the CLEM workflow-related tables in the Murfey database for an entry that
    matches the file path or series name within a given session. Returns the entry if
    a match is found, otherwise registers it as a new entry in the database.

    Exactly one of 'file_path' or 'series_name' must be provided.

    :param db: Database session used for the lookup and, if needed, the insert.
    :param table: The CLEM table class to search and, if needed, instantiate.
    :param session_id: ID of the Murfey session the entry belongs to.
    :param file_path: File path to match on; it is validated and resolved with
        '_validate_and_sanitise' before use.
    :param series_name: Series name to match on.
    :returns: The existing or newly-created database entry.
    :raises ValueError: If neither or both of 'file_path' and 'series_name' are
        given, or if 'series_name' contains characters that are not permitted.
        Errors raised while validating 'file_path' or while querying the
        database propagate unchanged, so their type and message are preserved.
    """

    # Validate that parameters are provided correctly
    if file_path is None and series_name is None:
        raise ValueError(
            "One of either 'file_path' or 'series_name' has to be provided"
        )
    if file_path is not None and series_name is not None:
        raise ValueError("Only one of 'file_path' or 'series_name' should be provided")

    # Validate file path if provided; validation errors are allowed to
    # propagate as-is so that the caller can see what was wrong
    if file_path is not None:
        file_path = _validate_and_sanitise(file_path, session_id, db)

    # Validate series name to use
    if series_name is not None:
        if not re.fullmatch(r"^[\w\s\.\-/]+$", series_name):
            raise ValueError("One or more characters in the string are not permitted")

    # Build the lookup query for whichever identifier was provided
    query = select(table).where(table.session_id == session_id)
    if file_path is not None:
        query = query.where(table.file_path == str(file_path))
    else:
        query = query.where(table.series_name == series_name)

    # Return database entry if it exists
    try:
        db_entry = db.exec(query).one()
    # Create and register new entry if not present
    except NoResultFound:
        if file_path is not None:
            db_entry = table(
                file_path=str(file_path),
                session_id=session_id,
            )
        else:
            db_entry = table(
                series_name=series_name,
                session_id=session_id,
            )
        db.add(db_entry)
        db.commit()
        db.refresh(db_entry)

    return db_entry

src/murfey/workflows/clem/register_preprocessing_results.py

Lines changed: 1 addition & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,14 @@
99

1010
import json
1111
import logging
12-
import re
1312
import traceback
1413
from ast import literal_eval
1514
from pathlib import Path
16-
from typing import Optional, Type, Union
1715

1816
from pydantic import BaseModel, validator
19-
from sqlalchemy.exc import NoResultFound
2017
from sqlmodel import Session, select
2118

2219
from murfey.server import _transport_object
23-
from murfey.util.config import get_machine_config
2420
from murfey.util.db import (
2521
CLEMImageMetadata,
2622
CLEMImageSeries,
@@ -29,172 +25,12 @@
2925
CLEMTIFFFile,
3026
)
3127
from murfey.util.db import Session as MurfeySession
28+
from murfey.workflows.clem import get_db_entry
3229
from murfey.workflows.clem.align_and_merge import submit_cluster_request
3330

3431
logger = logging.getLogger("murfey.workflows.clem.register_results")
3532

3633

37-
def _validate_and_sanitise(
38-
file: Path,
39-
session_id: int,
40-
db: Session,
41-
) -> Path:
42-
"""
43-
Performs validation and sanitisation on the incoming file paths, ensuring that
44-
no forbidden characters are present and that the the path points only to allowed
45-
sections of the file server.
46-
47-
Returns the file path as a sanitised string that can be converted into a Path
48-
object again.
49-
50-
NOTE: Due to the instrument name query, 'db' now needs to be passed as an
51-
explicit variable to this function from within a FastAPI endpoint, as using the
52-
instance that was imported directly won't load it in the correct state.
53-
"""
54-
55-
valid_file_types = (
56-
".lif",
57-
".tif",
58-
".tiff",
59-
".xlif",
60-
".xml",
61-
)
62-
63-
# Resolve symlinks and directory changes to get full file path
64-
full_path = Path(file).resolve()
65-
66-
# Use machine configuration to validate which file base paths are accepted from
67-
instrument_name = (
68-
db.exec(select(MurfeySession).where(MurfeySession.id == session_id))
69-
.one()
70-
.instrument_name
71-
)
72-
machine_config = get_machine_config(instrument_name=instrument_name)[
73-
instrument_name
74-
]
75-
rsync_basepath = machine_config.rsync_basepath
76-
try:
77-
base_path = list(rsync_basepath.parents)[-2].as_posix()
78-
except IndexError:
79-
logger.warning(f"Base path {rsync_basepath!r} is too short")
80-
base_path = rsync_basepath.as_posix()
81-
except Exception as e:
82-
raise Exception(
83-
f"Unexpected exception encountered when loading the file base path: {e}"
84-
)
85-
86-
# Check that full file path doesn't contain unallowed characters
87-
# Currently allows only:
88-
# - words (alphanumerics and "_"; \w),
89-
# - spaces (\s),
90-
# - periods,
91-
# - dashes,
92-
# - forward slashes ("/")
93-
if bool(re.fullmatch(r"^[\w\s\.\-/]+$", str(full_path))) is False:
94-
raise ValueError(f"Unallowed characters present in {file}")
95-
96-
# Check that it's not accessing somehwere it's not allowed
97-
if not str(full_path).startswith(str(base_path)):
98-
raise ValueError(f"{file} points to a directory that is not permitted")
99-
100-
# Check that it's a file, not a directory
101-
if full_path.is_file() is False:
102-
raise ValueError(f"{file} is not a file")
103-
104-
# Check that it is of a permitted file type
105-
if f"{full_path.suffix}" not in valid_file_types:
106-
raise ValueError(f"{full_path.suffix} is not a permitted file format")
107-
108-
return full_path
109-
110-
111-
def get_db_entry(
112-
db: Session,
113-
# With the database search funcion having been moved out of the FastAPI
114-
# endpoint, the database now has to be explicitly passed within the FastAPI
115-
# endpoint function in order for it to be loaded in the correct state.
116-
table: Type[
117-
Union[
118-
CLEMImageMetadata,
119-
CLEMImageSeries,
120-
CLEMImageStack,
121-
CLEMLIFFile,
122-
CLEMTIFFFile,
123-
]
124-
],
125-
session_id: int,
126-
file_path: Optional[Path] = None,
127-
series_name: Optional[str] = None,
128-
) -> Union[
129-
CLEMImageMetadata,
130-
CLEMImageSeries,
131-
CLEMImageStack,
132-
CLEMLIFFile,
133-
CLEMTIFFFile,
134-
]:
135-
"""
136-
Searches the CLEM workflow-related tables in the Murfey database for an entry that
137-
matches the file path or series name within a given session. Returns the entry if
138-
a match is found, otherwise register it as a new entry in the database.
139-
"""
140-
141-
# Validate that parameters are provided correctly
142-
if file_path is None and series_name is None:
143-
raise ValueError(
144-
"One of either 'file_path' or 'series_name' has to be provided"
145-
)
146-
if file_path is not None and series_name is not None:
147-
raise ValueError("Only one of 'file_path' or 'series_name' should be provided")
148-
149-
# Validate file path if provided
150-
if file_path is not None:
151-
try:
152-
file_path = _validate_and_sanitise(file_path, session_id, db)
153-
except Exception:
154-
raise Exception
155-
156-
# Validate series name to use
157-
if series_name is not None:
158-
if bool(re.fullmatch(r"^[\w\s\.\-/]+$", series_name)) is False:
159-
raise ValueError("One or more characters in the string are not permitted")
160-
161-
# Return database entry if it exists
162-
try:
163-
db_entry = (
164-
db.exec(
165-
select(table)
166-
.where(table.session_id == session_id)
167-
.where(table.file_path == str(file_path))
168-
).one()
169-
if file_path is not None
170-
else db.exec(
171-
select(table)
172-
.where(table.session_id == session_id)
173-
.where(table.series_name == series_name)
174-
).one()
175-
)
176-
# Create and register new entry if not present
177-
except NoResultFound:
178-
db_entry = (
179-
table(
180-
file_path=str(file_path),
181-
session_id=session_id,
182-
)
183-
if file_path is not None
184-
else table(
185-
series_name=series_name,
186-
session_id=session_id,
187-
)
188-
)
189-
db.add(db_entry)
190-
db.commit()
191-
db.refresh(db_entry)
192-
except Exception:
193-
raise Exception
194-
195-
return db_entry
196-
197-
19834
class LIFPreprocessingResult(BaseModel):
19935
image_stack: Path
20036
metadata: Path

0 commit comments

Comments
 (0)