|
9 | 9 |
|
10 | 10 | import json |
11 | 11 | import logging |
12 | | -import re |
13 | 12 | import traceback |
14 | 13 | from ast import literal_eval |
15 | 14 | from pathlib import Path |
16 | | -from typing import Optional, Type, Union |
17 | 15 |
|
18 | 16 | from pydantic import BaseModel, validator |
19 | | -from sqlalchemy.exc import NoResultFound |
20 | 17 | from sqlmodel import Session, select |
21 | 18 |
|
22 | 19 | from murfey.server import _transport_object |
23 | | -from murfey.util.config import get_machine_config |
24 | 20 | from murfey.util.db import ( |
25 | 21 | CLEMImageMetadata, |
26 | 22 | CLEMImageSeries, |
|
29 | 25 | CLEMTIFFFile, |
30 | 26 | ) |
31 | 27 | from murfey.util.db import Session as MurfeySession |
| 28 | +from murfey.workflows.clem import get_db_entry |
32 | 29 | from murfey.workflows.clem.align_and_merge import submit_cluster_request |
33 | 30 |
|
34 | 31 | logger = logging.getLogger("murfey.workflows.clem.register_results") |
35 | 32 |
|
36 | 33 |
|
37 | | -def _validate_and_sanitise( |
38 | | - file: Path, |
39 | | - session_id: int, |
40 | | - db: Session, |
41 | | -) -> Path: |
42 | | - """ |
43 | | - Performs validation and sanitisation on the incoming file paths, ensuring that |
44 | | - no forbidden characters are present and that the path points only to allowed |
45 | | - sections of the file server. |
46 | | -
|
47 | | - Returns the file path as a sanitised string that can be converted into a Path |
48 | | - object again. |
49 | | -
|
50 | | - NOTE: Due to the instrument name query, 'db' now needs to be passed as an |
51 | | - explicit variable to this function from within a FastAPI endpoint, as using the |
52 | | - instance that was imported directly won't load it in the correct state. |
53 | | - """ |
54 | | - |
55 | | - valid_file_types = ( |
56 | | - ".lif", |
57 | | - ".tif", |
58 | | - ".tiff", |
59 | | - ".xlif", |
60 | | - ".xml", |
61 | | - ) |
62 | | - |
63 | | - # Resolve symlinks and directory changes to get full file path |
64 | | - full_path = Path(file).resolve() |
65 | | - |
66 | | - # Use machine configuration to determine which file base paths are accepted |
67 | | - instrument_name = ( |
68 | | - db.exec(select(MurfeySession).where(MurfeySession.id == session_id)) |
69 | | - .one() |
70 | | - .instrument_name |
71 | | - ) |
72 | | - machine_config = get_machine_config(instrument_name=instrument_name)[ |
73 | | - instrument_name |
74 | | - ] |
75 | | - rsync_basepath = machine_config.rsync_basepath |
76 | | - try: |
77 | | - base_path = list(rsync_basepath.parents)[-2].as_posix() |
78 | | - except IndexError: |
79 | | - logger.warning(f"Base path {rsync_basepath!r} is too short") |
80 | | - base_path = rsync_basepath.as_posix() |
81 | | - except Exception as e: |
82 | | - raise Exception( |
83 | | - f"Unexpected exception encountered when loading the file base path: {e}" |
84 | | - ) |
85 | | - |
86 | | - # Check that full file path doesn't contain unallowed characters |
87 | | - # Currently allows only: |
88 | | - # - words (alphanumerics and "_"; \w), |
89 | | - # - spaces (\s), |
90 | | - # - periods, |
91 | | - # - dashes, |
92 | | - # - forward slashes ("/") |
93 | | - if bool(re.fullmatch(r"^[\w\s\.\-/]+$", str(full_path))) is False: |
94 | | - raise ValueError(f"Unallowed characters present in {file}") |
95 | | - |
96 | | - # Check that it's not accessing somewhere it's not allowed |
97 | | - if not str(full_path).startswith(str(base_path)): |
98 | | - raise ValueError(f"{file} points to a directory that is not permitted") |
99 | | - |
100 | | - # Check that it's a file, not a directory |
101 | | - if full_path.is_file() is False: |
102 | | - raise ValueError(f"{file} is not a file") |
103 | | - |
104 | | - # Check that it is of a permitted file type |
105 | | - if f"{full_path.suffix}" not in valid_file_types: |
106 | | - raise ValueError(f"{full_path.suffix} is not a permitted file format") |
107 | | - |
108 | | - return full_path |
109 | | - |
110 | | - |
111 | | -def get_db_entry( |
112 | | - db: Session, |
113 | | - # With the database search function having been moved out of the FastAPI |
114 | | - # endpoint, the database now has to be explicitly passed within the FastAPI |
115 | | - # endpoint function in order for it to be loaded in the correct state. |
116 | | - table: Type[ |
117 | | - Union[ |
118 | | - CLEMImageMetadata, |
119 | | - CLEMImageSeries, |
120 | | - CLEMImageStack, |
121 | | - CLEMLIFFile, |
122 | | - CLEMTIFFFile, |
123 | | - ] |
124 | | - ], |
125 | | - session_id: int, |
126 | | - file_path: Optional[Path] = None, |
127 | | - series_name: Optional[str] = None, |
128 | | -) -> Union[ |
129 | | - CLEMImageMetadata, |
130 | | - CLEMImageSeries, |
131 | | - CLEMImageStack, |
132 | | - CLEMLIFFile, |
133 | | - CLEMTIFFFile, |
134 | | -]: |
135 | | - """ |
136 | | - Searches the CLEM workflow-related tables in the Murfey database for an entry that |
137 | | - matches the file path or series name within a given session. Returns the entry if |
138 | | - a match is found, otherwise register it as a new entry in the database. |
139 | | - """ |
140 | | - |
141 | | - # Validate that parameters are provided correctly |
142 | | - if file_path is None and series_name is None: |
143 | | - raise ValueError( |
144 | | - "One of either 'file_path' or 'series_name' has to be provided" |
145 | | - ) |
146 | | - if file_path is not None and series_name is not None: |
147 | | - raise ValueError("Only one of 'file_path' or 'series_name' should be provided") |
148 | | - |
149 | | - # Validate file path if provided |
150 | | - if file_path is not None: |
151 | | - try: |
152 | | - file_path = _validate_and_sanitise(file_path, session_id, db) |
153 | | - except Exception: |
154 | | - raise Exception |
155 | | - |
156 | | - # Validate series name to use |
157 | | - if series_name is not None: |
158 | | - if bool(re.fullmatch(r"^[\w\s\.\-/]+$", series_name)) is False: |
159 | | - raise ValueError("One or more characters in the string are not permitted") |
160 | | - |
161 | | - # Return database entry if it exists |
162 | | - try: |
163 | | - db_entry = ( |
164 | | - db.exec( |
165 | | - select(table) |
166 | | - .where(table.session_id == session_id) |
167 | | - .where(table.file_path == str(file_path)) |
168 | | - ).one() |
169 | | - if file_path is not None |
170 | | - else db.exec( |
171 | | - select(table) |
172 | | - .where(table.session_id == session_id) |
173 | | - .where(table.series_name == series_name) |
174 | | - ).one() |
175 | | - ) |
176 | | - # Create and register new entry if not present |
177 | | - except NoResultFound: |
178 | | - db_entry = ( |
179 | | - table( |
180 | | - file_path=str(file_path), |
181 | | - session_id=session_id, |
182 | | - ) |
183 | | - if file_path is not None |
184 | | - else table( |
185 | | - series_name=series_name, |
186 | | - session_id=session_id, |
187 | | - ) |
188 | | - ) |
189 | | - db.add(db_entry) |
190 | | - db.commit() |
191 | | - db.refresh(db_entry) |
192 | | - except Exception: |
193 | | - raise Exception |
194 | | - |
195 | | - return db_entry |
196 | | - |
197 | | - |
198 | 34 | class LIFPreprocessingResult(BaseModel): |
199 | 35 | image_stack: Path |
200 | 36 | metadata: Path |
|
0 commit comments