Skip to content

Commit 161a9ba

Browse files
authored
Updated code documentation (#290)
* Updated installation instructions for Murfey to reflect recent setup modernisation work. * Incremental addition of comments and descriptions to functions and variables.
1 parent 88d3320 commit 161a9ba

File tree

7 files changed

+88
-31
lines changed

7 files changed

+88
-31
lines changed

README.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,22 @@ Murfey, the package, is named after [Eliza Murfey, the inventor](https://nationa
1414
### How do I set up a development environment?
1515

1616
We suggest you start with your favourite virtual environment (mamba/conda/python virtualenv/...),
17-
then install the dependencies listed in `requirements_dev.txt` with eg.
17+
then install using the following command.
18+
19+
#### From Git
1820

1921
```bash
2022
$ git clone git@github.com:DiamondLightSource/python-murfey.git
2123
$ cd python-murfey
22-
$ pip install -r requirements_dev.txt
23-
$ pip install -e .[client,server]
24+
$ pip install -e .[client,server,developer]
2425
```
2526

26-
You will also want to set up pre-commits:
27+
The packages included under the `[developer]` installation key contain some helpful tools to aid you with developing Murfey further:
2728

28-
```bash
29-
$ pip install pre-commit
30-
$ pre-commit install
31-
```
29+
- `ipykernel` - Enables interactive code development via Jupyter Notebooks.
30+
- `pre-commit` - Allows for the installation and running of hooks to help with linting, formatting, and type checking your code.
31+
- `pytest` - Used in conjunction with test functions to evaluate the reliability of your code.
32+
- `bump2version` - A nice little script to simplify bumping the package's version number.
3233

3334
Finally, you may want to set up an ISPyB mock database server and a Zocalo
3435
development environment. The instructions for this are out of scope here.

src/murfey/bootstrap/__main__.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,19 @@
1010
from urllib.parse import urlparse
1111
from urllib.request import urlopen
1212

13-
# A script to simplify installing Murfey on a network-isolated machine.
14-
# This could in theory be invoked by
15-
# python -m murfey.bootstrap
16-
# but then you would already have murfey installed, so what is the point.
17-
# More commonly this file will be run directly from a wheel with
18-
# python murfey.whl/murfey/bootstrap
19-
# In this constellation you can not import any other files from the murfey
20-
# package. If you absolutely have to do this then look at the pip package
21-
# how this can be achieved. Also note that only standard library imports
22-
# will be available at that installation stage.
13+
"""
14+
A script to simplify installing Murfey on a network-isolated machine.
15+
This could in theory be invoked by
16+
`python -m murfey.bootstrap`
17+
but then you would already have murfey installed, so what is the point.
18+
19+
More commonly, this file will be run directly from a wheel with
20+
`python murfey.whl/murfey/bootstrap`
21+
In this constellation, you cannot import any other files from the murfey package.
22+
If you absolutely have to do this then look at the pip package for how this can be
23+
achieved. Also note that only standard library imports will be available at that
24+
installation stage.
25+
"""
2326

2427

2528
def _download_to_file(url: str, outfile: str):
@@ -48,6 +51,7 @@ def _download_to_file(url: str, outfile: str):
4851
)
4952

5053

54+
# Main script block
5155
if __name__ == "__main__":
5256
parser = argparse.ArgumentParser("murfey.bootstrap")
5357
parser.add_argument("-?", action="help", help=argparse.SUPPRESS)
@@ -56,26 +60,31 @@ def _download_to_file(url: str, outfile: str):
5660
)
5761
args = parser.parse_args()
5862

59-
# Validate the passed server address and construct a minimal base path string and
60-
# extract the host name for pip installation purposes
63+
# Validate the passed server address
64+
# Construct a minimal base path string
65+
# Extract the host name for pip installation purposes
6166
try:
6267
murfey_url = urlparse(args.server)
6368
except Exception:
6469
exit(f"{args.server} is not a valid URL")
6570
murfey_base = f"{murfey_url.scheme}://{murfey_url.netloc}"
6671
murfey_hostname = murfey_url.netloc.split(":")[0]
6772

73+
# Check that Python version is supported
6874
print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}")
69-
if sys.hexversion < 0x3080000:
75+
# if sys.hexversion < 0x3080000:
76+
if sys.version_info >= (3, 9): # Use version_info tuple instead
7077
exit(
7178
"Your python version is too old to support Murfey. "
72-
"You need at least Python 3.8"
79+
"You need at least Python 3.9"
7380
)
7481

82+
# Step 1: Download pip wheel
7583
print()
7684
print(f"1/4 -- Connecting to murfey server on {murfey_base}...")
7785
_download_to_file(f"{murfey_base}/bootstrap/pip.whl", "pip.whl")
7886

87+
# Step 2: Get pip to install itself
7988
print()
8089
print("2/4 -- Bootstrapping pip")
8190
python = sys.executable
@@ -95,6 +104,7 @@ def _download_to_file(url: str, outfile: str):
95104
exit("Could not bootstrap pip")
96105
os.remove("pip.whl")
97106

107+
# Step 3: Update pip
98108
print()
99109
print("3/4 -- Updating pip")
100110
python = sys.executable
@@ -114,6 +124,7 @@ def _download_to_file(url: str, outfile: str):
114124
if result.returncode:
115125
exit("Could not update pip")
116126

127+
# Step 4: pip install murfey
117128
print()
118129
print("4/4 -- Installing murfey client")
119130
result = subprocess.run(
@@ -131,6 +142,7 @@ def _download_to_file(url: str, outfile: str):
131142
if result.returncode:
132143
exit("Could not install murfey client")
133144

145+
# Write config file
134146
print()
135147
print("Installation completed.")
136148
config = configparser.ConfigParser()

src/murfey/client/analyser.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ def __init__(
7171
)
7272

7373
def _find_extension(self, file_path: Path):
74+
"""
75+
Identifies the file extension and stores that information in the class.
76+
"""
7477
if (
7578
required_substrings := self._murfey_config.get(
7679
"data_required_substrings", {}
@@ -81,28 +84,40 @@ def _find_extension(self, file_path: Path):
8184
if not any(r in file_path.name for r in required_substrings):
8285
return []
8386

87+
# Checks for MRC, TIFF, TIF, and EER files if no extension has been defined
8488
if (
8589
file_path.suffix in (".mrc", ".tiff", ".tif", ".eer")
8690
and not self._extension
8791
):
8892
logger.info(f"File extension determined: {file_path.suffix}")
8993
self._extension = file_path.suffix
94+
# Check for TIFF, TIF, or EER if the file's already been assigned an extension
9095
elif (
9196
file_path.suffix in (".tiff", ".tif", ".eer")
9297
and self._extension != file_path.suffix
9398
):
9499
logger.info(f"File extension re-evaluated: {file_path.suffix}")
95100
self._extension = file_path.suffix
101+
# Check for LIF files separately
96102
elif file_path.suffix == ".lif":
97103
self._extension = file_path.suffix
98104

99105
def _find_context(self, file_path: Path) -> bool:
106+
"""
107+
Using various conditionals, identifies what workflow the file is part of, and
108+
assigns the necessary context class to it for subsequent stages of processing
109+
"""
110+
111+
# CLEM workflow check
112+
# Look for LIF files
100113
if file_path.suffix == ".lif":
101114
self._role = "detector"
102115
self._context = CLEMContext("leica", self._basepath)
103116
return True
117+
104118
split_file_name = file_path.name.split("_")
105119
if split_file_name:
120+
# Files starting with "FoilHole" belong to the SPA workflow
106121
if split_file_name[0].startswith("FoilHole"):
107122
if not self._context:
108123
logger.info("Acquisition software: EPU")
@@ -123,9 +138,12 @@ def _find_context(self, file_path: Path) -> bool:
123138
else SPAContext("epu", self._basepath)
124139
)
125140
self.parameters_model = ProcessingParametersSPA
141+
# Assign it the detector attribute if not already present
126142
if not self._role:
127143
self._role = "detector"
128144
return True
145+
146+
# Files starting with "Position" belong to the standard tomography workflow
129147
if (
130148
split_file_name[0] == "Position"
131149
or "[" in file_path.name
@@ -136,25 +154,34 @@ def _find_context(self, file_path: Path) -> bool:
136154
logger.info("Acquisition software: tomo")
137155
self._context = TomographyContext("tomo", self._basepath)
138156
self.parameters_model = PreprocessingParametersTomo
157+
# Assign role if not already present
139158
if not self._role:
159+
# Fractions files attributed to the detector
140160
if (
141161
"Fractions" in split_file_name[-1]
142162
or "fractions" in split_file_name[-1]
143163
):
144164
self._role = "detector"
165+
# MDOC files attributed to the microscope
145166
elif (
146167
file_path.suffix == ".mdoc"
147168
or file_path.with_suffix(".mdoc").is_file()
148169
):
149170
self._role = "microscope"
171+
# Attribute all other files to the detector
150172
else:
151173
self._role = "detector"
152174
return True
175+
176+
# Files with these suffixes belong to the serial EM tomography workflow
153177
if file_path.suffix in (".mrc", ".tiff", ".tif", ".eer"):
178+
# Ignore batch files and search maps
154179
if any(p in file_path.parts for p in ("Batch", "SearchMaps")):
155180
return False
181+
# Ignore files that have a JPG file of the same name alongside them
156182
if file_path.with_suffix(".jpg").is_file():
157183
return False
184+
# Ignore the averaged movies written out by the Falcon
158185
if (
159186
len(
160187
list(
@@ -165,7 +192,6 @@ def _find_context(self, file_path: Path) -> bool:
165192
)
166193
> 1
167194
):
168-
# This covers the case of ignoring the averaged movies written out by the Falcon
169195
return False
170196
self._context = TomographyContext("serialem", self._basepath)
171197
self.parameters_model = PreprocessingParametersTomo

src/murfey/client/contexts/clem.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
def _file_transferred_to(
2424
environment: MurfeyInstanceEnvironment, source: Path, file_path: Path
2525
) -> Optional[Path]:
26+
"""
27+
Returns the Path of the transferred file on the DLS file system
28+
"""
2629
machine_config = get_machine_config(
2730
str(environment.url.geturl()), demo=environment.demo
2831
)
@@ -38,6 +41,9 @@ def _file_transferred_to(
3841
def _get_source(
3942
file_path: Path, environment: MurfeyInstanceEnvironment
4043
) -> Optional[Path]:
44+
"""
45+
Returns the Path of the file on the client PC
46+
"""
4147
for s in environment.sources:
4248
if file_path.is_relative_to(s):
4349
return s
@@ -87,16 +93,13 @@ def post_transfer(
8793
file_path=transferred_file,
8894
)
8995

90-
# Get the file size and timestamp from transferred_file
91-
# Client PC cannot see file_path; that is for server PC
92-
9396
# Post the message and log it if there's an error
9497
capture_post(
9598
url,
9699
json={
97100
"name": str(file_path),
98-
"size": transferred_file.stat().st_size,
99-
"timestamp": transferred_file.stat().st_ctime,
101+
"size": transferred_file.stat().st_size, # File size, in bytes
102+
"timestamp": transferred_file.stat().st_ctime, # For Unix systems, shows last metadata change
100103
"description": "",
101104
},
102105
)

src/murfey/client/tui/screens.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,20 +766,27 @@ def on_button_pressed(self, event: Button.Pressed):
766766
f"{self.app._environment.url.geturl()}/machine/"
767767
).json()
768768
if machine_data.get("upstream_data_download_directory"):
769+
# Create the directory locally to save files to
769770
download_dir = Path(machine_data["upstream_data_download_directory"]) / str(
770771
event.button.label
771772
)
772773
download_dir.mkdir(exist_ok=True)
774+
775+
# Get the paths to the TIFF files generated previously under the same session ID
773776
upstream_tiff_paths_response = requests.get(
774777
f"{self.app._environment.url.geturl()}/visits/{event.button.label}/upstream_tiff_paths"
775778
)
776779
upstream_tiff_paths = upstream_tiff_paths_response.json() or []
780+
781+
# Request to download the TIFF files found
777782
for tp in upstream_tiff_paths:
778783
(download_dir / tp).parent.mkdir(exist_ok=True, parents=True)
784+
# Write TIFF to the specified file path
779785
stream_response = requests.get(
780786
f"{self.app._environment.url.geturl()}/visits/{event.button.label}/upstream_tiff/{tp}",
781787
stream=True,
782788
)
789+
# Write the file chunk-by-chunk to avoid hogging memory
783790
with open(download_dir / tp, "wb") as utiff:
784791
for chunk in stream_response.iter_content(chunk_size=32 * 1024**2):
785792
utiff.write(chunk)

src/murfey/server/api.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1597,7 +1597,9 @@ def failed_client_post(post_info: PostInfo):
15971597
@router.get("/visits/{visit_name}/upstream_visits")
15981598
async def find_upstream_visits(visit_name: str):
15991599
upstream_visits = {}
1600+
# Iterates through provided upstream directories
16001601
for p in machine_config.upstream_data_directories:
1602+
# Looks for visit name in file path
16011603
for v in Path(p).glob(f"{visit_name.split('-')[0]}-*"):
16021604
upstream_visits[v.name] = v / machine_config.processed_directory_name
16031605
return upstream_visits
@@ -1620,6 +1622,10 @@ def _get_upstream_tiff_dirs(visit_name: str) -> List[Path]:
16201622

16211623
@router.get("/visits/{visit_name}/upstream_tiff_paths")
16221624
async def gather_upstream_tiffs(visit_name: str):
1625+
"""
1626+
Looks for TIFF files associated with the current session in the permitted storage
1627+
servers, and returns their relative file paths as a list.
1628+
"""
16231629
upstream_tiff_paths = []
16241630
tiff_dirs = _get_upstream_tiff_dirs(visit_name)
16251631
if not tiff_dirs:

src/murfey/server/config.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@ class MachineConfig(BaseModel):
4141
processed_extra_directory: str = ""
4242
plugin_packages: Dict[str, Path] = {}
4343
software_settings_output_directories: Dict[str, List[str]] = {}
44-
upstream_data_directories: List[Path] = []
45-
upstream_data_download_directory: Optional[Path] = None
46-
upstream_data_tiff_locations: List[str] = ["processed"]
44+
45+
# Find and download upstream directories
46+
upstream_data_directories: List[Path] = [] # Previous sessions
47+
upstream_data_download_directory: Optional[Path] = None # Set by microscope config
48+
upstream_data_tiff_locations: List[str] = ["processed"] # Location of CLEM TIFFs
4749
failure_queue: str = ""
4850

4951

0 commit comments

Comments
 (0)