Skip to content

Commit 9de0917

Browse files
authored
Added compatibility for pip >v22.2.2 (#277)
* Also pulls the hidden `.whl.metadata` files from the PyPI simple API, since newer versions of `pip` look for them.
1 parent f2f6f6b commit 9de0917

File tree

7 files changed

+151
-44
lines changed

7 files changed

+151
-44
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ classifiers = [
3030
"Programming Language :: Python :: 3.12",
3131
]
3232
dependencies = [
33-
"pydantic",
33+
"pydantic<2", # Pip hops between installing v2.7 or v1.10 depending on which of the additional dependencies are requested
3434
"requests",
3535
"rich",
3636
"werkzeug",

src/murfey/server/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def sanitise(in_string: str) -> str:
111111
return in_string.replace("\r\n", "").replace("\n", "")
112112

113113

114-
def santise_path(in_path: Path) -> Path:
114+
def sanitise_path(in_path: Path) -> Path:
115115
return Path("/".join(secure_filename(p) for p in in_path.parts))
116116

117117

src/murfey/server/api.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
get_machine_config,
4444
get_microscope,
4545
get_tomo_preproc_params,
46+
sanitise,
4647
templates,
4748
)
4849
from murfey.server.config import from_file, settings
@@ -110,10 +111,6 @@
110111
router = APIRouter()
111112

112113

113-
def sanitise(in_string: str) -> str:
114-
return in_string.replace("\r\n", "").replace("\n", "")
115-
116-
117114
# This will be the homepage for a given microscope.
118115
@router.get("/", response_class=HTMLResponse)
119116
async def root(request: Request):

src/murfey/server/bootstrap.py

Lines changed: 140 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import logging
1818
import random
1919
import re
20+
from urllib.parse import quote
2021

2122
import packaging.version
2223
import requests
@@ -41,10 +42,47 @@
4142
log = logging.getLogger("murfey.server.bootstrap")
4243

4344

45+
def _validate_package_name(package: str) -> bool:
46+
"""
47+
Check that a package name follows PEP 503 naming conventions, containing only
48+
lowercase alphanumerics, "_", "-", or "." characters
49+
"""
50+
if re.match(r"^[a-z0-9\-\_\.]+$", package):
51+
return True
52+
else:
53+
return False
54+
55+
56+
def _get_full_path_response(package: str) -> requests.Response:
57+
"""
58+
Validates the package name, sanitises it if valid, and attempts to return an HTTP
59+
response from PyPI.
60+
"""
61+
62+
if _validate_package_name(package):
63+
# Sanitise and normalise package name (PEP 503)
64+
package_clean = quote(re.sub(r"[-_.]+", "-", package.lower()))
65+
66+
# Get HTTP response
67+
url = f"https://pypi.org/simple/{package_clean}"
68+
response = requests.get(url)
69+
70+
if response.status_code == 200:
71+
return response
72+
else:
73+
raise HTTPException(status_code=response.status_code)
74+
else:
75+
raise ValueError(f"{package} is not a valid package name")
76+
77+
4478
@pypi.get("/", response_class=Response)
4579
def get_pypi_index():
46-
"""Obtain list of all PyPI packages via the simple API (PEP 503)."""
80+
"""
81+
Obtain list of all PyPI packages via the simple API (PEP 503).
82+
"""
83+
4784
index = requests.get("https://pypi.org/simple/")
85+
4886
return Response(
4987
content=index.content,
5088
media_type=index.headers.get("Content-Type"),
@@ -53,52 +91,115 @@ def get_pypi_index():
5391

5492

5593
@pypi.get("/{package}/", response_class=Response)
56-
def get_pypi_package_downloads_list(package: str):
57-
"""Obtain list of all package downloads from PyPI via the simple API (PEP 503),
58-
and rewrite all download URLs to point to this server,
59-
underneath the current directory."""
60-
full_path_response = requests.get(f"https://pypi.org/simple/{package}")
61-
62-
def rewrite_pypi_url(match):
63-
url = match.group(4)
64-
return (
65-
b"<a "
66-
+ match.group(1)
67-
+ b'href="'
68-
+ url
69-
+ b'"'
70-
+ match.group(3)
71-
+ b">"
72-
+ match.group(4)
73-
+ b"</a>"
74-
)
94+
def get_pypi_package_downloads_list(package: str) -> Response:
95+
"""
96+
Obtain list of all package downloads from PyPI via the simple API (PEP 503), and
97+
rewrite all download URLs to point to this server, under the current directory.
98+
"""
99+
100+
def _rewrite_pypi_url(match):
101+
"""
102+
Use regular expression matching to rewrite the URLs. Points them from
103+
pythonhosted.org to current server, and removes the hash from the URL as well
104+
"""
105+
# url = match.group(4) # Original
106+
url = match.group(3)
107+
return '<a href="' + url + '"' + match.group(2) + ">" + match.group(3) + "</a>"
108+
109+
# Validate package and URL
110+
full_path_response = _get_full_path_response(package)
111+
112+
# Process lines related to PyPI packages in response
113+
content: bytes = full_path_response.content # In bytes
114+
content_text: str = content.decode("latin1") # Convert to strings
115+
content_text_list = []
116+
for line in content_text.splitlines():
117+
# Look for lines with hyperlinks
118+
if "<a href" in line:
119+
# Rewrite URL to point to current proxy server
120+
line_new = re.sub(
121+
'^<a href="([^">]*)"([^>]*)>([^<]*)</a>', # Regex search criteria
122+
_rewrite_pypi_url, # Search criteria applied to this function
123+
line,
124+
)
125+
content_text_list.append(line_new)
126+
127+
# Add entry for wheel metadata (PEP 658; see _expose_wheel_metadata)
128+
if ".whl" in line_new:
129+
line_metadata = line_new.replace(".whl", ".whl.metadata")
130+
content_text_list.append(line_metadata)
131+
else:
132+
# Append other lines as normal
133+
content_text_list.append(line)
134+
135+
content_text_new = str("\n".join(content_text_list)) # Regenerate HTML structure
136+
content_new = content_text_new.encode("latin1") # Convert back to bytes
75137

76-
content = re.sub(
77-
b'<a ([^>]*)href="([^">]*)"([^>]*)>([^<]*)</a>',
78-
rewrite_pypi_url,
79-
full_path_response.content,
80-
)
81138
return Response(
82-
content=content,
139+
content=content_new,
83140
media_type=full_path_response.headers.get("Content-Type"),
84141
status_code=full_path_response.status_code,
85142
)
86143

87144

88145
@pypi.get("/{package}/{filename}", response_class=Response)
89146
def get_pypi_file(package: str, filename: str):
90-
"""Obtain and pass through a specific download for a PyPI package."""
91-
full_path_response = requests.get(f"https://pypi.org/simple/{package}")
147+
"""
148+
Obtain and pass through a specific download for a PyPI package.
149+
"""
150+
151+
def _expose_wheel_metadata(response_bytes: bytes) -> bytes:
152+
"""
153+
As of pip v22.3 (coinciding with PEP 658), pip expects to find an additional
154+
".whl.metadata" file based on the URL of the ".whl" file present on the PyPI Simple
155+
Index. However, because it is not listed on the webpage itself, it is not copied
156+
across to the proxy. This function adds that URL to the proxy explicitly.
157+
"""
158+
159+
# Analyse API response line-by-line
160+
response_text: str = response_bytes.decode("latin1") # Convert to text
161+
response_text_list = [] # Write line-by-line analysis to here
162+
163+
for line in response_text.splitlines():
164+
# Process URLs
165+
if r"<a href=" in line:
166+
response_text_list.append(line) # Add to list
167+
168+
# Add new line to explicitly call for wheel metadata
169+
if ".whl" in line:
170+
# Add ".metadata" to URL and file name
171+
line_new = line.replace(".whl", ".whl.metadata")
172+
response_text_list.append(line_new) # Add to list
173+
174+
# Append all other lines as normal
175+
else:
176+
response_text_list.append(line)
177+
178+
# Recover original structure
179+
response_text_new = str("\n".join(response_text_list))
180+
response_bytes_new = bytes(response_text_new, encoding="latin-1")
181+
182+
return response_bytes_new
183+
184+
# Validate package and URL
185+
full_path_response = _get_full_path_response(package)
186+
187+
# Get filename in bytes
92188
filename_bytes = re.escape(filename.encode("latin1"))
93189

190+
# Add explicit URLs for ".whl.metadata" files
191+
content = _expose_wheel_metadata(full_path_response.content)
192+
193+
# Find package matching the specified filename
94194
selected_package_link = re.search(
95-
b'<a [^>]*?href="([^">]*)"[^>]*>' + filename_bytes + b"</a>",
96-
full_path_response.content,
195+
b'<a href="([^">]*)"[^>]*>' + filename_bytes + b"</a>",
196+
content,
97197
)
98198
if not selected_package_link:
99199
raise HTTPException(status_code=404, detail="File not found for package")
100200
original_url = selected_package_link.group(1)
101201
original_file = requests.get(original_url)
202+
102203
return Response(
103204
content=original_file.content,
104205
media_type=original_file.headers.get("Content-Type"),
@@ -108,8 +209,10 @@ def get_pypi_file(package: str, filename: str):
108209

109210
@plugins.get("/{package}", response_class=FileResponse)
110211
def get_plugin_wheel(package: str):
212+
111213
machine_config = get_machine_config()
112214
wheel_path = machine_config.plugin_packages.get(package)
215+
113216
if wheel_path is None:
114217
return None
115218
return FileResponse(
@@ -124,6 +227,7 @@ def get_bootstrap_instructions(request: Request):
124227
Return a website containing instructions for installing the Murfey client on a
125228
machine with no internet access.
126229
"""
230+
127231
return respond_with_template(
128232
"bootstrap.html",
129233
{
@@ -140,7 +244,10 @@ def get_pip_wheel():
140244
This is only used during bootstrapping by the client to identify and then
141245
download the actually newest appropriate version of pip.
142246
"""
143-
return get_pypi_file(package="pip", filename="pip-21.3.1-py3-none-any.whl")
247+
return get_pypi_file(
248+
package="pip",
249+
filename="pip-22.2.2-py3-none-any.whl", # Highest version that works before PEP 658 change
250+
)
144251

145252

146253
@bootstrap.get("/murfey.whl", response_class=Response)
@@ -153,6 +260,7 @@ def get_murfey_wheel():
153260
"""
154261
full_path_response = requests.get("https://pypi.org/simple/murfey")
155262
wheels = {}
263+
156264
for wheel_file in re.findall(
157265
b"<a [^>]*>([^<]*).whl</a>",
158266
full_path_response.content,
@@ -174,7 +282,7 @@ def get_murfey_wheel():
174282
@cygwin.get("/setup-x86_64.exe", response_class=Response)
175283
def get_cygwin_setup():
176284
"""
177-
Obtain and past though a Cygwin installer from an official source.
285+
Obtain and pass through a Cygwin installer from an official source.
178286
This is used during client bootstrapping and can download and install the
179287
Cygwin distribution that then remains on the client machines.
180288
"""

src/murfey/server/demo_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
get_hostname,
3333
get_microscope,
3434
sanitise,
35-
santise_path,
35+
sanitise_path,
3636
)
3737
from murfey.server import shutdown as _shutdown
3838
from murfey.server import templates
@@ -968,7 +968,7 @@ def flush_tomography_processing(
968968
async def request_tomography_preprocessing(
969969
visit_name: str, client_id: int, proc_file: ProcessFile, db=murfey_db
970970
):
971-
if not santise_path(Path(proc_file.path)).exists():
971+
if not sanitise_path(Path(proc_file.path)).exists():
972972
log.warning(
973973
f"{sanitise(str(proc_file.path))} has not been transferred before preprocessing"
974974
)

src/murfey/util/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
logger = logging.getLogger("murfey.util")
2222

2323

24+
def sanitise(in_string: str) -> str:
25+
return in_string.replace("\r\n", "").replace("\n", "")
26+
27+
2428
@lru_cache(maxsize=1)
2529
def get_machine_config(url: str, demo: bool = False) -> dict:
2630
return requests.get(f"{url}/machine/").json()

src/murfey/util/lif.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,12 @@
1515
from readlif.reader import LifFile
1616
from tifffile import imwrite
1717

18+
from murfey.util import sanitise
19+
1820
# Create logger object to output messages with
1921
logger = logging.getLogger("murfey.util.lif")
2022

2123

22-
def sanitise(in_string: str) -> str:
23-
return in_string.replace("\r\n", "").replace("\n", "")
24-
25-
2624
def get_xml_metadata(
2725
file: LifFile,
2826
save_xml: Optional[Path] = None,

0 commit comments

Comments
 (0)