Skip to content

Commit 16e666f

Browse files
author
Tom Augspurger
authored
[core]: Avoid PyZipFile in code upload (#179)
* [core]: Avoid PyZipFile in code upload This reimplements code upload for directories to avoid PyZipFile.writepy. It (deliberately) uploads `.pyc` files, which imposes a strong constraint that the version of Python running submit on the client matches the version of Python running in the task. Instead, we replicate PyZipFile.writepy's behavior, but we upload `.py` files rather than `.pyc`. The zip-based importer works with both `.py` and `.pyc` files.
1 parent 08cf1d4 commit 16e666f

File tree

4 files changed

+58
-28
lines changed

4 files changed

+58
-28
lines changed

pctasks/client/pctasks/client/client.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import logging
33
import os
44
import pathlib
5-
import zipfile
65
from time import perf_counter
76
from typing import Any, Dict, Iterable, Optional, Type, TypeVar, Union
87
from urllib.parse import urlparse
@@ -18,6 +17,7 @@
1817
WorkflowNotFoundError,
1918
)
2019
from pctasks.client.settings import ClientSettings
20+
from pctasks.core.importer import write_code
2121
from pctasks.core.models.record import Record
2222
from pctasks.core.models.response import (
2323
JobPartitionRunRecordListResponse,
@@ -180,17 +180,7 @@ def upload_code(self, local_path: Union[pathlib.Path, str]) -> UploadCodeResult:
180180
raise OSError(f"Path {path} does not exist.")
181181

182182
file_obj: Union[io.BufferedReader, io.BytesIO]
183-
if path.is_file():
184-
file_obj = path.open("rb")
185-
name = path.name
186-
187-
else:
188-
file_obj = io.BytesIO()
189-
with zipfile.PyZipFile(file_obj, "w") as zf:
190-
zf.writepy(str(path))
191-
file_obj.seek(0)
192-
193-
name = path.with_suffix(".zip").name
183+
name, file_obj = write_code(path)
194184

195185
try:
196186
resp = self._call_api(

pctasks/core/pctasks/core/importer.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@
77
# For background, checkout out Recipe 10.11 in the Python Cookbook (3rd edition)
88
from __future__ import annotations
99

10+
import io
1011
import logging
1112
import pathlib
1213
import site
1314
import subprocess
1415
import sys
1516
import zipfile
1617
from tempfile import TemporaryDirectory
17-
from typing import List, Optional
18+
from typing import List, Optional, Tuple, Union
1819

1920
from pctasks.core.storage import Storage
2021

@@ -133,3 +134,32 @@ def ensure_code(
133134
sys.path.insert(0, str(output_path))
134135

135136
return pathlib.Path(output_path)
137+
138+
139+
def write_code(
140+
file_path: pathlib.Path,
141+
) -> Tuple[str, Union[io.BufferedReader, io.BytesIO]]:
142+
"""
143+
Like :meth:`zipfile.PyZipFile.writepy`, but doesn't use ``.pyc`` files.
144+
145+
Parameters
146+
----------
147+
file_path: pathlib.Path
148+
The the Path object that's the directory of code you want to upload.
149+
"""
150+
file_path = pathlib.Path(file_path)
151+
152+
file_obj: Union[io.BufferedReader, io.BytesIO]
153+
if file_path.is_file():
154+
file_obj = file_path.open("rb")
155+
name = file_path.name
156+
157+
else:
158+
name = file_path.with_suffix(".zip").name
159+
file_obj = io.BytesIO()
160+
with zipfile.ZipFile(file_obj, "w") as zf:
161+
for path in file_path.glob("**/*.py"):
162+
zf.write(path, path.relative_to(file_path.parent))
163+
164+
file_obj.seek(0)
165+
return name, file_obj

pctasks/core/pctasks/core/storage/base.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
import hashlib
2-
import io
32
import logging
43
import pathlib
5-
import zipfile
64
from abc import ABC, abstractmethod
75
from dataclasses import dataclass
86
from datetime import datetime as Datetime
9-
from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple
7+
from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Union
108

119
import orjson
1210

@@ -131,23 +129,17 @@ def upload_bytes(
131129
) -> None:
132130
"""Upload bytes to a storage file."""
133131

134-
def upload_code(self, file_path: str) -> str:
132+
def upload_code(self, file_path: Union[str, pathlib.Path]) -> str:
135133
"""Upload a Python module or package."""
134+
from pctasks.core.importer import write_code
135+
136136
path = pathlib.Path(file_path)
137137

138138
if not path.exists():
139139
raise OSError(f"Path {path} does not exist.")
140140

141-
if path.is_file():
142-
data = path.read_bytes()
143-
name = path.name
144-
else:
145-
buf = io.BytesIO()
146-
with zipfile.PyZipFile(buf, "w") as zf:
147-
zf.writepy(str(path))
148-
149-
data = buf.getvalue()
150-
name = path.with_suffix(".zip").name
141+
name, buf = write_code(path)
142+
data = buf.read()
151143

152144
token = hashlib.md5(data).hexdigest()
153145
dst_path = f"{token}/{name}"

pctasks/core/tests/storage/test_importer.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
import subprocess
33
import sys
44
import unittest.mock
5+
import zipfile
56
from pathlib import Path
67
from tempfile import TemporaryDirectory
78
from typing import Optional
89

9-
from pctasks.core.importer import ensure_code, ensure_requirements
10+
from pctasks.core.importer import ensure_code, ensure_requirements, write_code
1011
from pctasks.core.storage.blob import BlobStorage
1112
from pctasks.dev.blob import temp_azurite_blob_storage
1213

@@ -109,3 +110,20 @@ def test_ensure_requirements_target_dir():
109110
finally:
110111
if target_dir in sys.path:
111112
sys.path.remove(target_dir)
113+
114+
115+
def test_write_code():
116+
with TemporaryDirectory() as temp_dir:
117+
p = Path(temp_dir)
118+
pkg = p / "my_package"
119+
module = pkg / "my_file.py"
120+
module.parent.mkdir(exist_ok=True)
121+
module.touch()
122+
123+
name, buf = write_code(pkg)
124+
125+
assert name == "my_package.zip"
126+
127+
with zipfile.ZipFile(buf) as zf:
128+
assert len(zf.filelist) == 1
129+
assert zf.filelist[0].filename == "my_package/my_file.py"

0 commit comments

Comments
 (0)