Skip to content

Commit 32dddd3

Browse files
committed
Merge branch 'v4.4.0-rc-dev'
2 parents 390139c + f4a839d commit 32dddd3

File tree

21 files changed

+502
-551
lines changed

21 files changed

+502
-551
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ jobs:
398398

399399
strategy:
400400
matrix:
401-
os: [ubuntu-20.04, macos-11, windows-2019]
401+
os: [ubuntu-20.04, macos-12, windows-2022]
402402

403403
# python versions should be consistent with the strategy matrix and the runs-integration-tests versions
404404
python: [3.8, '3.9', '3.10', '3.11']

Pipfile.lock

Lines changed: 331 additions & 368 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/news.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ stored in the `~/.synapseConfig` file.
3535
- \[[SYNPY-1483](https://sagebionetworks.jira.com/browse/SYNPY-1483)\] - Update credential chain to use user args first
3636
- \[[SYNPY-1485](https://sagebionetworks.jira.com/browse/SYNPY-1485)\] - Include isort with pre-commit
3737
- \[[SYNPY-1487](https://sagebionetworks.jira.com/browse/SYNPY-1487)\] - Fix missing coverage.xml bug
38+
- \[[SYNPY-1504](https://sagebionetworks.jira.com/browse/SYNPY-1504)\] - Review usage of download_location and path parameters on File class
3839

3940
## 4.3.0 (2024-05-30)
4041
### Highlights

docs/scripts/object_orientated_programming_poc/oop_poc_file.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def store_file():
131131
# 5. Downloading a file ===============================================================
132132
# Downloading a file to a location has a default beahvior of "keep.both"
133133
downloaded_file = File(
134-
id=file.id, download_location=os.path.expanduser("~/temp/myNewFolder")
134+
id=file.id, path=os.path.expanduser("~/temp/myNewFolder")
135135
).get()
136136
print(f"Downloaded file: {downloaded_file.path}")
137137

@@ -142,7 +142,7 @@ def store_file():
142142
print(f"Before file md5: {utils.md5_for_file(path_to_file).hexdigest()}")
143143
downloaded_file = File(
144144
id=downloaded_file.id,
145-
download_location=os.path.expanduser("~/temp/myNewFolder"),
145+
path=os.path.expanduser("~/temp/myNewFolder"),
146146
if_collision="overwrite.local",
147147
).get()
148148
print(f"After file md5: {utils.md5_for_file(path_to_file).hexdigest()}")
@@ -153,7 +153,7 @@ def store_file():
153153
print(f"Before file md5: {utils.md5_for_file(path_to_file).hexdigest()}")
154154
downloaded_file = File(
155155
id=downloaded_file.id,
156-
download_location=os.path.expanduser("~/temp/myNewFolder"),
156+
path=os.path.expanduser("~/temp/myNewFolder"),
157157
if_collision="keep.local",
158158
).get()
159159
print(f"After file md5: {utils.md5_for_file(path_to_file).hexdigest()}")

setup.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ install_requires =
5656
asyncio-atexit~=1.0.1
5757
httpx~=0.27.0
5858
tqdm>=4.66.2,<5.0
59-
loky~=3.0.0
6059
async-lru~=2.0.4
6160
psutil~=5.9.8
6261
tests_require =

synapseclient/client.py

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import httpx
3535
import requests
3636
from deprecated import deprecated
37-
from loky import get_reusable_executor
3837
from opentelemetry import trace
3938
from opentelemetry.trace import SpanKind
4039

@@ -410,7 +409,6 @@ def log_response(response: httpx.Response) -> None:
410409
self._thread_executor = {}
411410
self._process_executor = {}
412411
self._parallel_file_transfer_semaphore = {}
413-
self._md5_semaphore = {}
414412
self.use_boto_sts_transfers = transfer_config["use_boto_sts"]
415413
self._parts_transfered_counter = 0
416414

@@ -522,55 +520,6 @@ def close_pool() -> None:
522520
asyncio_atexit.register(close_pool)
523521
return self._thread_executor[asyncio_event_loop]
524522

525-
def _get_process_pool_executor(self, asyncio_event_loop: asyncio.AbstractEventLoop):
526-
"""
527-
Retrieve the process pool executor for the Synapse client. Or create a new one
528-
if it does not exist. This executor is used for parallel processing of data.
529-
530-
This is expected to be called from within an AsyncIO loop.
531-
532-
Note: Within Windows a ProcessPoolExecutor requires that the initial entry point
533-
into the code be within a `if __name__ == "__main__":` block. This is not
534-
possible within the current codebase as it would require everyone using this
535-
library to have this as their entry point. As a result, the ProcessPoolExecutor
536-
will not work within Windows.
537-
538-
To get around this Windows limitation this is using this package:
539-
https://github.com/joblib/loky
540-
"""
541-
if (
542-
hasattr(self, "_process_executor")
543-
and asyncio_event_loop in self._process_executor
544-
and self._process_executor[asyncio_event_loop] is not None
545-
):
546-
return self._process_executor[asyncio_event_loop]
547-
548-
self._process_executor.update({asyncio_event_loop: get_reusable_executor(1)})
549-
550-
return self._process_executor[asyncio_event_loop]
551-
552-
def _get_md5_semaphore(
553-
self, asyncio_event_loop: asyncio.AbstractEventLoop
554-
) -> asyncio.Semaphore:
555-
"""
556-
Retrieve the semaphore for the Synapse client. Or create a new one if it does not
557-
exist. This semaphore is used to ensure that only one process is calculating the
558-
MD5 hash at a time. This is to prevent the custom process pool executor from
559-
thrashing or handling the waiting. We should let asyncio handle the waiting.
560-
561-
This is expected to be called from within an AsyncIO loop.
562-
"""
563-
if (
564-
hasattr(self, "_md5_semaphore")
565-
and asyncio_event_loop in self._md5_semaphore
566-
and self._md5_semaphore[asyncio_event_loop] is not None
567-
):
568-
return self._md5_semaphore[asyncio_event_loop]
569-
570-
self._md5_semaphore.update({asyncio_event_loop: asyncio.Semaphore(1)})
571-
572-
return self._md5_semaphore[asyncio_event_loop]
573-
574523
def _get_parallel_file_transfer_semaphore(
575524
self, asyncio_event_loop: asyncio.AbstractEventLoop
576525
) -> asyncio.Semaphore:

synapseclient/core/download/download_async.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ def _stream_and_write_chunk(
473473
retry_errors=RETRYABLE_CONNECTION_ERRORS,
474474
retry_exceptions=RETRYABLE_CONNECTION_EXCEPTIONS,
475475
retry_max_back_off=DEFAULT_MAX_BACK_OFF_ASYNC,
476+
read_response_content=False,
476477
)
477478

478479
return start, end
@@ -531,7 +532,9 @@ def _execute_stream_and_write_chunk(
531532
with session.stream(
532533
method="GET", url=presigned_url_provider.get_info().url, headers=range_header
533534
) as response:
534-
_raise_for_status_httpx(response=response, logger=request._syn.logger)
535+
_raise_for_status_httpx(
536+
response=response, logger=request._syn.logger, read_response_content=False
537+
)
535538
data = response.read()
536539
data_length = len(data)
537540
request._write_chunk(

synapseclient/core/exceptions.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,10 @@ def _raise_for_status(response, verbose=False):
191191

192192

193193
def _raise_for_status_httpx(
194-
response: httpx.Response, logger: logging.Logger, verbose: bool = False
194+
response: httpx.Response,
195+
logger: logging.Logger,
196+
verbose: bool = False,
197+
read_response_content: bool = True,
195198
) -> None:
196199
"""
197200
Replacement for requests.response.raise_for_status().
@@ -200,8 +203,12 @@ def _raise_for_status_httpx(
200203
201204
Arguments:
202205
response: The response object from the HTTPX request.
206+
logger: The logger object to log any exceptions.
203207
verbose: If True, the request and response information will be appended to the
204208
error message.
209+
read_response_content: If True, the response content will be read and appended
210+
to the error message. If False, the response content will not be read and
211+
appended to the error message.
205212
"""
206213

207214
message = None
@@ -242,7 +249,7 @@ def _raise_for_status_httpx(
242249
# 450: 'blocked_by_windows_parental_controls'
243250
# 451: 'unavailable_for_legal_reasons'
244251
# 499: 'client_closed_request'
245-
message_body = _get_message(response, logger)
252+
message_body = _get_message(response, logger) if read_response_content else ""
246253
message = f"{response.status_code} {CLIENT_ERROR} {message_body}"
247254

248255
elif 500 <= response.status_code < 600:
@@ -257,7 +264,7 @@ def _raise_for_status_httpx(
257264
# 507: 'insufficient_storage'
258265
# 509: 'bandwidth_limit_exceeded'
259266
# 510: 'not_extended'
260-
message_body = _get_message(response, logger)
267+
message_body = _get_message(response, logger) if read_response_content else ""
261268
message = f"{response.status_code} {SERVER_ERROR} {message_body}"
262269

263270
if message is not None:
@@ -275,7 +282,8 @@ def _raise_for_status_httpx(
275282
# Append the response received
276283
message += f"\n\n{RESPONSE_PREFIX}\n{str(response)}"
277284
message += f"\n{HEADERS_PREFIX}{response.headers}"
278-
message += f"\n{BODY_PREFIX}{message_body}\n\n"
285+
if read_response_content:
286+
message += f"\n{BODY_PREFIX}{message_body}\n\n"
279287
except Exception: # noqa
280288
logger.exception(UNABLE_TO_APPEND_RESPONSE)
281289
message += f"\n{UNABLE_TO_APPEND_RESPONSE}"

synapseclient/core/retry.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@
6464
"timeout",
6565
"ReadError",
6666
"ReadTimeout",
67+
# HTTPX Specific connection exceptions:
68+
"RemoteProtocolError",
69+
"TimeoutException",
70+
"ConnectError",
71+
"ConnectTimeout",
6772
]
6873

6974
DEBUG_EXCEPTION = "calling %s resulted in an Exception"
@@ -272,6 +277,7 @@ async def with_retry_time_based_async(
272277
retry_back_off_factor: float = DEFAULT_BACK_OFF_FACTOR_ASYNC,
273278
retry_max_back_off: float = DEFAULT_MAX_BACK_OFF_ASYNC,
274279
retry_max_wait_before_failure: float = DEFAULT_MAX_WAIT_BEFORE_FAIL_ASYNC,
280+
read_response_content: bool = True,
275281
) -> Union[Exception, httpx.Response, Any, None]:
276282
"""
277283
Retries the given function under certain conditions. This is created such that it
@@ -297,6 +303,7 @@ async def with_retry_time_based_async(
297303
retry_back_off_factor: The factor to increase the wait time by for each retry.
298304
retry_max_back_off: The maximum wait time.
299305
retry_max_wait_before_failure: The maximum wait time before failure.
306+
read_response_content: Whether to read the response content for HTTP requests.
300307
301308
Example: Using with_retry
302309
Using ``with_retry_time_based_async`` to consolidate inputs into a list.
@@ -353,7 +360,10 @@ async def foo(a, b, c): return [a, b, c]
353360
retries += 1
354361
if total_wait < retry_max_wait_before_failure and retry:
355362
_log_for_retry(
356-
logger=logger, response=response, caught_exception=caught_exception
363+
logger=logger,
364+
response=response,
365+
caught_exception=caught_exception,
366+
read_response_content=read_response_content,
357367
)
358368

359369
backoff_wait = calculate_exponential_backoff(
@@ -395,6 +405,7 @@ def with_retry_time_based(
395405
retry_back_off_factor: float = DEFAULT_BACK_OFF_FACTOR_ASYNC,
396406
retry_max_back_off: float = DEFAULT_MAX_BACK_OFF_ASYNC,
397407
retry_max_wait_before_failure: float = DEFAULT_MAX_WAIT_BEFORE_FAIL_ASYNC,
408+
read_response_content: bool = True,
398409
) -> Union[Exception, httpx.Response, Any, None]:
399410
"""
400411
Retries the given function under certain conditions. This is created such that it
@@ -420,6 +431,7 @@ def with_retry_time_based(
420431
retry_back_off_factor: The factor to increase the wait time by for each retry.
421432
retry_max_back_off: The maximum wait time.
422433
retry_max_wait_before_failure: The maximum wait time before failure.
434+
read_response_content: Whether to read the response content for HTTP requests.
423435
424436
Example: Using with_retry
425437
Using ``with_retry_time_based`` to consolidate inputs into a list.
@@ -476,7 +488,10 @@ async def foo(a, b, c): return [a, b, c]
476488
retries += 1
477489
if total_wait < retry_max_wait_before_failure and retry:
478490
_log_for_retry(
479-
logger=logger, response=response, caught_exception=caught_exception
491+
logger=logger,
492+
response=response,
493+
caught_exception=caught_exception,
494+
read_response_content=read_response_content,
480495
)
481496

482497
backoff_wait = calculate_exponential_backoff(
@@ -569,16 +584,18 @@ def _log_for_retry(
569584
logger: logging.Logger,
570585
response: httpx.Response = None,
571586
caught_exception: Exception = None,
587+
read_response_content: bool = True,
572588
) -> None:
573589
"""Logs the retry message to debug.
574590
575591
Arguments:
576592
logger: The logger to use for logging the retry message.
577593
response: The response object from the request.
578594
caught_exception: The exception caught from the request.
595+
read_response_content: Whether to read the response content for HTTP requests.
579596
"""
580597
if response is not None:
581-
response_message = _get_message(response)
598+
response_message = _get_message(response) if read_response_content else ""
582599
url_message_part = ""
583600

584601
if hasattr(response, "request") and hasattr(response.request, "url"):

synapseclient/core/upload/multipart_upload_async.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@
113113
)
114114
from synapseclient.core.utils import MB
115115
from synapseclient.core.utils import md5_fn as md5_fn_util
116-
from synapseclient.core.utils import md5_for_file_multiprocessing
116+
from synapseclient.core.utils import md5_for_file_hex
117117

118118
if TYPE_CHECKING:
119119
from synapseclient import Synapse
@@ -668,17 +668,7 @@ async def multipart_upload_file_async(
668668
mime_type, _ = mimetypes.guess_type(file_path, strict=False)
669669
content_type = mime_type or "application/octet-stream"
670670

671-
md5_hex = md5 or (
672-
await md5_for_file_multiprocessing(
673-
filename=file_path,
674-
process_pool_executor=syn._get_process_pool_executor(
675-
asyncio_event_loop=asyncio.get_running_loop()
676-
),
677-
md5_semaphore=syn._get_md5_semaphore(
678-
asyncio_event_loop=asyncio.get_running_loop()
679-
),
680-
)
681-
)
671+
md5_hex = md5 or md5_for_file_hex(filename=file_path)
682672

683673
part_size = get_part_size(
684674
part_size or DEFAULT_PART_SIZE,

0 commit comments

Comments
 (0)