Skip to content

Commit a901dbe

Browse files
authored
Merge pull request #33 from atomic-data-sciences/enhancement/windows_integration
Add robustness to Windows usage
2 parents 5cd32f0 + 6cfa360 commit a901dbe

File tree

5 files changed

+116
-23
lines changed

5 files changed

+116
-23
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ dev = [
5050
"pytest-httpserver",
5151
"pytest-cov",
5252
"pytest-order",
53+
"pytest-dependency",
5354
"ruff",
5455
"mypy",
5556
"types-requests",

src/atomicds/client.py

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pycocotools import mask as mask_util
1414

1515
from atomicds.core import BaseClient, ClientError, _FileSlice
16-
from atomicds.core.utils import _make_progress
16+
from atomicds.core.utils import _make_progress, normalize_path
1717
from atomicds.results import RHEEDImageResult, RHEEDVideoResult, XPSResult
1818

1919

@@ -345,7 +345,7 @@ def upload(self, files: list[str | BinaryIO]):
345345
file_data = []
346346
for file in files:
347347
if isinstance(file, str):
348-
path = Path(file)
348+
path = normalize_path(file)
349349
if not (path.exists() and path.is_file()):
350350
raise ClientError(f"{path} is not a file or does not exist")
351351

@@ -363,11 +363,18 @@ def upload(self, files: list[str | BinaryIO]):
363363
file_name = file.name
364364

365365
file_data.append(
366-
{"num_urls": num_urls, "file_name": file_name, "file_size": file_size}
366+
{
367+
"num_urls": num_urls,
368+
"file_name": file_name,
369+
"file_size": file_size,
370+
"file_path": file,
371+
}
367372
)
368373

369374
def __upload_file(
370-
file_info: dict[Literal["num_urls", "file_name", "file_size"], int | str],
375+
file_info: dict[
376+
Literal["num_urls", "file_name", "file_size", "file_path"], int | str
377+
],
371378
):
372379
url_data: list[dict[str, str | int]] = self._post_or_put(
373380
method="POST",
@@ -392,7 +399,7 @@ def __upload_file(
392399
"sub_url": "",
393400
"params": None,
394401
"base_override": part["url"],
395-
"file_name": file_info["file_name"],
402+
"file_path": file_info["file_path"],
396403
"offset": offset,
397404
"length": length,
398405
}
@@ -403,11 +410,11 @@ def __upload_chunk(
403410
sub_url: str,
404411
params: dict[str, Any] | None,
405412
base_override: str,
406-
file_name: str,
413+
file_path: Path,
407414
offset: int,
408415
length: int,
409416
) -> Any:
410-
slice_obj = _FileSlice(file_name, offset, length)
417+
slice_obj = _FileSlice(file_path, offset, length)
411418
return self._post_or_put(
412419
method=method,
413420
sub_url=sub_url,
@@ -435,21 +442,24 @@ def __upload_chunk(
435442
transient=True,
436443
)
437444

438-
# Confirm file upload
439-
etag_body = [
440-
{"ETag": entry["ETag"], "PartNumber": i + 1}
441-
for i, entry in enumerate(etag_data)
442-
]
443-
self._post_or_put(
444-
method="POST",
445-
sub_url="data_entries/raw_data/staged/upload_urls/complete/",
446-
params={"staging_type": "core"},
447-
body={
448-
"upload_id": url_data[0]["upload_id"],
449-
"new_filename": url_data[0]["new_filename"],
450-
"etag_data": etag_body,
451-
},
452-
)
445+
# Complete multipart upload *only* if the backend issued an upload_id
446+
first_part = url_data[0]
447+
upload_id = first_part.get("upload_id")
448+
if upload_id:
449+
etag_body = [
450+
{"ETag": entry["ETag"], "PartNumber": i + 1}
451+
for i, entry in enumerate(etag_data)
452+
]
453+
self._post_or_put(
454+
method="POST",
455+
sub_url="data_entries/raw_data/staged/upload_urls/complete/",
456+
params={"staging_type": "core"},
457+
body={
458+
"upload_id": upload_id,
459+
"new_filename": first_part["new_filename"],
460+
"etag_data": etag_body,
461+
},
462+
)
453463

454464
main_task = None
455465
file_count = len(file_data)

src/atomicds/core/utils.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
import os
2+
import re
3+
import unicodedata
4+
from pathlib import Path
5+
16
import networkx as nx
27
import numpy as np
38
import numpy.typing as npt
@@ -224,3 +229,46 @@ def render(self, task) -> Text:
224229
transient=transient,
225230
refresh_per_second=30,
226231
)
232+
233+
234+
def normalize_path(path_str: str) -> Path:
    """Normalize a raw file path string into a pathlib.Path.

    Steps performed, in order:
    1. Drop Unicode control characters (category "C", e.g. stray newlines).
    2. Convert "smart" quotes into plain ASCII quotes.
    3. Strip leading/trailing whitespace and a *matching* pair of
       surrounding quotes.
    4. Expand environment variables and the user home marker (~).
    5. Normalize Unicode to NFC and unify "/" to the platform separator.
    6. Collapse redundant "."/".." segments.

    Args:
        path_str: Raw path string (e.g. copied from a Windows shell) that
            may contain surrounding quotes, smart quotes, stray control
            characters, etc.

    Returns:
        Path: The normalized filesystem path.
    """
    # 1. Drop control characters (Unicode categories Cc, Cf, ...)
    filtered = "".join(ch for ch in path_str if unicodedata.category(ch)[0] != "C")

    # 2. Convert smart quotes to plain ones
    smart_quotes = {"\u201c": '"', "\u201d": '"', "\u2018": "'", "\u2019": "'"}
    for smart, plain in smart_quotes.items():
        filtered = filtered.replace(smart, plain)

    # 3. Trim whitespace, then strip surrounding quotes only when the
    #    opening and closing characters match. The backreference (\1)
    #    prevents a mismatched pair like `"path'` from being stripped.
    filtered = filtered.strip()
    m = re.match(r'^(["\'])(.*)\1$', filtered)
    if m:
        filtered = m.group(2)

    # 4. Expand env vars first, then ~, so a value substituted from the
    #    environment that itself starts with "~" is still user-expanded
    expanded = os.path.expanduser(os.path.expandvars(filtered))  # noqa: PTH111

    # 5. Normalize Unicode and unify separators ("/" -> os.sep; no-op on POSIX)
    normalized_unicode = unicodedata.normalize("NFC", expanded)
    unified_sep = normalized_unicode.replace("/", os.sep)

    # 6. Collapse redundant "."/".." segments
    final_path = os.path.normpath(unified_sep)

    return Path(final_path)

tests/data/test_rheed.mp4

4.17 KB
Binary file not shown.

tests/test_client.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
import pytest
33
from atomicds import Client
44
from datetime import datetime
5+
from pathlib import Path
56
from unittest import mock
7+
from urllib.parse import urljoin
68
from .conftest import ResultIDs
7-
from atomicds.results import RHEEDVideoResult
89

910

1011
@pytest.fixture
@@ -113,3 +114,36 @@ def test_get(client: Client):
113114
data_types = set([type(result) for result in results])
114115

115116
assert len(data_types) == 3
117+
118+
119+
# @pytest.mark.order(2)
120+
# @pytest.mark.dependency(name="upload", depends=["get"])
121+
# def test_upload(client: Client):
122+
# test_video = str(Path(__file__).parent.absolute()) + "/data/test_rheed.mp4"
123+
# client.upload(files=[test_video])
124+
#
125+
#
126+
# @pytest.mark.order(3)
127+
# @pytest.mark.dependency(depends=["upload"])
128+
# def test_download(client: Client):
129+
# # Get data IDs from uploaded test files
130+
# data = client.search(keywords=["test_rheed"], include_organization_data=False)
131+
# assert len(data["Data ID"].values)
132+
#
133+
# data_ids = list(data["Data ID"].values)
134+
# client.download_videos(data_ids=data_ids, dest_dir="./")
135+
#
136+
# # Cleanup downloaded files
137+
# for data_id in data_ids:
138+
# file_path = Path("./") / f"{data_id}.mp4"
139+
# if file_path.exists():
140+
# file_path.unlink()
141+
#
142+
# response = client.session.delete(
143+
# url=urljoin(client.endpoint, "/data_entries"),
144+
# verify=True,
145+
# params={"data_ids": data_ids},
146+
# )
147+
# assert (
148+
# response.ok
149+
# ), f"Failed to delete data entries: {response.status_code} - {response.text}"

0 commit comments

Comments
 (0)