Skip to content

Commit 0bee50a

Browse files
yurekami and claude committed
Use PurePosixPath for cross-platform path handling (#15113)
Replace string split("/") with PurePosixPath().name for extracting filenames from cloud storage paths, URLs, and HuggingFace repo names. While these paths use forward slashes by convention (not affected by the host OS), using PurePosixPath is more explicit, self-documenting, and follows Python best practices for path manipulation.

Files modified:
- nemo/core/connectors/save_restore_connector.py (cloud storage paths)
- nemo/core/classes/common.py (cloud URLs and HuggingFace repo names)

Note: tarutils.py and nemo_file.py were NOT modified as they handle tar archive internal paths (which mandate POSIX format) and dictionary keys (not filesystem paths) respectively.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
Signed-off-by: yurekami <[email protected]>
1 parent d32d4fc commit 0bee50a

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

nemo/core/classes/common.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
from dataclasses import dataclass, field
2929
from enum import Enum
3030
from functools import total_ordering
31-
from pathlib import Path
31+
from pathlib import Path, PurePosixPath
3232
from typing import Any, Dict, List, Optional, Tuple, Union
3333

3434
import hydra
@@ -934,7 +934,8 @@ def _get_ngc_pretrained_model_info(cls, model_name: str, refresh_cache: bool = F
934934
f"Model {model_name} was not found. Check cls.list_available_models()\n"
935935
f"for the list of all available models."
936936
)
937-
filename = location_in_the_cloud.split("/")[-1]
937+
# Use PurePosixPath for cloud URLs which always use forward slashes
938+
filename = PurePosixPath(location_in_the_cloud).name
938939
url = location_in_the_cloud.replace(filename, "")
939940
cache_dir = Path.joinpath(resolve_cache_dir(), f'{filename[:-5]}')
940941
# If either description and location in the cloud changes, this will force re-download
@@ -972,7 +973,8 @@ def _get_hf_hub_pretrained_model_info(cls, model_name: str, refresh_cache: bool
972973
- The path to the NeMo model (.nemo file) in some cached directory (managed by HF Hub).
973974
"""
974975
# Resolve the model name without origin for filename
975-
resolved_model_filename = model_name.split("/")[-1] + '.nemo'
976+
# Use PurePosixPath since HuggingFace repo names use forward slashes (e.g., "nvidia/model-name")
977+
resolved_model_filename = PurePosixPath(model_name).name + '.nemo'
976978

977979
# Try to take from cache first - if not fallback to options below
978980
if not refresh_cache:

nemo/core/connectors/save_restore_connector.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
import time
2222
import uuid
2323
from contextlib import contextmanager
24+
from pathlib import PurePosixPath
2425
from typing import Callable, Generator, Optional, Set, Union
2526

2627
import torch
@@ -610,7 +611,8 @@ def _make_nemo_file_from_folder(filename, source_dir):
610611
@staticmethod
611612
def _make_nemo_file_from_folder_with_multistorageclient(filename, source_dir):
612613
msc = import_multistorageclient()
613-
filename_with_extension = filename.split("/")[-1] # get the filename and extension
614+
# Use PurePosixPath for cloud storage paths which always use forward slashes
615+
filename_with_extension = PurePosixPath(filename).name
614616
with tempfile.TemporaryDirectory() as tmpdir:
615617
tar_file = os.path.join(tmpdir, filename_with_extension)
616618
with tarfile.open(tar_file, "w:") as tar:
@@ -726,7 +728,8 @@ def _unpack_nemo_file_with_multistorageclient(
726728
raise FileNotFoundError(f"{path2file} does not exist")
727729

728730
with tempfile.TemporaryDirectory() as tmpdir:
729-
filename_with_extension = path2file.split("/")[-1] # get the filename with extension
731+
# Use PurePosixPath for cloud storage paths which always use forward slashes
732+
filename_with_extension = PurePosixPath(path2file).name
730733
downloaded_file_path = os.path.join(tmpdir, filename_with_extension)
731734
start_time = time.time()
732735
msc.download_file(path2file, downloaded_file_path)

0 commit comments

Comments
 (0)