Skip to content

Commit 3891504

Browse files
[WiP] Refactoring before 0.2 (#41)
* Use logging for debug logs
* Rename load_nvidia_functions to load_library_functions and add debug_log instead of env
* Do not automatically search /opt/rocm (update README.md)
* Cleanup license comment
* Fix vllm testing
* Update version to 0.2

Signed-off-by: Takeshi Yoshimura <[email protected]>
1 parent 09c69de commit 3891504

27 files changed

+200 additions, -153 deletions (lines changed)

Dockerfile.build

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# Copyright 2024 IBM Inc. All rights reserved
21
# SPDX-License-Identifier: Apache-2.0
32

43
FROM quay.io/pypa/manylinux2014_x86_64

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# Copyright 2024 IBM Inc. All rights reserved
21
# SPDX-License-Identifier: Apache-2.0
32

43
PODMAN := $(shell podman -v 2> /dev/null)
@@ -21,6 +20,10 @@ unittest:
2120
TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_1 CUDA_VISIBLE_DEVICES="" pytest -s --cov=$(FST_DIR) tests/test_fastsafetensors.py && \
2221
TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_2 pytest -s --cov=$(FST_DIR) -s tests/test_vllm.py
2322

23+
test-vllm:
24+
@FST_DIR=$(FST_DIR); \
25+
TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_2 pytest -s --cov=$(FST_DIR) -s tests/test_vllm.py
26+
2427
unittest-parallel:
2528
TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_3 torchrun --nnodes=4 --master_addr=0.0.0.0 --master_port=1234 --node_rank=0 tests/test_multi.py --cov=$(FST_DIR) -s tests/test_multi.py > /tmp/3.log 2>&1 & \
2629
TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_4 torchrun --nnodes=4 --master_addr=0.0.0.0 --master_port=1234 --node_rank=1 tests/test_multi.py --cov=$(FST_DIR) -s tests/test_multi.py > /tmp/4.log 2>&1 & \

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ The performance gain example can be found at [amd-perf.md](./docs/amd-perf.md)
7272
### Install from Github Source
7373

7474
```bash
75-
pip install git+https://github.com/foundation-model-stack/fastsafetensors.git
75+
ROCM_PATH=/opt/rocm pip install git+https://github.com/foundation-model-stack/fastsafetensors.git
7676
```
7777

7878
### Install from source
7979

8080
```bash
81-
pip install .
81+
ROCM_PATH=/opt/rocm pip install .
8282
```

examples/tgis_weight.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# Copyright 2024 IBM Inc. All rights reserved
21
# SPDX-License-Identifier: Apache-2.0
32

43
import glob

fastsafetensors/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# Copyright 2024 IBM Inc. All rights reserved
21
# SPDX-License-Identifier: Apache-2.0
32

43
from importlib.metadata import version

fastsafetensors/common.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# Copyright 2024 IBM Inc. All rights reserved
21
# SPDX-License-Identifier: Apache-2.0
32

43
import json
4+
import logging
55
import os
66
import sys
77
from collections import OrderedDict
@@ -14,6 +14,20 @@
1414
from .st_types import Device, DType
1515

1616

17+
def init_logger(name: str):
18+
return logging.getLogger(name)
19+
20+
21+
def set_debug():
22+
logging.basicConfig(
23+
format="[%(levelname)s] %(message)s", level=logging.DEBUG, force=True
24+
)
25+
26+
27+
def is_debug(logger: logging.Logger) -> bool:
28+
return logger.isEnabledFor(logging.DEBUG)
29+
30+
1731
def is_gpu_found():
1832
"""Check if any GPU (CUDA or HIP) is available.
1933
@@ -81,7 +95,7 @@ def __init__(
8195
f"validate(tensor {k}): InvalidOffset s={s}, start={start}, e={e}, src={src}"
8296
)
8397
# if (header_length + s) % CUDA_PTR_ALIGN > 0:
84-
# print(f"[WARNING] misaligned tensor is detected at {header_length + s}. this will cause cuda pointer alignment errors later.")
98+
# logger.warning(f"misaligned tensor is detected at {header_length + s}. this will cause cuda pointer alignment errors later.")
8599
start = e
86100
nelements = 1
87101
for sh in t.shape:

fastsafetensors/copier/example_copier.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ def __init__(
1616
device: Device,
1717
reader,
1818
framework: FrameworkOpBase,
19-
debug_log: bool = False,
2019
):
2120
pass
2221

@@ -49,8 +48,7 @@ def construct_copier(
4948
metadata: SafeTensorsMetadata,
5049
device: Device,
5150
framework: FrameworkOpBase,
52-
debug_log: bool = False,
5351
) -> CopierInterface:
54-
return ExampleCopier(metadata, device, reader, framework, debug_log)
52+
return ExampleCopier(metadata, device, reader, framework)
5553

5654
return construct_copier

fastsafetensors/copier/gds.py

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
1-
# Copyright 2024 IBM Inc. All rights reserved
21
# SPDX-License-Identifier: Apache-2.0
32

43
import warnings
54
from typing import Dict, Optional
65

76
from .. import cpp as fstcpp
8-
from ..common import SafeTensorsMetadata, is_gpu_found
7+
from ..common import SafeTensorsMetadata, init_logger, is_gpu_found
98
from ..frameworks import FrameworkOpBase, TensorBase
109
from ..st_types import Device, DeviceType, DType
1110
from .base import CopierInterface
1211
from .nogds import NoGdsFileCopier
1312

13+
logger = init_logger(__name__)
14+
1415

1516
class GdsFileCopier(CopierInterface):
1617
def __init__(
@@ -19,13 +20,11 @@ def __init__(
1920
device: Device,
2021
reader: fstcpp.gds_file_reader,
2122
framework: FrameworkOpBase,
22-
debug_log: bool = False,
2323
):
2424
self.framework = framework
2525
self.metadata = metadata
2626
self.device = device
2727
self.reader = reader
28-
self.debug_log = debug_log
2928
self.gbuf = None
3029
self.fh: Optional[fstcpp.gds_file_handle] = None
3130
self.copy_reqs: Dict[int, int] = {}
@@ -143,15 +142,13 @@ def wait_io(
143142
l = self.aligned_length - misaligned_bytes - count
144143
if l > length:
145144
l = length
146-
if self.debug_log:
147-
print(
148-
"wait_io: fix misalignment, src=0x{:x}, misaligned_bytes={}, count={}, tmp=0x{:x}".format(
149-
gbuf.get_base_address(),
150-
misaligned_bytes,
151-
count,
152-
tmp_gbuf.get_base_address(),
153-
)
154-
)
145+
logger.debug(
146+
"wait_io: fix misalignment, src=0x%x, misaligned_bytes=%d, count=%d, tmp=0x%x",
147+
gbuf.get_base_address(),
148+
misaligned_bytes,
149+
count,
150+
tmp_gbuf.get_base_address(),
151+
)
155152
gbuf.memmove(count, misaligned_bytes + count, tmp_gbuf, l)
156153
count += l
157154
self.framework.free_tensor_memory(tmp_gbuf, self.device)
@@ -200,9 +197,8 @@ def construct_nogds_copier(
200197
metadata: SafeTensorsMetadata,
201198
device: Device,
202199
framework: FrameworkOpBase,
203-
debug_log: bool = False,
204200
) -> CopierInterface:
205-
return NoGdsFileCopier(metadata, device, nogds_reader, framework, debug_log)
201+
return NoGdsFileCopier(metadata, device, nogds_reader, framework)
206202

207203
return construct_nogds_copier
208204

@@ -212,8 +208,7 @@ def construct_copier(
212208
metadata: SafeTensorsMetadata,
213209
device: Device,
214210
framework: FrameworkOpBase,
215-
debug_log: bool = False,
216211
) -> CopierInterface:
217-
return GdsFileCopier(metadata, device, reader, framework, debug_log)
212+
return GdsFileCopier(metadata, device, reader, framework)
218213

219214
return construct_copier

fastsafetensors/copier/nogds.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# Copyright 2024 IBM Inc. All rights reserved
21
# SPDX-License-Identifier: Apache-2.0
32

43
import os
@@ -7,7 +6,7 @@
76
from .. import cpp as fstcpp
87
from ..common import SafeTensorsMetadata
98
from ..frameworks import FrameworkOpBase, TensorBase
10-
from ..st_types import Device, DeviceType, DType
9+
from ..st_types import Device, DType
1110
from .base import CopierInterface
1211

1312

@@ -18,7 +17,6 @@ def __init__(
1817
device: Device,
1918
reader: fstcpp.nogds_file_reader,
2019
framework: FrameworkOpBase,
21-
debug_log: bool = False,
2220
):
2321
self.framework = framework
2422
self.metadata = metadata
@@ -29,7 +27,6 @@ def __init__(
2927
f"NoGdsFileCopier.__init__: failed to open, file={metadata.src}"
3028
)
3129
self.device = device
32-
self.debug_log = debug_log
3330
self.reqs: List[int] = []
3431

3532
def submit_io(

fastsafetensors/cpp.pyi

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# Copyright 2025 IBM Inc. All rights reserved
21
# SPDX-License-Identifier: Apache-2.0
32

43
class gds_device_buffer:
@@ -56,7 +55,7 @@ def cpu_malloc(length: int) -> int: ...
5655
def cpu_free(addr: int) -> None: ...
5756
def gpu_malloc(length: int) -> int: ...
5857
def gpu_free(addr: int) -> None: ...
59-
def load_nvidia_functions() -> None: ...
58+
def load_library_functions() -> None: ...
6059
def get_cpp_metrics() -> cpp_metrics: ...
6160
def set_gil_release(gil_release: bool) -> None: ...
6261
def get_gil_release() -> bool: ...

0 commit comments

Comments
 (0)