foundation-model-stack
diff --git a/Dockerfile.build b/Dockerfile.build
Lines changed: 0 additions & 1 deletion
diff --git a/Makefile b/Makefile
Lines changed: 4 additions & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/examples/tgis_weight.py b/examples/tgis_weight.py
Lines changed: 0 additions & 1 deletion
diff --git a/fastsafetensors/__init__.py b/fastsafetensors/__init__.py
Lines changed: 0 additions & 1 deletion
diff --git a/fastsafetensors/common.py b/fastsafetensors/common.py
Lines changed: 16 additions & 2 deletions
diff --git a/fastsafetensors/copier/example_copier.py b/fastsafetensors/copier/example_copier.py
Lines changed: 1 addition & 3 deletions
diff --git a/fastsafetensors/copier/gds.py b/fastsafetensors/copier/gds.py
Lines changed: 12 additions & 17 deletions
diff --git a/fastsafetensors/copier/nogds.py b/fastsafetensors/copier/nogds.py
Lines changed: 1 addition & 4 deletions
diff --git a/fastsafetensors/cpp.pyi b/fastsafetensors/cpp.pyi
Lines changed: 1 addition & 2 deletions
@@ -1,4 +1,3 @@
-# Copyright 2024 IBM Inc. All rights reserved
 # SPDX-License-Identifier: Apache-2.0
 
 FROM quay.io/pypa/manylinux2014_x86_64
 
@@ -1,4 +1,3 @@
-# Copyright 2024 IBM Inc. All rights reserved
 # SPDX-License-Identifier: Apache-2.0
 
 PODMAN := $(shell podman -v 2> /dev/null)
@@ -21,6 +20,10 @@ unittest:
 	TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_1 CUDA_VISIBLE_DEVICES="" pytest -s --cov=$(FST_DIR) tests/test_fastsafetensors.py && \
 	TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_2 pytest -s --cov=$(FST_DIR) -s tests/test_vllm.py
 
+test-vllm:
+	@FST_DIR=$(FST_DIR); \
+	TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_2 pytest -s --cov=$(FST_DIR) -s tests/test_vllm.py
+
 unittest-parallel:
 	TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_3 torchrun --nnodes=4 --master_addr=0.0.0.0 --master_port=1234 --node_rank=0 tests/test_multi.py --cov=$(FST_DIR) -s tests/test_multi.py > /tmp/3.log 2>&1 & \
 	TEST_FASTSAFETENSORS_FRAMEWORK=torch COVERAGE_FILE=.coverage_4 torchrun --nnodes=4 --master_addr=0.0.0.0 --master_port=1234 --node_rank=1 tests/test_multi.py --cov=$(FST_DIR) -s tests/test_multi.py > /tmp/4.log 2>&1 & \
 
@@ -72,11 +72,11 @@ The performance gain example can be found at [amd-perf.md](./docs/amd-perf.md)
 ### Install from Github Source
 
 ```bash
-pip install git+https://github.com/foundation-model-stack/fastsafetensors.git
+ROCM_PATH=/opt/rocm pip install git+https://github.com/foundation-model-stack/fastsafetensors.git
 ```
 
 ### Install from source
 
 ```bash
-pip install .
+ROCM_PATH=/opt/rocm pip install .
 ```
@@ -1,4 +1,3 @@
-# Copyright 2024 IBM Inc. All rights reserved
 # SPDX-License-Identifier: Apache-2.0
 
 import glob
 
@@ -1,4 +1,3 @@
-# Copyright 2024 IBM Inc. All rights reserved
 # SPDX-License-Identifier: Apache-2.0
 
 from importlib.metadata import version
 
@@ -1,7 +1,7 @@
-# Copyright 2024 IBM Inc. All rights reserved
 # SPDX-License-Identifier: Apache-2.0
 
 import json
+import logging
 import os
 import sys
 from collections import OrderedDict
@@ -14,6 +14,20 @@
 from .st_types import Device, DType
 
 
+def init_logger(name: str):
+    return logging.getLogger(name)
+
+
+def set_debug():
+    logging.basicConfig(
+        format="[%(levelname)s] %(message)s", level=logging.DEBUG, force=True
+    )
+
+
+def is_debug(logger: logging.Logger) -> bool:
+    return logger.isEnabledFor(logging.DEBUG)
+
+
 def is_gpu_found():
     """Check if any GPU (CUDA or HIP) is available.
 
@@ -81,7 +95,7 @@ def __init__(
                     f"validate(tensor {k}): InvalidOffset s={s}, start={start}, e={e}, src={src}"
                 )
             # if (header_length + s) % CUDA_PTR_ALIGN > 0:
-            #    print(f"[WARNING] misaligned tensor is detected at {header_length + s}. this will cause cuda pointer alignment errors later.")
+            #    logger.warning(f"misaligned tensor is detected at {header_length + s}. this will cause cuda pointer alignment errors later.")
             start = e
             nelements = 1
             for sh in t.shape:
 
@@ -16,7 +16,6 @@ def __init__(
         device: Device,
         reader,
         framework: FrameworkOpBase,
-        debug_log: bool = False,
     ):
         pass
 
@@ -49,8 +48,7 @@ def construct_copier(
         metadata: SafeTensorsMetadata,
         device: Device,
         framework: FrameworkOpBase,
-        debug_log: bool = False,
     ) -> CopierInterface:
-        return ExampleCopier(metadata, device, reader, framework, debug_log)
+        return ExampleCopier(metadata, device, reader, framework)
 
     return construct_copier
@@ -1,16 +1,17 @@
-# Copyright 2024 IBM Inc. All rights reserved
 # SPDX-License-Identifier: Apache-2.0
 
 import warnings
 from typing import Dict, Optional
 
 from .. import cpp as fstcpp
-from ..common import SafeTensorsMetadata, is_gpu_found
+from ..common import SafeTensorsMetadata, init_logger, is_gpu_found
 from ..frameworks import FrameworkOpBase, TensorBase
 from ..st_types import Device, DeviceType, DType
 from .base import CopierInterface
 from .nogds import NoGdsFileCopier
 
+logger = init_logger(__name__)
+
 
 class GdsFileCopier(CopierInterface):
     def __init__(
@@ -19,13 +20,11 @@ def __init__(
         device: Device,
         reader: fstcpp.gds_file_reader,
         framework: FrameworkOpBase,
-        debug_log: bool = False,
     ):
         self.framework = framework
         self.metadata = metadata
         self.device = device
         self.reader = reader
-        self.debug_log = debug_log
         self.gbuf = None
         self.fh: Optional[fstcpp.gds_file_handle] = None
         self.copy_reqs: Dict[int, int] = {}
@@ -143,15 +142,13 @@ def wait_io(
                 l = self.aligned_length - misaligned_bytes - count
                 if l > length:
                     l = length
-                if self.debug_log:
-                    print(
-                        "wait_io: fix misalignment, src=0x{:x}, misaligned_bytes={}, count={}, tmp=0x{:x}".format(
-                            gbuf.get_base_address(),
-                            misaligned_bytes,
-                            count,
-                            tmp_gbuf.get_base_address(),
-                        )
-                    )
+                logger.debug(
+                    "wait_io: fix misalignment, src=0x%x, misaligned_bytes=%d, count=%d, tmp=0x%x",
+                    gbuf.get_base_address(),
+                    misaligned_bytes,
+                    count,
+                    tmp_gbuf.get_base_address(),
+                )
                 gbuf.memmove(count, misaligned_bytes + count, tmp_gbuf, l)
                 count += l
             self.framework.free_tensor_memory(tmp_gbuf, self.device)
@@ -200,9 +197,8 @@ def construct_nogds_copier(
             metadata: SafeTensorsMetadata,
             device: Device,
             framework: FrameworkOpBase,
-            debug_log: bool = False,
         ) -> CopierInterface:
-            return NoGdsFileCopier(metadata, device, nogds_reader, framework, debug_log)
+            return NoGdsFileCopier(metadata, device, nogds_reader, framework)
 
         return construct_nogds_copier
 
@@ -212,8 +208,7 @@ def construct_copier(
         metadata: SafeTensorsMetadata,
         device: Device,
         framework: FrameworkOpBase,
-        debug_log: bool = False,
     ) -> CopierInterface:
-        return GdsFileCopier(metadata, device, reader, framework, debug_log)
+        return GdsFileCopier(metadata, device, reader, framework)
 
     return construct_copier
@@ -1,4 +1,3 @@
-# Copyright 2024 IBM Inc. All rights reserved
 # SPDX-License-Identifier: Apache-2.0
 
 import os
@@ -7,7 +6,7 @@
 from .. import cpp as fstcpp
 from ..common import SafeTensorsMetadata
 from ..frameworks import FrameworkOpBase, TensorBase
-from ..st_types import Device, DeviceType, DType
+from ..st_types import Device, DType
 from .base import CopierInterface
 
 
@@ -18,7 +17,6 @@ def __init__(
         device: Device,
         reader: fstcpp.nogds_file_reader,
         framework: FrameworkOpBase,
-        debug_log: bool = False,
     ):
         self.framework = framework
         self.metadata = metadata
@@ -29,7 +27,6 @@ def __init__(
                 f"NoGdsFileCopier.__init__: failed to open, file={metadata.src}"
             )
         self.device = device
-        self.debug_log = debug_log
         self.reqs: List[int] = []
 
     def submit_io(
 
@@ -1,4 +1,3 @@
-# Copyright 2025 IBM Inc. All rights reserved
 # SPDX-License-Identifier: Apache-2.0
 
 class gds_device_buffer:
@@ -56,7 +55,7 @@ def cpu_malloc(length: int) -> int: ...
 def cpu_free(addr: int) -> None: ...
 def gpu_malloc(length: int) -> int: ...
 def gpu_free(addr: int) -> None: ...
-def load_nvidia_functions() -> None: ...
+def load_library_functions() -> None: ...
 def get_cpp_metrics() -> cpp_metrics: ...
 def set_gil_release(gil_release: bool) -> None: ...
 def get_gil_release() -> bool: ...
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-# Copyright 2024 IBM Inc. All rights reserved`
`2`	`1`	`# SPDX-License-Identifier: Apache-2.0`
`3`	`2`
`4`	`3`	`FROM quay.io/pypa/manylinux2014_x86_64`