Skip to content

Commit f57addd

Browse files
committed
Create hip diagnostics functions
1 parent f39ff48 commit f57addd

File tree

3 files changed

+105
-82
lines changed

3 files changed

+105
-82
lines changed

bitsandbytes/cextension.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def get_native_library() -> BNBNativeLibrary:
120120
hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2])
121121
HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor
122122
BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}"
123-
BNB_BACKEND = "ROCM"
123+
BNB_BACKEND = "ROCm"
124124
else:
125125
HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0
126126
BNB_HIP_VERSION_SHORT = ""

bitsandbytes/diagnostics/cuda.py

Lines changed: 100 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -108,59 +108,38 @@ def find_cudart_libraries() -> Iterator[Path]:
108108
yield from find_cuda_libraries_in_path_list(value)
109109

110110

111-
def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
112-
if not HIP_ENVIRONMENT:
113-
print(
114-
f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, "
115-
f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.",
116-
)
117-
else:
118-
print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}")
111+
def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
112+
print(
113+
f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, "
114+
f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.",
115+
)
119116

120117
binary_path = get_cuda_bnb_library_path(cuda_specs)
121118
if not binary_path.exists():
122-
if not HIP_ENVIRONMENT:
123-
print_dedented(
124-
f"""
125-
Library not found: {binary_path}. Maybe you need to compile it from source?
126-
If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`,
127-
for example, `make CUDA_VERSION=113`.
128-
129-
The CUDA version for the compile might depend on your conda install, if using conda.
130-
Inspect CUDA version via `conda list | grep cuda`.
131-
""",
132-
)
133-
else:
134-
print_dedented(
135-
f"""
136-
Library not found: {binary_path}.
137-
Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION
138-
in PyTorch Settings matches your ROCM install. If not, reinstall PyTorch for your ROCm version
139-
and rebuild bitsandbytes.
140-
""",
141-
)
119+
print_dedented(
120+
f"""
121+
Library not found: {binary_path}. Maybe you need to compile it from source?
122+
If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`,
123+
for example, `make CUDA_VERSION=113`.
124+
125+
The CUDA version for the compile might depend on your conda install, if using conda.
126+
Inspect CUDA version via `conda list | grep cuda`.
127+
""",
128+
)
142129

143130
cuda_major, cuda_minor = cuda_specs.cuda_version_tuple
144-
if not HIP_ENVIRONMENT:
145-
if cuda_major < 11:
146-
print_dedented(
147-
"""
148-
WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8().
149-
You will be only to use 8-bit optimizers and quantization routines!
150-
""",
151-
)
152-
153-
print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}")
154-
else:
155-
if (cuda_major, cuda_minor) < (6, 1):
156-
print_dedented(
157-
"""
158-
WARNING: bitandbytes is fully supported only from ROCm 6.1.
159-
""",
160-
)
131+
if cuda_major < 11:
132+
print_dedented(
133+
"""
134+
WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8().
135+
You will only be able to use 8-bit optimizers and quantization routines!
136+
""",
137+
)
138+
139+
print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}")
161140

162141
# 7.5 is the minimum CC for cublaslt
163-
if not cuda_specs.has_cublaslt and not HIP_ENVIRONMENT:
142+
if not cuda_specs.has_cublaslt:
164143
print_dedented(
165144
"""
166145
WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU!
@@ -174,44 +153,88 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
174153
# (2) Multiple CUDA versions installed
175154

176155

177-
def print_cuda_runtime_diagnostics() -> None:
156+
def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None:
157+
print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}")
158+
159+
binary_path = get_cuda_bnb_library_path(cuda_specs)
160+
if not binary_path.exists():
161+
print_dedented(
162+
f"""
163+
Library not found: {binary_path}.
164+
Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION
165+
in PyTorch Settings matches your ROCm install. If not, reinstall PyTorch for your ROCm version
166+
and rebuild bitsandbytes.
167+
""",
168+
)
169+
170+
hip_major, hip_minor = cuda_specs.cuda_version_tuple
171+
if (hip_major, hip_minor) < (6, 1):
172+
print_dedented(
173+
"""
174+
WARNING: bitsandbytes is fully supported only from ROCm 6.1.
175+
""",
176+
)
177+
178+
179+
def print_diagnostics(cuda_specs: CUDASpecs) -> None:
180+
if HIP_ENVIRONMENT:
181+
_print_hip_diagnostics(cuda_specs)
182+
else:
183+
_print_cuda_diagnostics(cuda_specs)
184+
185+
186+
def _print_cuda_runtime_diagnostics() -> None:
187+
cudart_paths = list(find_cudart_libraries())
188+
if not cudart_paths:
189+
print("WARNING! CUDA runtime files not found in any environmental path.")
190+
elif len(cudart_paths) > 1:
191+
print_dedented(
192+
f"""
193+
Found duplicate CUDA runtime files (see below).
194+
195+
We select the PyTorch default CUDA runtime, which is {torch.version.cuda},
196+
but this might mismatch with the CUDA version that is needed for bitsandbytes.
197+
To override this behavior set the `BNB_CUDA_VERSION=<version string, e.g. 122>` environmental variable.
198+
199+
For example, if you want to use the CUDA version 122,
200+
BNB_CUDA_VERSION=122 python ...
201+
202+
OR set the environmental variable in your .bashrc:
203+
export BNB_CUDA_VERSION=122
204+
205+
In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g.
206+
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2,
207+
""",
208+
)
209+
for pth in cudart_paths:
210+
print(f"* Found CUDA runtime at: {pth}")
211+
212+
213+
def _print_hip_runtime_diagnostics() -> None:
178214
cudart_paths = list(find_cudart_libraries())
179215
if not cudart_paths:
180-
print(f"{BNB_BACKEND} SETUP: WARNING! {BNB_BACKEND} runtime files not found in any environmental path.")
216+
print("WARNING! ROCm runtime files not found in any environmental path.")
181217
elif len(cudart_paths) > 1:
182-
backend_version = torch.version.cuda if not HIP_ENVIRONMENT else torch.version.hip
183218
print_dedented(
184219
f"""
185-
Found duplicate {BNB_BACKEND} runtime files (see below).
220+
Found duplicate ROCm runtime files (see below).
186221
187-
We select the PyTorch default {BNB_BACKEND} runtime, which is {backend_version},
188-
but this might mismatch with the {BNB_BACKEND} version that is needed for bitsandbytes.
222+
We select the PyTorch default ROCm runtime, which is {torch.version.hip},
223+
but this might mismatch with the ROCm version that is needed for bitsandbytes.
224+
225+
To resolve it, install PyTorch built for the ROCm version you want to use
226+
227+
and set LD_LIBRARY_PATH to your ROCm install path, e.g.
228+
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm-6.1.2/lib,
189229
""",
190230
)
191-
if not HIP_ENVIRONMENT:
192-
print_dedented(
193-
"""
194-
To override this behavior set the `BNB_CUDA_VERSION=<version string, e.g. 122>` environmental variable.
195-
196-
For example, if you want to use the CUDA version 122,
197-
BNB_CUDA_VERSION=122 python ...
198-
199-
OR set the environmental variable in your .bashrc:
200-
export BNB_CUDA_VERSION=122
201-
202-
In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g.
203-
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2,
204-
""",
205-
)
206-
else:
207-
print_dedented(
208-
"""
209-
To resolve it, install PyTorch built for the ROCm version you want to use
210-
211-
and set LD_LIBRARY_PATH to your ROCm install path, e.g.
212-
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/rocm-6.1.2,
213-
""",
214-
)
215231

216232
for pth in cudart_paths:
217-
print(f"* Found {BNB_BACKEND} runtime at: {pth}")
233+
print(f"* Found ROCm runtime at: {pth}")
234+
235+
236+
def print_runtime_diagnostics() -> None:
237+
if HIP_ENVIRONMENT:
238+
_print_hip_runtime_diagnostics()
239+
else:
240+
_print_cuda_runtime_diagnostics()

bitsandbytes/diagnostics/main.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
from bitsandbytes.consts import PACKAGE_GITHUB_URL
88
from bitsandbytes.cuda_specs import get_cuda_specs
99
from bitsandbytes.diagnostics.cuda import (
10-
print_cuda_diagnostics,
11-
print_cuda_runtime_diagnostics,
10+
print_diagnostics,
11+
print_runtime_diagnostics,
1212
)
1313
from bitsandbytes.diagnostics.utils import print_dedented, print_header
1414

@@ -63,8 +63,8 @@ def main():
6363
print(f"2. {BNB_BACKEND} not installed")
6464
print(f"3. You have multiple conflicting {BNB_BACKEND} libraries")
6565
if cuda_specs:
66-
print_cuda_diagnostics(cuda_specs)
67-
print_cuda_runtime_diagnostics()
66+
print_diagnostics(cuda_specs)
67+
print_runtime_diagnostics()
6868
print_header("")
6969
print_header("DEBUG INFO END")
7070
print_header("")

0 commit comments

Comments
 (0)