
Commit bc2c2a8

validated case1 missing dep + case2 custom cuda, yet missing lib
1 parent 43ee60f commit bc2c2a8
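
The second case named in the commit message ("custom cuda, yet missing lib") can be exercised roughly as in the sketch below. This is a hypothetical reproduction, not part of the commit: it assumes a bitsandbytes build containing this change is installed, that torch detects CUDA, and that "11.4" is merely an example of a CUDA version for which no pre-compiled libbitsandbytes binary ships.

    import os

    # BNB_CUDA_VERSION must be set before bitsandbytes is imported, because the
    # native binary is resolved once at import time.
    os.environ["BNB_CUDA_VERSION"] = "11.4"  # example: a version with no shipped binary

    import bitsandbytes.cextension as cextension  # noqa: E402

    # With this commit, a missing pre-compiled binary no longer falls back to the
    # CPU library with only a warning: get_native_library() raises, and the module
    # substitutes the error-handling mock, which re-raises on the first native call.
    print(type(cextension.lib).__name__)  # ErrorHandlerMockBNBNativeLibrary in this scenario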

1 file changed: bitsandbytes/cextension.py (+162 -50 lines)
@@ -83,7 +83,7 @@ def parse_cuda_version(version_str: str) -> str:
     return version_str # fallback as safety net


-def _format_cuda_error_message(
+def _format_lib_error_message(
     available_versions: list[str],
     user_cuda_version: str,
     original_error: str = "",
@@ -94,7 +94,7 @@ def _format_cuda_error_message(
     no_cuda_lib_found = requested_version not in available_versions

     if no_cpu_lib_found:
-        analysis = "🚨 Needed to load CPU-only bitsandbytes library, but it's not available 🚨\n\n"
+        analysis = "🚨 Failed to load CPU-only bitsandbytes library 🚨\n\n"

     elif no_cuda_lib_found:
         version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
@@ -104,36 +104,45 @@ def _format_cuda_error_message(
             f"Detected PyTorch CUDA version: {user_cuda_version}\n"
             f"Available pre-compiled versions: {version_list_str}\n\n"
             "This means:\n"
-            "1. The version you're trying to use is NOT distributed with this package\n"
+            "The version you're trying to use is NOT distributed with this package\n\n"
             if available_versions
             else "1. You're not using the package but checked-out the source code\n"
             "2. You MUST compile from source for this specific CUDA version\n"
-            "3. The installation will NOT work until you compile or choose a CUDA supported version\n\n"
+            "3. The installation will NOT work until you compile or choose a CUDA supported version via `export BNB_CUDA_VERSION=<version>`\n\n"
         )

     base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"

     troubleshooting = (
-        "This typically happens when:\n1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
-        if no_cuda_lib_found
-        else "1. You checked the code out from source and your torch installation doesn't detect CUDA on your machine\n"
-        "2. The library wasn't compiled properly during installation from source\n"
-        "3. Missing CUDA dependencies\n\n"
+        (
+            "This typically happens when:\n"
+            "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+            "2. The library wasn't compiled properly during installation from source\n"
+            "3. Missing CUDA dependencies\n\n"
+        )
         if no_cuda_lib_found
-        else ""
+        else "This typically happens when you checked the code out from source and your torch installation doesn't detect CUDA on your machine.\n\n"
     )

     note = (
-        "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
-        "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+        (
+            "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
+            "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+        )
+        if no_cuda_lib_found
+        else ""
     )

     compile_instructions = (
-        "You have three options:\n"
-        "1. COMPILE FROM SOURCE (required if no binary exists):\n"
-        " https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
-        "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above\n"
-        "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
+        (
+            "You have three options:\n"
+            "1. COMPILE FROM SOURCE (required if no binary exists):\n"
+            " https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
+            "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above\n"
+            "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
+        )
+        if no_cuda_lib_found
+        else "COMPILE FROM SOURCE for CPU-only:\n `cmake -DCOMPUTE_BACKEND=cpu -S . && make`\n\n"
     )

     diagnostics = (
@@ -149,7 +158,7 @@ def _format_cuda_error_message(
     return f"{analysis}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"


-class MockBNBNativeLibrary(BNBNativeLibrary):
+class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary):
     """
     Mock BNBNativeLibrary that raises an error when trying to use native library
     functionality without successfully loading the library.
@@ -160,24 +169,133 @@ class MockBNBNativeLibrary(BNBNativeLibrary):
     def __init__(self, error_msg: str):
         self.error_msg = error_msg
         self.user_cuda_version = get_cuda_version_tuple()
+        self.available_versions = get_available_cuda_binary_versions()
+        self.override_value = os.environ.get("BNB_CUDA_VERSION")
+        self.requested_version = (
+            parse_cuda_version(self.override_value)
+            if self.override_value
+            else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+            if self.user_cuda_version
+            else "unknown"
+        )

-    def __getattr__(self, name):
-        available_versions = get_available_cuda_binary_versions()
-        override_value = os.environ.get("BNB_CUDA_VERSION")
+        # Pre-generate the error message based on error type
+        if "cannot open shared object file" in error_msg:
+            self.formatted_error = self._format_dependency_error()
+        else: # lib loading errors
+            self.formatted_error = self._format_lib_error_message(
+                available_versions=self.available_versions,
+                user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+                if self.user_cuda_version
+                else "unknown",
+                original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "",
+                requested_version=self.requested_version,
+            )

-        requested_version = (
-            parse_cuda_version(override_value)
-            if override_value
-            else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+    def _format_lib_error_message(
+        self,
+        available_versions: list[str],
+        user_cuda_version: str,
+        original_error: str = "",
+        requested_version: Optional[str] = None,
+    ) -> str:
+        """Format detailed error message for library loading failures"""
+        analysis = ""
+        no_cpu_lib_found = "libbitsandbytes_cpu.so: cannot open" in original_error
+        no_cuda_lib_found = "CUDA binary not found" in original_error
+
+        if no_cpu_lib_found:
+            analysis = "\n🚨 Failed to load CPU-only bitsandbytes library 🚨\n\n"
+
+        elif no_cuda_lib_found:
+            version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
+            analysis = (
+                f"\n🚨 CUDA VERSION MISMATCH 🚨\n"
+                f"Requested CUDA version: {requested_version}\n"
+                f"Detected PyTorch CUDA version: {user_cuda_version}\n"
+                f"Available pre-compiled versions: {version_list_str}\n\n"
+                "This means:\n"
+                "The version you're trying to use is NOT distributed with this package\n\n"
+                if available_versions
+                else "1. You're not using the package but checked-out the source code\n"
+                "2. You MUST compile from source for this specific CUDA version\n"
+                "3. The installation will NOT work until you compile or choose a CUDA supported version via export BNB_CUDA_VERSION=<version>\n\n"
+            )
+
+        base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
+
+        troubleshooting = (
+            (
+                "This typically happens when:\n"
+                "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+                "2. The library wasn't compiled properly during installation from source\n\n"
+            )
+            if no_cuda_lib_found
+            else "This typically happens when you checked the code out from source and your torch installation doesn't detect CUDA on your machine.\n\n"
         )

-        msg = _format_cuda_error_message(
-            available_versions=available_versions,
-            user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}",
-            original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "",
-            requested_version=requested_version,
+        note = (
+            (
+                "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
+                "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+            )
+            if no_cuda_lib_found
+            else ""
         )
-        raise RuntimeError(msg)
+
+        compile_instructions = (
+            (
+                "You have two options:\n"
+                "1. COMPILE FROM SOURCE (required if no binary exists):\n"
+                " https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
+                "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above\n\n"
+            )
+            if no_cuda_lib_found
+            else "COMPILE FROM SOURCE for CPU-only:\n `cmake -DCOMPUTE_BACKEND=cpu -S . && make`\n\n"
+        )
+
+        diagnostics = (
+            "🔍 Run this command for detailed diagnostics:\n"
+            "python -m bitsandbytes\n\n"
+            "If you've tried everything and still have issues:\n"
+            "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+            "2. Describe what you've tried in detail\n"
+            "3. Open an issue with this information:\n"
+            " https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+        )
+
+        return f"{analysis}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"
+
+    def _format_dependency_error(self) -> str:
+        """Format error message for missing shared libraries"""
+        # Extract missing library name from error
+        error_parts = self.error_msg.split(":")
+        missing_lib = error_parts[0].strip() if len(error_parts) > 0 else "unknown library"
+        cuda_major_version = (
+            self.requested_version.split(".")[0] if "." in self.requested_version else self.requested_version
+        )
+
+        return (
+            f"\n🚨 CUDA SETUP ERROR: Missing dependency: {missing_lib} 🚨\n\n"
+            f"CUDA {cuda_major_version}.x runtime libraries were not found in the LD_LIBRARY_PATH.\n\n"
+            f"To fix this, make sure that:\n"
+            f"1. You have installed CUDA {cuda_major_version}.x toolkit on your system\n"
+            f"2. The CUDA runtime libraries are in your LD_LIBRARY_PATH\n\n"
+            f"You can add them with (and persist the change by adding the line to your .bashrc):\n"
+            f" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/cuda-{cuda_major_version}.x/lib64\n\n"
+            f"Original error: {self.error_msg}\n\n"
+            f"🔍 Run this command for detailed diagnostics:\n"
+            f"python -m bitsandbytes\n\n"
+            f"If you've tried everything and still have issues:\n"
+            f"1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+            f"2. Describe what you've tried in detail\n"
+            f"3. Open an issue with this information:\n"
+            f" https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+        )
+
+    def __getattr__(self, name):
+        """Raise error with detailed message when any attribute is accessed"""
+        raise RuntimeError(f"{self.formatted_error}Native code method attempted to access: lib.{name}()")

     def __getitem__(self, name):
         return self.__getattr__(name)
@@ -187,43 +305,37 @@ def get_native_library() -> BNBNativeLibrary:
     """
     Load CUDA library XOR CPU, as the latter contains a subset of symbols of the former.
     """
-    binary_path = PACKAGE_DIR / f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
     cuda_specs = get_cuda_specs()
+    binary_path = PACKAGE_DIR / f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
+
     if cuda_specs:
         cuda_binary_path = get_cuda_bnb_library_path(cuda_specs)
-        if cuda_binary_path.exists():
-            binary_path = cuda_binary_path
-        else:
-            available_versions = get_available_cuda_binary_versions()
-            env_version = os.environ.get("BNB_CUDA_VERSION")
-
-            requested_version = parse_cuda_version(env_version) if env_version else cuda_specs.cuda_version_string
-
-            msg = _format_cuda_error_message(
-                available_versions=available_versions,
-                user_cuda_version=cuda_specs.cuda_version_string,
-                requested_version=requested_version,
-            )
-            logger.warning(msg)
+
+        if not cuda_binary_path.exists():
+            raise RuntimeError(f"Configured CUDA binary not found at {cuda_binary_path}")
+
+        binary_path = cuda_binary_path

     logger.debug(f"Loading bitsandbytes native library from: {binary_path}")
+
+    # Try to load the library - any errors will propagate up
     dll = ct.cdll.LoadLibrary(str(binary_path))

     if hasattr(dll, "get_context"): # only a CUDA-built library exposes this
         return CudaBNBNativeLibrary(dll)

     logger.warning(
         "The installed version of bitsandbytes was compiled without GPU support. "
-        "8-bit optimizers and GPU quantization are unavailable.",
+        "8-bit optimizers and GPU quantization are unavailable."
     )
     return BNBNativeLibrary(dll)


 try:
     lib = get_native_library()
 except Exception as e:
-    error_msg = f"Could not load bitsandbytes native library: {e}"
-    logger.error(error_msg, exc_info=False)
+    error_msg = str(e)
+    logger.error(f"bitsandbytes library load error: {error_msg}\n", exc_info=True)

     # create a mock with error messaging as fallback
-    lib = MockBNBNativeLibrary(error_msg)
+    lib = ErrorHandlerMockBNBNativeLibrary(error_msg)
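
The deferred-error behaviour introduced above can be sanity-checked for both validated cases without a real import failure. A minimal sketch, assuming a bitsandbytes build containing this commit is importable; the two error strings are illustrative stand-ins for case 1 (missing shared-object dependency) and case 2 (no matching pre-compiled CUDA binary), and cget_managed_ptr is just an arbitrary attribute name:

    from bitsandbytes.cextension import ErrorHandlerMockBNBNativeLibrary

    simulated_errors = [
        # case 1: a CUDA runtime dependency is missing -> routed to _format_dependency_error()
        "libcudart.so.12: cannot open shared object file: No such file or directory",
        # case 2: requested CUDA binary not shipped -> routed to _format_lib_error_message()
        "Configured CUDA binary not found at /path/to/libbitsandbytes_cuda114.so",
    ]

    for err in simulated_errors:
        mock = ErrorHandlerMockBNBNativeLibrary(err)  # constructing the mock never raises
        try:
            getattr(mock, "cget_managed_ptr")  # any native symbol access raises the formatted error
        except RuntimeError as exc:
            print(str(exc)[:80])  # start of the pre-generated guidance message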
