diff --git a/Makefile b/Makefile index 817310e..6a3b5c0 100644 --- a/Makefile +++ b/Makefile @@ -100,8 +100,8 @@ cpuonly: $(BUILD_DIR) env HIP_INCLUDE := -I $(ROOT_DIR)/csrc -I $(ROOT_DIR)/include -# -I /opt/rocm-5.3.0/hipcub/include -HIP_LIB := -L/opt/rocm-5.3.0/lib -L/opt/rocm-5.3.0/llvm/bin/../lib/clang/15.0.0/lib/linux -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib64 -L/lib/x86_64-linux-gnu -L/lib/../lib64 -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib64 -L/lib -L/usr/lib -lgcc_s -lgcc -lpthread -lm -lrt -lamdhip64 -lhipblas -lhipsparse -lclang_rt.builtins-x86_64 -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc +# -I /opt/rocm/hipcub/include +HIP_LIB := -L/opt/rocm/lib -L/opt/rocm/llvm/bin/../lib/clang/15.0.0/lib/linux -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib64 -L/lib/x86_64-linux-gnu -L/lib/../lib64 -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib64 -L/lib -L/usr/lib -lgcc_s -lgcc -lpthread -lm -lrt -lamdhip64 -lhipblas -lhipsparse -lclang_rt.builtins-x86_64 -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc hip: $(BUILD_DIR) /usr/bin/hipcc -std=c++14 -c -fPIC --amdgpu-target=gfx1030 $(HIP_INCLUDE) -o $(BUILD_DIR)/ops.o -D NO_CUBLASLT $(CSRC)/ops.cu diff --git a/README.md b/README.md index 7d35a80..0a5596a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# bitsandbytes +# bitsandbytes-rocm The bitsandbytes is a lightweight wrapper around CUDA custom functions, in particular 8-bit optimizers, matrix multiplication (LLM.int8()), and quantization functions. @@ -12,8 +12,17 @@ Resources: ## TL;DR **Requirements** Linux distribution (Ubuntu, MacOS, etc.) + CUDA >= 10.0. LLM.int8() requires Turing or Ampere GPUs. -**Installation**: -``pip install bitsandbytes`` + +**Compilation quickstart:** + +The HIP version does not provide a binary release, so you need to compile from source. If compilation fails, please consider submitting a bug report with the output of ``python -m bitsandbytes``.
+ +```shell +git clone https://github.com/xxx/bitsandbytes.git +cd bitsandbytes +make hip +CUDA_VERSION=gfx1030 python setup.py install +``` **Using 8-bit optimizer**: 1. Comment out optimizer: ``#torch.optim.Adam(....)``