Commit 92f1978

Update flash attention 2 to v2.3.4
1 parent: 54625a9

File tree

3 files changed: 8 additions & 8 deletions


Dockerfile

Lines changed: 5 additions & 5 deletions
@@ -32,7 +32,7 @@ ENV CUDA_VERSION=11.8.0 \
     NV_CUDA_COMPAT_VERSION=520.61.05-1

 RUN dnf config-manager \
-       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
        cuda-cudart-11-8-${NV_CUDA_CUDART_VERSION} \
        cuda-compat-11-8-${NV_CUDA_COMPAT_VERSION} \
@@ -53,7 +53,7 @@ ENV NV_NVTX_VERSION=11.8.86-1 \
     NV_LIBNCCL_PACKAGE_VERSION=2.15.5-1+cuda11.8

 RUN dnf config-manager \
-       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
        cuda-libraries-11-8-${NV_CUDA_LIB_VERSION} \
        cuda-nvtx-11-8-${NV_NVTX_VERSION} \
@@ -72,7 +72,7 @@ ENV NV_CUDA_CUDART_DEV_VERSION=11.8.89-1 \
     NV_LIBNCCL_DEV_PACKAGE_VERSION=2.15.5-1+cuda11.8

 RUN dnf config-manager \
-       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
        cuda-command-line-tools-11-8-${NV_CUDA_LIB_VERSION} \
        cuda-libraries-devel-11-8-${NV_CUDA_LIB_VERSION} \
@@ -158,7 +158,7 @@ RUN cd server && \
     make gen-server && \
     pip install ".[accelerate]" --no-cache-dir

-# Patch codegen model changes into transformers 4.34
+# Patch codegen model changes into transformers 4.35
 RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py

 # Install router
@@ -277,7 +277,7 @@ COPY proto proto
 COPY server server
 RUN cd server && make gen-server && pip install ".[accelerate, onnx-gpu, quantize]" --no-cache-dir

-# Patch codegen model changes into transformers 4.34.0
+# Patch codegen model changes into transformers 4.35
 RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py

 # Install router
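
Aside from bumping the transformers version named in the patch comments (4.34 to 4.35), these hunks only point dnf at the RHEL 9 CUDA repository instead of the RHEL 8 one; the pinned CUDA 11.8 package versions are unchanged. A minimal sketch of how one could sanity-check that the rhel9 repo still resolves those packages, assuming a RHEL 9 / UBI 9 base with the dnf config-manager plugin installed (this check is illustrative, not part of the commit):

# Add the RHEL 9 CUDA repo and confirm the pinned 11.8 packages are available (sketch).
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
dnf list available 'cuda-cudart-11-8*' 'cuda-compat-11-8*' 'cuda-libraries-11-8*'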

server/Makefile-flash-att

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ flash_att_commit := v1.0.9
 flash-attention:
	# Clone flash attention
	pip install packaging
-	git clone https://github.com/HazyResearch/flash-attention.git --branch main --single-branch
+	git clone https://github.com/Dao-AILab/flash-attention.git --branch main --single-branch

 build-flash-attention: flash-attention
	cd flash-attention && git fetch && git checkout $(flash_att_commit)

server/Makefile-flash-att-v2

Lines changed: 2 additions & 2 deletions
@@ -1,9 +1,9 @@
-flash_att_v2_commit := v2.3.2
+flash_att_v2_commit := v2.3.4

 flash-attention-v2:
	# Clone flash attention
	pip install packaging
-	git clone https://github.com/HazyResearch/flash-attention.git --branch main --single-branch flash-attention-v2
+	git clone https://github.com/Dao-AILab/flash-attention.git --branch main --single-branch flash-attention-v2

 build-flash-attention-v2: flash-attention-v2
	cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit)
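
For context, a rough sketch of how these Makefile targets could be exercised after the bump. The target names come from the hunk above; the assumption that the build target also installs the package, and the final version check, are illustrative and not shown in this diff:

# Clone the Dao-AILab repo, pin it at the new tag, and build (sketch).
cd server
make -f Makefile-flash-att-v2 flash-attention-v2
make -f Makefile-flash-att-v2 build-flash-attention-v2    # checks out flash_att_v2_commit = v2.3.4
# Illustrative check that the installed package reports the expected version.
python -c "import flash_attn; print(flash_attn.__version__)"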
