
Commit da698f6

Switch to PyTorch 2.2 nightly; update flash-attention to 2.3.6
1 parent d4d96ce commit da698f6

File tree (3 files changed: +5 additions, -4 deletions)

Dockerfile
server/Makefile-flash-att-v2
server/text_generation_server/utils/flash_attn.py

Dockerfile

Lines changed: 3 additions & 3 deletions
@@ -1,9 +1,9 @@
 ## Global Args #################################################################
 ARG BASE_UBI_IMAGE_TAG=9.3-1361.1699548029
 ARG PROTOC_VERSION=25.1
-ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
-#ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
-ARG PYTORCH_VERSION=2.1.1
+#ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
+ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
+ARG PYTORCH_VERSION=2.2.0.dev20231206
 
 ## Base Layer ##################################################################
 FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} as base
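Note (not part of the commit): since the image now pulls a pinned nightly wheel, a quick sanity check inside the built container can confirm the expected build; a minimal sketch, assuming torch is importable in the image's Python environment:

# Illustrative check, not from this repo: confirm the container picked up
# the PyTorch nightly pinned by PYTORCH_VERSION above.
import torch

EXPECTED = "2.2.0.dev20231206"
assert torch.__version__.startswith(EXPECTED), (
    f"expected a PyTorch {EXPECTED} build, found {torch.__version__}"
)
print("PyTorch build:", torch.__version__)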

server/Makefile-flash-att-v2

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-flash_att_v2_commit := v2.3.4
+flash_att_v2_commit := v2.3.6
 
 flash-attention-v2:
 	# Clone flash attention
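Note (not part of the commit): a similar post-build sketch for the flash-attention bump, assuming the flash_attn package built from the pinned tag is importable:

# Illustrative check, not from this repo: verify the installed flash-attention
# matches the tag v2.3.6 pinned in Makefile-flash-att-v2.
import flash_attn

assert flash_attn.__version__ == "2.3.6", flash_attn.__version__
print("flash-attention:", flash_attn.__version__)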

server/text_generation_server/utils/flash_attn.py

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ def attention(
         out,
         cu_seqlens_q,
         cu_seqlens,
+        None,
         max_s_q,
         max_s,
         0.0,
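Note (not part of the commit): the added None fills a new optional positional parameter introduced in the flash-attention 2.3.x varlen_fwd kernel interface (a per-sequence used-key-length tensor, commonly named seqused_k); passing None keeps the previous behavior. The sketch below shows roughly where it sits in the call; argument names and the trailing arguments follow the upstream flash_attn_2_cuda.varlen_fwd signature for the 2.3 series and are an approximation, not a copy of this file's exact call site:

# Approximate shape of the updated kernel call; an assumption-based sketch,
# not verbatim from server/text_generation_server/utils/flash_attn.py.
import flash_attn_2_cuda as flash_attn_cuda

def attention_sketch(q, k, v, out, cu_seqlens_q, cu_seqlens, max_s_q, max_s, softmax_scale):
    return flash_attn_cuda.varlen_fwd(
        q,
        k,
        v,
        out,
        cu_seqlens_q,
        cu_seqlens,
        None,          # new in flash-attention 2.3.x: optional seqused_k tensor
        max_s_q,
        max_s,
        0.0,           # dropout probability
        softmax_scale,
        False,         # zero_tensors
        True,          # causal (assumed for this call site)
        -1,            # window_size_left: no sliding-window limit
        -1,            # window_size_right: no sliding-window limit
        False,         # return_softmax
        None,          # RNG generator
    )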
