
Commit da698f6

Switch to PyTorch 2.2 nightly; update flash-attention to 2.3.6
1 parent d4d96ce commit da698f6

File tree (3 files changed: +5 additions, -4 deletions)

Dockerfile
server/Makefile-flash-att-v2
server/text_generation_server/utils/flash_attn.py

Dockerfile

Lines changed: 3 additions & 3 deletions
@@ -1,9 +1,9 @@
 ## Global Args #################################################################
 ARG BASE_UBI_IMAGE_TAG=9.3-1361.1699548029
 ARG PROTOC_VERSION=25.1
-ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
-#ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
-ARG PYTORCH_VERSION=2.1.1
+#ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
+ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
+ARG PYTORCH_VERSION=2.2.0.dev20231206
 
 ## Base Layer ##################################################################
 FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} as base
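Note (not part of the commit): since the image now pulls a pinned nightly wheel, a quick sanity check inside the built container can confirm the expected build; a minimal sketch, assuming torch is importable in the image's Python environment:

# Illustrative check, not from this repo: confirm the container picked up
# the PyTorch nightly pinned by PYTORCH_VERSION above.
import torch

EXPECTED = "2.2.0.dev20231206"
assert torch.__version__.startswith(EXPECTED), (
    f"expected a PyTorch {EXPECTED} build, found {torch.__version__}"
)
print("PyTorch build:", torch.__version__)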

server/Makefile-flash-att-v2

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-flash_att_v2_commit := v2.3.4
+flash_att_v2_commit := v2.3.6
 
 flash-attention-v2:
 	# Clone flash attention
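Note (not part of the commit): a similar post-build sketch for the flash-attention bump, assuming the flash_attn package built from the pinned tag is importable:

# Illustrative check, not from this repo: verify the installed flash-attention
# matches the tag v2.3.6 pinned in Makefile-flash-att-v2.
import flash_attn

assert flash_attn.__version__ == "2.3.6", flash_attn.__version__
print("flash-attention:", flash_attn.__version__)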

server/text_generation_server/utils/flash_attn.py

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ def attention(
         out,
         cu_seqlens_q,
         cu_seqlens,
+        None,
         max_s_q,
         max_s,
         0.0,
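Note (not part of the commit): the added None fills a new optional positional parameter introduced in the flash-attention 2.3.x varlen_fwd kernel interface (a per-sequence used-key-length tensor, commonly named seqused_k); passing None keeps the previous behavior. The sketch below shows roughly where it sits in the call; argument names and the trailing arguments follow the upstream flash_attn_2_cuda.varlen_fwd signature for the 2.3 series and are an approximation, not a copy of this file's exact call site:

# Approximate shape of the updated kernel call; an assumption-based sketch,
# not verbatim from server/text_generation_server/utils/flash_attn.py.
import flash_attn_2_cuda as flash_attn_cuda

def attention_sketch(q, k, v, out, cu_seqlens_q, cu_seqlens, max_s_q, max_s, softmax_scale):
    return flash_attn_cuda.varlen_fwd(
        q,
        k,
        v,
        out,
        cu_seqlens_q,
        cu_seqlens,
        None,          # new in flash-attention 2.3.x: optional seqused_k tensor
        max_s_q,
        max_s,
        0.0,           # dropout probability
        softmax_scale,
        False,         # zero_tensors
        True,          # causal (assumed for this call site)
        -1,            # window_size_left: no sliding-window limit
        -1,            # window_size_right: no sliding-window limit
        False,         # return_softmax
        None,          # RNG generator
    )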
