
Commit 4fcd45d

Upgrade to IPEX 2.8 (#702)
Signed-off-by: Liu, Kaixuan <[email protected]>
1 parent 0a22f3c commit 4fcd45d

File tree

Dockerfile-intel
backends/python/server/text_embeddings_server/utils/flash_attn.py

2 files changed: 22 additions, 40 deletions

Dockerfile-intel

Lines changed: 5 additions & 5 deletions
@@ -59,7 +59,7 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
     --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
     cargo build --release --bin text-embeddings-router -F grpc -F python --no-default-features && sccache -s

-FROM intel/intel-extension-for-pytorch:2.7.0-pip-base AS cpu
+FROM intel/intel-extension-for-pytorch:2.8.0-pip-base AS cpu
 ENV HUGGINGFACE_HUB_CACHE=/data \
     PORT=80

@@ -77,7 +77,7 @@ COPY backends/python/server/text_embeddings_server/models/__init__.py backends/p
 COPY backends/python/server/pyproject.toml backends/python/server/pyproject.toml
 COPY backends/python/server/requirements-intel.txt backends/python/server/requirements.txt

-RUN python -m pip install torch==2.7.0 torchvision torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
+RUN python -m pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cpu --no-cache-dir

 RUN cd backends/python/server && \
     make install

@@ -104,7 +104,7 @@ COPY backends/python/server/requirements-hpu.txt backends/python/server/requirem
 RUN cd backends/python/server && \
     make install

-FROM intel/intel-extension-for-pytorch:2.7.10-xpu AS xpu
+FROM intel/intel-extension-for-pytorch:2.8.10-xpu AS xpu

 ENV HUGGINGFACE_HUB_CACHE=/data \
     PORT=80

@@ -118,8 +118,8 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO

 RUN apt-get update && apt install -y intel-basekit cmake vim python3-dev ninja-build pciutils
 WORKDIR /usr/src
-RUN pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/xpu --no-cache-dir
-RUN pip install intel-extension-for-pytorch==2.7.10+xpu oneccl_bind_pt==2.7.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --no-cache-dir
+RUN pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/xpu --no-cache-dir
+RUN pip install intel-extension-for-pytorch==2.8.10+xpu oneccl_bind_pt==2.8.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --no-cache-dir

 ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
 ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest
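
Both the cpu and xpu stages now pin torch 2.8.0 against the matching intel-extension-for-pytorch 2.8 wheels; a series mismatch between the two packages is a common source of import failures. A minimal sanity check (a sketch, not part of this commit) that can be run inside either built image:

# Sketch, not from the commit: verify the torch/IPEX wheels landed on the same 2.8 series.
import torch
import intel_extension_for_pytorch as ipex

print("torch:", torch.__version__)  # expect a 2.8.0 build (+cpu or +xpu)
print("ipex :", ipex.__version__)   # expect 2.8.0 (cpu stage) or 2.8.10+xpu (xpu stage)

# In the xpu stage, also confirm the device is actually visible to PyTorch.
if hasattr(torch, "xpu") and torch.xpu.is_available():
    print("xpu device:", torch.xpu.get_device_name(0))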

backends/python/server/text_embeddings_server/utils/flash_attn.py

Lines changed: 17 additions & 35 deletions
@@ -94,41 +94,23 @@ def attention(
     if use_ipex:
         import intel_extension_for_pytorch as ipex

-        if q.device.type == "xpu":
-            return ipex.llm.functional.varlen_attention(
-                q.contiguous(),
-                k.contiguous(),
-                v.contiguous(),
-                out,
-                cu_seqlens,
-                cu_seqlens,
-                None,
-                max_s,
-                max_s,
-                0,
-                softmax_scale,
-                zero_tensors=False,
-                is_causal=False,
-                return_softmax=False,
-                gen_=None,
-            )
-        elif q.device.type == "cpu":
-            return ipex.llm.functional.varlen_attention(
-                q,
-                k,
-                v,
-                out,
-                cu_seqlens,
-                cu_seqlens,
-                max_s,
-                max_s,
-                0,
-                softmax_scale,
-                zero_tensors=False,
-                is_causal=False,
-                return_softmax=False,
-                gen_=None,
-            )
+        return ipex.llm.functional.varlen_attention(
+            q.contiguous() if q.device.type == "xpu" else q,
+            k.contiguous() if k.device.type == "xpu" else k,
+            v.contiguous() if v.device.type == "xpu" else v,
+            out,
+            cu_seqlens,
+            cu_seqlens,
+            None,
+            max_s,
+            max_s,
+            0,
+            softmax_scale,
+            zero_tensors=False,
+            is_causal=False,
+            return_softmax=False,
+            gen_=None,
+        )

     elif is_hpu:
         return hpu_attn(

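The flash_attn.py change collapses the duplicated XPU and CPU branches into one ipex.llm.functional.varlen_attention call: .contiguous() is forced only for XPU tensors, and the CPU path now passes the same extra positional None argument the XPU path already did, presumably because the two signatures are unified in IPEX 2.8. A minimal, self-contained sketch of the device-conditional pattern (the helper name maybe_contiguous is illustrative, not from the commit):

import torch

def maybe_contiguous(t: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper mirroring the pattern above: the XPU kernel path
    # expects contiguous inputs, while on CPU the copy is unnecessary,
    # so it is skipped to avoid extra memory traffic.
    return t.contiguous() if t.device.type == "xpu" else t

q = torch.randn(8, 4, 64)                    # hypothetical (tokens, heads, head_dim)
print(maybe_contiguous(q).is_contiguous())   # True; no copy when already contiguous

Folding the device check into per-argument conditionals leaves a single call site, so any future change to the varlen_attention argument list only has to be made once.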