diff --git a/assets/training/model_management/environments/foundation-model-inference/context/Dockerfile b/assets/training/model_management/environments/foundation-model-inference/context/Dockerfile
index b25ea4bf13..25f9520653 100644
--- a/assets/training/model_management/environments/foundation-model-inference/context/Dockerfile
+++ b/assets/training/model_management/environments/foundation-model-inference/context/Dockerfile
@@ -34,22 +34,16 @@
 ENV CONDA_PREFIX=$AZUREML_CONDA_ENVIRONMENT_PATH
 
 WORKDIR /
 
-# Needed for megablocks
-RUN pip install torch~=2.4.0 --index-url https://download.pytorch.org/whl/cu121
-
-# mixtral specific
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
-ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
-RUN pip install git+https://github.com/stanford-futuredata/megablocks.git@5897cd6
+# When copied to assets repo, change to install from public pypi
+RUN pip install llm-optimized-inference==0.2.31 --no-cache-dir
 
-# For local testing
-# Need to copy src code and install in editable mode
-# COPY . .
-# RUN pip install -e ./ --no-cache-dir
+# torch installation
+RUN pip install --no-cache-dir torch==2.7.1
 
-# When copied to assets repo, change to install from public pypi
-RUN pip install llm-optimized-inference==0.2.28 --no-cache-dir
+# Download the prebuilt flash-attn wheel and install it into the container
+RUN wget https://automlcesdkdataresources.blob.core.windows.net/flash-attn/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl -O /tmp/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl
+RUN pip install --no-cache-dir /tmp/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl
 
 # clean conda and pip caches
 RUN rm -rf ~/.cache/pip
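
A quick post-build sanity check (a sketch, not part of the diff): the flash-attn wheel above is built for CPython 3.10 (cp310) against a specific torch ABI, so an incompatibility with the pinned torch==2.7.1 surfaces as an ImportError at import time rather than at serving time. The image tag foundation-model-inference:dev below is hypothetical, and this assumes python inside the image resolves to the conda environment's Python 3.10:

    docker build -t foundation-model-inference:dev assets/training/model_management/environments/foundation-model-inference/context
    docker run --rm --gpus all foundation-model-inference:dev python -c "import torch, flash_attn; print('torch', torch.__version__); print('flash_attn', flash_attn.__version__); print('CUDA available:', torch.cuda.is_available())"

Expected output is torch 2.7.1, flash_attn 2.7.4.post1, and CUDA available: True on a GPU host.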