Xaenalt
diff --git a/Dockerfile b/Dockerfile
Lines changed: 0 additions & 24 deletions
diff --git a/server/exllama_kernels/exllama_kernels/cuda_buffers.cu b/server/exllama_kernels/exllama_kernels/cuda_buffers.cu
Lines changed: 0 additions & 71 deletions
diff --git a/server/exllama_kernels/exllama_kernels/cuda_buffers.cuh b/server/exllama_kernels/exllama_kernels/cuda_buffers.cuh
Lines changed: 0 additions & 52 deletions
diff --git a/server/exllama_kernels/exllama_kernels/cuda_compat.cuh b/server/exllama_kernels/exllama_kernels/cuda_compat.cuh
Lines changed: 0 additions & 58 deletions
diff --git a/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cu b/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cu
Lines changed: 0 additions & 61 deletions
diff --git a/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh b/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh
Lines changed: 0 additions & 19 deletions
@@ -253,24 +253,6 @@ COPY server/custom_kernels/ /usr/src/.
 RUN cd /usr/src && python setup.py build_ext && python setup.py install
 
 
-## Build transformers exllama kernels ##########################################
-FROM python-builder as exllama-kernels-builder
-
-WORKDIR /usr/src
-
-COPY server/exllama_kernels/ .
-RUN python setup.py build
-
-
-## Build transformers exllamav2 kernels ########################################
-FROM python-builder as exllamav2-kernels-builder
-
-WORKDIR /usr/src
-
-COPY server/exllamav2_kernels/ .
-RUN python setup.py build
-
-
 ## Flash attention v2 cached build image #######################################
 FROM base as flash-att-v2-cache
 
@@ -301,12 +283,6 @@ ENV PATH=/opt/tgis/bin:$PATH
 RUN --mount=type=bind,from=flash-att-v2-cache,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
     pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
 
-# Copy build artifacts from exllama kernels builder
-COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-* ${SITE_PACKAGES}
-
-# Copy build artifacts from exllamav2 kernels builder
-COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-* ${SITE_PACKAGES}
-
 # Copy over the auto-gptq wheel and install it
 RUN --mount=type=bind,from=auto-gptq-cache,src=/usr/src/auto-gptq-wheel,target=/usr/src/auto-gptq-wheel \
     pip install /usr/src/auto-gptq-wheel/*.whl --no-cache-dir