diff --git a/.github/workflows/publish_devel_image.yml b/.github/workflows/publish_devel_image.yml
index 0a3fad3d..12c3e5f2 100644
--- a/.github/workflows/publish_devel_image.yml
+++ b/.github/workflows/publish_devel_image.yml
@@ -1,6 +1,17 @@
 name: Publish devel image
 on:
   workflow_dispatch:
+    inputs:
+      push:
+        description: 'Push to Docker Hub'
+        required: false
+        default: 'true'
+
+  # Schedule the workflow to run at 10:00 (UTC) every month.
+  schedule:
+    # Minute[0,59] Hour[0,23] Day of month[1,31] Month[1,12] Day of week[0,6] (Sunday=0)
+    - cron: '0 10 1 * *'
+
 env:
   # Tells where to store caches.
   CI_CACHE_DIR: ${{ github.workspace }}/../../ci_cache
@@ -27,12 +38,28 @@ jobs:
     - name: Create cache directory
       run: mkdir -p $CI_CACHE_DIR/.buildx-cache
 
+    - name: Build devel image for cuda 12.6 (experimental)
+      uses: docker/build-push-action@v5
+      with:
+        context: ./docker
+        file: ./docker/Dockerfile.devel
+        push: ${{ inputs.push || 'true' }}
+        cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
+        cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
+        build-args: |
+          UBUNTU_VERSION=22.04
+          CUDA_VERSION=12.6
+          GCC_VERSION=12
+        tags: |
+          vectorchai/scalellm_devel:cuda12.6-ubuntu22.04
+          vectorchai/scalellm_devel:cuda12.6
+
     - name: Build devel image for cuda 12.4
       uses: docker/build-push-action@v5
       with:
         context: ./docker
         file: ./docker/Dockerfile.devel
-        push: true
+        push: ${{ inputs.push || 'true' }}
         cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
         cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
         build-args: |
@@ -49,7 +76,7 @@ jobs:
       with:
         context: ./docker
         file: ./docker/Dockerfile.devel
-        push: true
+        push: ${{ inputs.push || 'true' }}
         cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
         cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
         build-args: |
@@ -65,7 +92,7 @@ jobs:
       with:
         context: ./docker
         file: ./docker/Dockerfile.devel
-        push: true
+        push: ${{ inputs.push || 'true' }}
         cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
         cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
         build-args: |
diff --git a/.github/workflows/publish_docs.yml b/.github/workflows/publish_docs.yml
index 682a2eba..3ec0898b 100644
--- a/.github/workflows/publish_docs.yml
+++ b/.github/workflows/publish_docs.yml
@@ -21,6 +21,11 @@ on:
 
   workflow_call:
 
+  # Schedule the workflow to run at 9:00 (UTC) every month.
+  schedule:
+    # Minute[0,59] Hour[0,23] Day of month[1,31] Month[1,12] Day of week[0,6] (Sunday=0)
+    - cron: '0 9 1 * *'
+
 # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
 permissions:
   contents: read
@@ -58,7 +63,8 @@ jobs:
     if: |
       github.event_name == 'push' ||
       github.event_name == 'workflow_call' ||
-      github.event_name == 'workflow_dispatch'
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule'
     environment:
       name: github-pages
       url: ${{ steps.deployment.outputs.page_url }}
diff --git a/.github/workflows/publish_manylinux_image.yml b/.github/workflows/publish_manylinux_image.yml
index afa6545c..b6ad0f98 100644
--- a/.github/workflows/publish_manylinux_image.yml
+++ b/.github/workflows/publish_manylinux_image.yml
@@ -1,6 +1,17 @@
 name: Publish manylinux image
 on:
   workflow_dispatch:
+    inputs:
+      push:
+        description: 'Push to Docker Hub'
+        required: false
+        default: 'true'
+
+  # Schedule the workflow to run at 9:00 (UTC) every month.
+  schedule:
+    # Minute[0,59] Hour[0,23] Day of month[1,31] Month[1,12] Day of week[0,6] (Sunday=0)
+    - cron: '0 9 1 * *'
+
 env:
   # Tells where to store caches.
   CI_CACHE_DIR: ${{ github.workspace }}/../../ci_cache
@@ -10,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        cuda: ["11.8", "12.1", "12.4"]
+        cuda: ["11.8", "12.1", "12.4", "12.6"]
     runs-on: [self-hosted, linux, build]
     steps:
     - name: Checkout repository
@@ -36,7 +47,7 @@
       with:
         context: ./docker
        file: ./docker/Dockerfile.manylinux
-        push: true
+        push: ${{ inputs.push || 'true' }}
         cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
         cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
         build-args: |
diff --git a/docker/common/install_cuda.sh b/docker/common/install_cuda.sh
index 413c08f9..bea67665 100755
--- a/docker/common/install_cuda.sh
+++ b/docker/common/install_cuda.sh
@@ -139,6 +139,39 @@ function install_124 {
     ldconfig
 }
 
+function install_126 {
+    echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
+    rm -rf /usr/local/cuda-12.6 /usr/local/cuda
+    # install CUDA 12.6.2 in the same container
+    wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
+    chmod +x cuda_12.6.2_560.35.03_linux.run
+    ./cuda_12.6.2_560.35.03_linux.run --toolkit --silent
+    rm -f cuda_12.6.2_560.35.03_linux.run
+    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda
+
+    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
+    mkdir tmp_cudnn && cd tmp_cudnn
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_cudnn
+
+    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
+    # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
+    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
+    cd nccl && make -j src.build
+    cp -a build/include/* /usr/local/cuda/include/
+    cp -a build/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf nccl
+
+    install_cusparselt_062
+
+    ldconfig
+}
+
 function prune_118 {
     echo "Pruning CUDA 11.8 and cuDNN"
     #####################################################################################
@@ -229,12 +262,46 @@ function prune_124 {
     $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
 
     #####################################################################################
-    # CUDA 12.1 prune visual tools
+    # CUDA 12.4 prune visual tools
     #####################################################################################
     export CUDA_BASE="/usr/local/cuda-12.4/"
     rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
 }
 
+function prune_126 {
+    echo "Pruning CUDA 12.6"
+    #####################################################################################
+    # CUDA 12.6 prune static libs
+    #####################################################################################
+    export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
+    export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
+
+    export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+    export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+
+    if [[ -n "$OVERRIDE_GENCODE" ]]; then
+        export GENCODE=$OVERRIDE_GENCODE
+    fi
+    if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
+        export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
+    fi
+
+    # all CUDA libs except CuDNN and CuBLAS
+    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
+        | xargs -I {} bash -c \
+            "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
+
+    # prune CuDNN and CuBLAS
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
+
+    #####################################################################################
+    # CUDA 12.6 prune visual tools
+    #####################################################################################
+    export CUDA_BASE="/usr/local/cuda-12.6/"
+    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
+}
+
 # idiomatic parameter and option handling in sh
 while test $# -gt 0
 do
@@ -245,6 +312,8 @@ do
         ;;
     12.4) install_124; prune_124
         ;;
+    12.6) install_126; prune_126
+        ;;
     *) echo "bad argument $1"; exit 1
         ;;
     esac
diff --git a/scripts/build_wheel.sh b/scripts/build_wheel.sh
index c8136682..b2f778bd 100755
--- a/scripts/build_wheel.sh
+++ b/scripts/build_wheel.sh
@@ -23,6 +23,9 @@ export PATH="$HOME/.local/bin:$PATH"
 PYVER="${PYTHON_VERSION//./}"
 export PATH="/opt/python/cp${PYVER}-cp${PYVER}/bin:$PATH"
 
+# update pip
+python -m pip install --upgrade pip
+
 # install PyTorch
 pip install torch==$TORCH_VERSION -i "https://download.pytorch.org/whl/cu${CUDA_VERSION//./}"
 
diff --git a/scripts/run_pytest.sh b/scripts/run_pytest.sh
index 4a47af8f..1f8be1a0 100755
--- a/scripts/run_pytest.sh
+++ b/scripts/run_pytest.sh
@@ -24,6 +24,9 @@ export PATH="$HOME/.local/bin:$PATH"
 PYVER="${PYTHON_VERSION//./}"
 export PATH="/opt/python/cp${PYVER}-cp${PYVER}/bin:$PATH"
 
+# update pip
+python -m pip install --upgrade pip
+
 # install PyTorch
 pip install torch==$TORCH_VERSION -i "https://download.pytorch.org/whl/cu${CUDA_VERSION//./}"
 
@@ -33,6 +36,6 @@ pip install -r requirements-test.txt
 # install scalellm wheel
 pip install dist/*.whl
 
-# run pytest
+# run pytest within the tests directory
 cd tests
 pytest
\ No newline at end of file