PennyLaneAI · josephleekl · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025
diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
@@ -35,6 +35,9 @@
 
 <h3>Internal changes ⚙️</h3>
 
+- Upgraded Kokkos from v4.5.0 to v5.0.0.
+  [(#1308)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1308)
+
 - Upgrade CIs to use CUDA 12.9.
   [(#1353)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1353)
   [(#1354)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1354)

diff --git a/.github/workflows/build_and_cache_Kokkos_linux.yml b/.github/workflows/build_and_cache_Kokkos_linux.yml
@@ -20,7 +20,7 @@ on:
       kokkos_version:
         required: false
         type: string
-        default: 4.5.00
+        default: 5.0.0
     outputs:
       exec_model:
         description: "The execution model for Kokkos."

diff --git a/.github/workflows/set_wheel_build_matrix.yml b/.github/workflows/set_wheel_build_matrix.yml
@@ -85,7 +85,7 @@ jobs:
 
       - name: Kokkos version
         id: kokkos_version
-        run: echo "kokkos_version=[\"4.5.00\"]" >> $GITHUB_OUTPUT
+        run: echo "kokkos_version=[\"5.0.0\"]" >> $GITHUB_OUTPUT
 
     outputs:
       python_version: ${{ steps.pyver.outputs.python_version }}

diff --git a/.github/workflows/tests_lkcpu_python.yml b/.github/workflows/tests_lkcpu_python.yml
@@ -48,7 +48,7 @@ jobs:
     uses: ./.github/workflows/build_and_cache_Kokkos_linux.yml
     with:
       os: ubuntu-24.04
-      kokkos_version: "4.5.00"
+      kokkos_version: "5.0.0"
 
   build_lightning_kokkos_wheels:
     needs: [build_and_cache_Kokkos]

diff --git a/.github/workflows/tests_lkcuda_cpp.yml b/.github/workflows/tests_lkcuda_cpp.yml
@@ -44,7 +44,7 @@ jobs:
       max-parallel: 1
       matrix:
         os: [ubuntu-22.04]
-        kokkos_version: ["4.5.00"]
+        kokkos_version: ["5.0.0"]
         exec_model: ["CUDA"]
         cuda_version_maj: ["12"]
         cuda_version_min: ["9"]
@@ -110,7 +110,7 @@ jobs:
       matrix:
         os: [ubuntu-22.04]
         pl_backend: ["lightning_kokkos"]
-        kokkos_version: ["4.5.00"]
+        kokkos_version: ["5.0.0"]
         exec_model: ["CUDA"]
         cuda_version_maj: ["12"]
         cuda_version_min: ["9"]

diff --git a/.github/workflows/tests_lkcuda_python.yml b/.github/workflows/tests_lkcuda_python.yml
@@ -50,7 +50,7 @@ jobs:
       max-parallel: 1
       matrix:
         os: [ubuntu-22.04]
-        kokkos_version: ["4.5.00"]
+        kokkos_version: ["5.0.0"]
         exec_model: ["CUDA"]
         cuda_version_maj: ["12"]
         cuda_version_min: ["9"]
@@ -116,7 +116,7 @@ jobs:
       matrix:
         os: [ubuntu-22.04]
         pl_backend: ["lightning_kokkos", "all"]
-        kokkos_version: ["4.5.00"]
+        kokkos_version: ["5.0.0"]
         exec_model: ["CUDA"]
         cuda_version_maj: ["12"]
         cuda_version_min: ["9"]

diff --git a/.github/workflows/tests_lkmpi_cuda_cpp.yml b/.github/workflows/tests_lkmpi_cuda_cpp.yml
@@ -46,7 +46,7 @@ jobs:
       max-parallel: 1
       matrix:
         mpilib: ["openmpi"]
-        kokkos_version: ["4.5.00"]
+        kokkos_version: ["5.0.0"]
         exec_model: ["CUDA"]
         cuda_version_maj: ["12"]
         cuda_version_min: ["9"]
@@ -106,7 +106,7 @@ jobs:
           # Adding venv name as an output for subsequent steps to reference if needed
           echo "venv_name=${{ env.VENV_NAME }}" >> $GITHUB_OUTPUT
           echo "Python_ROOT_DIR=${{ env.VENV_NAME }}" >> $GITHUB_ENV
-          echo "Python3_ROOT_DIR=${{ env.VENV_NAME }}" >> $GITHUB_ENV        
+          echo "Python3_ROOT_DIR=${{ env.VENV_NAME }}" >> $GITHUB_ENV
 
       - name: Display Python-Path
         id: python_path
@@ -121,7 +121,7 @@ jobs:
 
       - name: Install required packages
         run: |
-          python -m pip install --upgrade pip      # Upgrade pip    
+          python -m pip install --upgrade pip      # Upgrade pip
           python -m pip install --group tests
           python -m pip install cmake scipy
 

diff --git a/.github/workflows/tests_lkmpi_cuda_python.yml b/.github/workflows/tests_lkmpi_cuda_python.yml
@@ -48,7 +48,7 @@ jobs:
       max-parallel: 1
       matrix:
         mpilib: ["openmpi"]
-        kokkos_version: ["4.5.00"]
+        kokkos_version: ["5.0.0"]
         exec_model: ["CUDA"]
         cuda_version_maj: ["12"]
         cuda_version_min: ["9"]

diff --git a/.github/workflows/tests_windows_cpp.yml b/.github/workflows/tests_windows_cpp.yml
@@ -60,7 +60,7 @@ jobs:
 
       - name: Kokkos version
         id: kokkos_version
-        run: echo "kokkos_version=[\"4.5.00\"]" >> $GITHUB_OUTPUT
+        run: echo "kokkos_version=[\"5.0.0\"]" >> $GITHUB_OUTPUT
 
     outputs:
       exec_model: ${{ steps.exec_model.outputs.exec_model }}

diff --git a/cmake/support_kokkos.cmake b/cmake/support_kokkos.cmake
@@ -5,7 +5,7 @@
 # Include this file only once
 include_guard()
 
-set(KOKKOS_VERSION 4.5.00)
+set(KOKKOS_VERSION 5.0.0)
 
 # Macro to aid in finding Kokkos with 3 potential install options:
 # 1. Fully integrated Kokkos packages and CMake module files

diff --git a/doc/lightning_amdgpu/installation.rst b/doc/lightning_amdgpu/installation.rst
@@ -10,7 +10,7 @@ The installation instruction here is specifically for AMD MI300 GPU (GFX942); fo
 
 .. note::
 
-    Lightning-Kokkos and Lightning-AMDGPU are tested with Kokkos version 4.5.00
+    Lightning-Kokkos and Lightning-AMDGPU are tested with Kokkos version 5.0.0
 
 
 Install Lightning-AMDGPU
@@ -33,10 +33,7 @@ Install Lightning-AMDGPU
 
     # Install Lightning-AMDGPU
     PL_BACKEND="lightning_amdgpu" python scripts/configure_pyproject_toml.py
-    export CMAKE_ARGS="-DCMAKE_CXX_COMPILER=hipcc \
-                       -DKokkos_ENABLE_HIP=ON \
-                       -DKokkos_ARCH_AMD_GFX942=ON \
-                       -DCMAKE_PREFIX_PATH=/opt/rocm"
+    export CMAKE_ARGS="-DCMAKE_CXX_COMPILER=hipcc -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_AMD_GFX942=ON"
     python -m pip install . -vv
 
 

diff --git a/doc/lightning_kokkos/installation.rst b/doc/lightning_kokkos/installation.rst
@@ -19,7 +19,7 @@ Install Kokkos (Optional)
 
 .. note::
 
-    Lightning-Kokkos is tested with Kokkos version 4.5.00
+    Lightning-Kokkos is tested with Kokkos version 5.0.0
 
 We suggest first installing Kokkos with the wanted configuration following the instructions found in the `Kokkos documentation <https://kokkos.github.io/kokkos-core-wiki/building.html>`_.
 For example, the following will build Kokkos for NVIDIA A100 cards:
@@ -29,16 +29,16 @@ Download the `Kokkos code <https://github.com/kokkos/kokkos/releases>`_.
 .. code-block:: bash
 
     # Replace x, y, and z by the correct version
-    wget https://github.com/kokkos/kokkos/archive/refs/tags/4.x.yz.tar.gz
-    tar -xvf 4.x.y.z.tar.gz
-    cd kokkos-4.x.y.z
+    wget https://github.com/kokkos/kokkos/archive/refs/tags/5.0.0.tar.gz
+    tar -xvf 5.0.0.tar.gz
+    cd kokkos-5.0.0
 
 Build Kokkos for NVIDIA A100 cards (``SM80`` architecture), and append the install location to ``CMAKE_PREFIX_PATH``.
 
 .. code-block:: bash
 
     # Replace <install-path> with the path to install Kokkos
-    # e.g. $HOME/kokkos-install/4.5.0/AMPERE80
+    # e.g. $HOME/kokkos-install/5.0.0/AMPERE80
     export KOKKOS_INSTALL_PATH=<install-path>
     mkdir -p ${KOKKOS_INSTALL_PATH}
 
@@ -113,7 +113,7 @@ Install Lightning-Kokkos with MPI
 
 .. note::
 
-    Building Lightning-Kokkos with MPI requires an MPI library and ``mpi4py``. 
+    Building Lightning-Kokkos with MPI requires an MPI library and ``mpi4py``.
     If building for GPU, please ensure that MPI is built with GPU support - for example, see the guide to
     build OpenMPI with `CUDA <https://docs.open-mpi.org/en/v5.0.x/tuning-apps/networking/cuda.html>`_
     and `ROCm <https://docs.open-mpi.org/en/v5.0.x/tuning-apps/networking/rocm.html>`_ support.
@@ -130,7 +130,7 @@ Then Lightning-Kokkos with MPI support can be installed in the *editable* mode b
     python -m pip install --group base
     pip install git+https://github.com/PennyLaneAI/pennylane.git@main
 
-    # Lightning-Qubit needs to be 'installed' by pip before Lightning-Kokkos 
+    # Lightning-Qubit needs to be 'installed' by pip before Lightning-Kokkos
     # (compilation is not necessary)
     PL_BACKEND="lightning_qubit" python scripts/configure_pyproject_toml.py
     SKIP_COMPILATION=True pip install -e . --config-settings editable_mode=compat

diff --git a/doc/lightning_kokkos/installation_hpc.rst b/doc/lightning_kokkos/installation_hpc.rst
@@ -28,7 +28,7 @@ Install Kokkos (Recommended)
 
 .. note::
 
-    Lightning-Kokkos is tested with Kokkos version 4.5.00
+    Lightning-Kokkos is tested with Kokkos version 5.0.0
 
 We suggest first installing Kokkos with the desired configuration, following the instructions found in the Kokkos documentation.
 For example, the following instructions demonstrate building Kokkos for AMD MI210/250/250X GPUs:
@@ -38,9 +38,9 @@ Download the `Kokkos code <https://github.com/kokkos/kokkos/releases>`_.
 .. code-block:: bash
 
     # Replace x, y, and z by the correct version
-    wget https://github.com/kokkos/kokkos/archive/refs/tags/4.x.yz.tar.gz
-    tar -xvf 4.x.y.z.tar.gz
-    cd kokkos-4.x.y.z
+    wget https://github.com/kokkos/kokkos/archive/refs/tags/5.0.0.tar.gz
+    tar -xvf 5.0.0.tar.gz
+    cd kokkos-5.0.0/
 
 Build Kokkos for AMD GPU (``GFX90A`` architecture), and append the install location to ``CMAKE_PREFIX_PATH``.
 
@@ -80,7 +80,7 @@ It can be installed from source as follows:
     python -m pip install --group base
     pip install git+https://github.com/PennyLaneAI/pennylane.git@main
     PL_BACKEND="lightning_qubit" python scripts/configure_pyproject_toml.py
-    CMAKE_ARGS="-DCMAKE_CXX_COMPILER=CC" pip install .
+    CMAKE_ARGS="-DCMAKE_CXX_COMPILER=CC" pip install . -vv
 
 Then to install Lightning-Kokkos with MPI support:
 
@@ -95,7 +95,7 @@ Then to install Lightning-Kokkos with MPI support:
     export CMAKE_ARGS="-DENABLE_MPI=ON -DCMAKE_CXX_COMPILER=hipcc"
 
     PL_BACKEND="lightning_kokkos" python scripts/configure_pyproject_toml.py
-    python -m pip install .
+    python -m pip install . -vv
 
 .. note::
 

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -16,7 +16,7 @@
 ARG PENNYLANE_VERSION=master
 ARG LIGHTNING_VERSION=master
 ARG GCC_VERSION=13
-ARG CUDA_INSTALLER=https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/cuda_12.5.1_555.42.06_linux.run
+ARG CUDA_INSTALLER=https://developer.download.nvidia.com/compute/cuda/12.9.1/local_installers/cuda_12.9.1_575.57.08_linux.run
 ARG ROCM_INSTALLER=https://repo.radeon.com/amdgpu-install/7.0.3/ubuntu/jammy/amdgpu-install_7.0.3.70003-1_all.deb
 ARG AMD_ARCH=AMD_GFX942
 ARG CUDA_ARCH=AMPERE80
@@ -115,7 +115,7 @@
 RUN chmod a+x cuda-install.run
 RUN ./cuda-install.run --silent --toolkit --toolkitpath=/usr/local/cuda-$(echo ${CUDA_INSTALLER} | grep -o -P '/cuda/.{0,4}' | cut -d / -f 3)
 ENV PATH=/usr/local/cuda/bin:${PATH}
 ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}

 # Download Lightning release and build lightning-kokkos backend with Kokkos-CUDA
 FROM base-build-cuda AS build-wheel-lightning-kokkos-cuda
@@ -133,7 +133,7 @@
 
 # Install python3 and setup runtime virtual env in CUDA-12-runtime image (includes CUDA runtime and math libraries)
 # Install lightning-kokkos CUDA backend
-FROM nvidia/cuda:12.5.1-base-ubuntu24.04 AS wheel-lightning-kokkos-cuda
+FROM nvidia/cuda:12.9.1-base-ubuntu24.04 AS wheel-lightning-kokkos-cuda
 ARG PENNYLANE_VERSION
 ARG GCC_VERSION
 ENV DEBIAN_FRONTEND=noninteractive
@@ -168,7 +168,7 @@
 
 # Install python3 and setup runtime virtual env in CUDA-12-runtime image (includes CUDA runtime and math libraries)
 # Install lightning-kokkos CUDA backend
-FROM nvidia/cuda:12.5.1-runtime-ubuntu24.04 AS wheel-lightning-gpu
+FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS wheel-lightning-gpu
 ARG PENNYLANE_VERSION
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update \
@@ -209,7 +209,7 @@
 FROM base-build-rocm AS build-wheel-lightning-kokkos-rocm
 ARG AMD_ARCH
 WORKDIR /opt/pennylane-lightning
 ENV CMAKE_PREFIX_PATH=/opt/rocm:$CMAKE_PREFIX_PATH
 ENV CXX=hipcc
 ENV PL_BACKEND=lightning_amdgpu
 RUN pip uninstall -y pennylane-lightning
@@ -236,7 +236,7 @@
 ENV VIRTUAL_ENV=/opt/venv
 RUN python3 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 ENV LD_LIBRARY_PATH="/usr/lib/llvm-14/lib:$LD_LIBRARY_PATH"
 COPY --from=build-wheel-lightning-kokkos-rocm /opt/pennylane-lightning/dist/ /
 COPY --from=build-wheel-lightning-qubit /opt/pennylane-lightning/dist/ /
 RUN pip install --force-reinstall --no-cache-dir pennylane_lightning*.whl && rm pennylane_lightning*.whl

diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
 Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.45.0-dev28"
+__version__ = "0.45.0-dev29"
diff --git a/pennylane_lightning/core/bindings/BindingsCudaUtils.hpp b/pennylane_lightning/core/bindings/BindingsCudaUtils.hpp
@@ -26,6 +26,7 @@
 #include "cuda_helpers.hpp"
 
 #include <nanobind/nanobind.h>
+#include <nanobind/stl/pair.h>
 
 /// @cond DEV
 namespace {

diff --git a/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGateFunctors.hpp b/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGateFunctors.hpp
@@ -1840,9 +1840,10 @@ template <typename PrecisionT> class applyMultiRZFunctor {
     }
 
     KOKKOS_FUNCTION void operator()(const std::size_t k) const {
-        arr(k) *= (Kokkos::Impl::bit_count(k & wires_parity) % 2 == 0)
-                      ? shift_0
-                      : shift_1;
+        arr(k) *=
+            (Kokkos::Experimental::popcount_builtin(k & wires_parity) % 2 == 0)
+                ? shift_0
+                : shift_1;
     }
 };
 
@@ -1880,10 +1881,12 @@ void applyNCMultiRZ(Kokkos::View<Kokkos::complex<PrecisionT> *> arr_,
         Kokkos::View<Kokkos::complex<PrecisionT> *> arr, std::size_t i,
         Kokkos::View<std::size_t *> indices, std::size_t offset) {
         std::size_t index = indices(i);
-        arr(index + offset) *=
-            (Kokkos::Impl::bit_count((index + offset) & wires_parity) % 2 == 0)
-                ? shift_0
-                : shift_1;
+        arr(index + offset) *= (Kokkos::Experimental::popcount_builtin(
+                                    (index + offset) & wires_parity) %
+                                    2 ==
+                                0)
+                                   ? shift_0
+                                   : shift_1;
     };
 
     applyNCNFunctor(ExecutionSpace{}, arr_, num_qubits, controlled_wires,
@@ -1932,8 +1935,10 @@ void applyPauliRot(Kokkos::View<Kokkos::complex<PrecisionT> *> arr_,
         KOKKOS_LAMBDA(std::size_t i0) {
             std::size_t i1 = i0 ^ mask_xy;
             if (i0 <= i1) {
-                const auto count_y = Kokkos::Impl::bit_count(i0 & mask_y) * 2;
-                const auto count_z = Kokkos::Impl::bit_count(i0 & mask_z) * 2;
+                const auto count_y =
+                    Kokkos::Experimental::popcount_builtin(i0 & mask_y) * 2;
+                const auto count_z =
+                    Kokkos::Experimental::popcount_builtin(i0 & mask_z) * 2;
                 const auto sign_i0 = count_z + count_mask_y * 3 - count_y;
                 const auto sign_i1 = count_z + count_mask_y + count_y;
                 const ComplexT v0 = arr_(i0);

diff --git a/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGeneratorFunctors.hpp b/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGeneratorFunctors.hpp
@@ -369,7 +369,9 @@ void applyGenMultiRZ(Kokkos::View<Kokkos::complex<PrecisionT> *> arr_,
                                             Pennylane::Util::exp2(num_qubits)),
         KOKKOS_LAMBDA(std::size_t k) {
             arr_(k) *= static_cast<PrecisionT>(
-                1 - 2 * int(Kokkos::Impl::bit_count(k & wires_parity) % 2));
+                1 - 2 * int(Kokkos::Experimental::popcount_builtin(
+                                k & wires_parity) %
+                            2));
         });
 }
 
@@ -951,7 +953,9 @@ void applyNCGenMultiRZ(Kokkos::View<Kokkos::complex<PrecisionT> *> arr_,
         KOKKOS_LAMBDA(std::size_t k) {
             if (ctrls_mask == (ctrls_parity & k)) {
                 arr_(k) *= static_cast<PrecisionT>(
-                    1 - 2 * int(Kokkos::Impl::bit_count(k & wires_parity) % 2));
+                    1 - 2 * int(Kokkos::Experimental::popcount_builtin(
+                                    k & wires_parity) %
+                                2));
             } else {
                 arr_(k) = 0.0;
             }

diff --git a/pennylane_lightning/core/simulators/lightning_kokkos/gates/MatrixGateFunctors.hpp b/pennylane_lightning/core/simulators/lightning_kokkos/gates/MatrixGateFunctors.hpp
@@ -22,7 +22,7 @@
 /// @cond DEV
 namespace {
 using namespace Pennylane::Util;
-using Kokkos::Experimental::swap;
+using Kokkos::kokkos_swap;
 using Pennylane::LightningKokkos::Util::controlBitPatterns;
 using Pennylane::LightningKokkos::Util::generateBitPatterns;
 using Pennylane::LightningKokkos::Util::one;