diff --git a/cmake/support_kokkos.cmake b/cmake/support_kokkos.cmake index 3dd5cacebf..33c9afebe2 100644 --- a/cmake/support_kokkos.cmake +++ b/cmake/support_kokkos.cmake @@ -5,7 +5,7 @@ # Include this file only once include_guard() -set(KOKKOS_VERSION 4.5.00) +set(KOKKOS_VERSION 5.0.0) # Macro to aid in finding Kokkos with 3 potential install options: # 1. Fully integrated Kokkos packages and CMake module files diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index 7cf2e95893..663b8c6c28 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.45.0-dev0" +__version__ = "0.45.0-dev1" diff --git a/pennylane_lightning/core/simulators/lightning_kokkos/algorithms/AlgorithmsKokkos.cpp b/pennylane_lightning/core/simulators/lightning_kokkos/algorithms/AlgorithmsKokkos.cpp index df2fc15f65..8a2221b96f 100644 --- a/pennylane_lightning/core/simulators/lightning_kokkos/algorithms/AlgorithmsKokkos.cpp +++ b/pennylane_lightning/core/simulators/lightning_kokkos/algorithms/AlgorithmsKokkos.cpp @@ -23,6 +23,7 @@ using Pennylane::LightningKokkos::StateVectorKokkos; using namespace Pennylane::LightningKokkos::Algorithms; // explicit instantiation +#ifndef __HIP_DEVICE_COMPILE__ template class Pennylane::Algorithms::OpsData>; template class Pennylane::Algorithms::OpsData>; @@ -31,3 +32,4 @@ template class Pennylane::Algorithms::JacobianData>; template class Algorithms::AdjointJacobian>; template class Algorithms::AdjointJacobian>; +#endif diff --git a/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGateFunctors.hpp b/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGateFunctors.hpp index b82d1dae8c..34b15c6065 100644 --- a/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGateFunctors.hpp +++ b/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGateFunctors.hpp @@ 
-1840,9 +1840,10 @@ template class applyMultiRZFunctor { } KOKKOS_FUNCTION void operator()(const std::size_t k) const { - arr(k) *= (Kokkos::Impl::bit_count(k & wires_parity) % 2 == 0) - ? shift_0 - : shift_1; + arr(k) *= + (Kokkos::Experimental::popcount_builtin(k & wires_parity) % 2 == 0) + ? shift_0 + : shift_1; } }; @@ -1880,10 +1881,12 @@ void applyNCMultiRZ(Kokkos::View *> arr_, Kokkos::View *> arr, std::size_t i, Kokkos::View indices, std::size_t offset) { std::size_t index = indices(i); - arr(index + offset) *= - (Kokkos::Impl::bit_count((index + offset) & wires_parity) % 2 == 0) - ? shift_0 - : shift_1; + arr(index + offset) *= (Kokkos::Experimental::popcount_builtin( + (index + offset) & wires_parity) % + 2 == + 0) + ? shift_0 + : shift_1; }; applyNCNFunctor(ExecutionSpace{}, arr_, num_qubits, controlled_wires, @@ -1932,8 +1935,10 @@ void applyPauliRot(Kokkos::View *> arr_, KOKKOS_LAMBDA(std::size_t i0) { std::size_t i1 = i0 ^ mask_xy; if (i0 <= i1) { - const auto count_y = Kokkos::Impl::bit_count(i0 & mask_y) * 2; - const auto count_z = Kokkos::Impl::bit_count(i0 & mask_z) * 2; + const auto count_y = + Kokkos::Experimental::popcount_builtin(i0 & mask_y) * 2; + const auto count_z = + Kokkos::Experimental::popcount_builtin(i0 & mask_z) * 2; const auto sign_i0 = count_z + count_mask_y * 3 - count_y; const auto sign_i1 = count_z + count_mask_y + count_y; const ComplexT v0 = arr_(i0); diff --git a/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGeneratorFunctors.hpp b/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGeneratorFunctors.hpp index 8f88e0a74a..84646c1aaa 100644 --- a/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGeneratorFunctors.hpp +++ b/pennylane_lightning/core/simulators/lightning_kokkos/gates/BasicGeneratorFunctors.hpp @@ -369,7 +369,9 @@ void applyGenMultiRZ(Kokkos::View *> arr_, Pennylane::Util::exp2(num_qubits)), KOKKOS_LAMBDA(std::size_t k) { arr_(k) *= static_cast( - 1 - 2 * 
int(Kokkos::Impl::bit_count(k & wires_parity) % 2)); + 1 - 2 * int(Kokkos::Experimental::popcount_builtin( + k & wires_parity) % + 2)); }); } @@ -951,7 +953,9 @@ void applyNCGenMultiRZ(Kokkos::View *> arr_, KOKKOS_LAMBDA(std::size_t k) { if (ctrls_mask == (ctrls_parity & k)) { arr_(k) *= static_cast( - 1 - 2 * int(Kokkos::Impl::bit_count(k & wires_parity) % 2)); + 1 - 2 * int(Kokkos::Experimental::popcount_builtin( + k & wires_parity) % + 2)); } else { arr_(k) = 0.0; } diff --git a/pennylane_lightning/core/simulators/lightning_kokkos/gates/MatrixGateFunctors.hpp b/pennylane_lightning/core/simulators/lightning_kokkos/gates/MatrixGateFunctors.hpp index e9a9fcf3ff..35f5af2bc0 100644 --- a/pennylane_lightning/core/simulators/lightning_kokkos/gates/MatrixGateFunctors.hpp +++ b/pennylane_lightning/core/simulators/lightning_kokkos/gates/MatrixGateFunctors.hpp @@ -22,7 +22,7 @@ /// @cond DEV namespace { using namespace Pennylane::Util; -using Kokkos::Experimental::swap; +using Kokkos::kokkos_swap; using Pennylane::LightningKokkos::Util::controlBitPatterns; using Pennylane::LightningKokkos::Util::generateBitPatterns; using Pennylane::LightningKokkos::Util::one; diff --git a/pennylane_lightning/core/simulators/lightning_kokkos/measurements/MeasuresFunctors.hpp b/pennylane_lightning/core/simulators/lightning_kokkos/measurements/MeasuresFunctors.hpp index de760d7fd0..a87281d3ff 100644 --- a/pennylane_lightning/core/simulators/lightning_kokkos/measurements/MeasuresFunctors.hpp +++ b/pennylane_lightning/core/simulators/lightning_kokkos/measurements/MeasuresFunctors.hpp @@ -442,18 +442,46 @@ auto probs_bitshift_generic( d_probabilities); break; case 7UL: - Kokkos::parallel_reduce( - exp2(num_qubits - n_wires), - getProbsNQubitOpFunctor(arr, num_qubits, - wires), - d_probabilities); + // The following conditions are here to prevent a HIP out-of-shared-memory + // error +#ifdef KOKKOS_ENABLE_HIP + if constexpr (std::is_same_v && + sizeof(PrecisionT) == 8) { + 
Kokkos::parallel_reduce( + Kokkos::RangePolicy>( + 0, exp2(num_qubits - n_wires)), + getProbsNQubitOpFunctor( + arr, num_qubits, wires), + d_probabilities); + } else +#endif + { + Kokkos::parallel_reduce( + exp2(num_qubits - n_wires), + getProbsNQubitOpFunctor( + arr, num_qubits, wires), + d_probabilities); + } break; case 8UL: - Kokkos::parallel_reduce( - exp2(num_qubits - n_wires), - getProbsNQubitOpFunctor(arr, num_qubits, - wires), - d_probabilities); +#ifdef KOKKOS_ENABLE_HIP + if constexpr (std::is_same_v && + sizeof(PrecisionT) == 8) { + Kokkos::parallel_reduce( + Kokkos::RangePolicy>( + 0, exp2(num_qubits - n_wires)), + getProbsNQubitOpFunctor( + arr, num_qubits, wires), + d_probabilities); + } else +#endif + { + Kokkos::parallel_reduce( + exp2(num_qubits - n_wires), + getProbsNQubitOpFunctor( + arr, num_qubits, wires), + d_probabilities); + } break; default: Kokkos::parallel_reduce(