khuck
diff --git a/core/src/Kokkos_Parallel.hpp b/core/src/Kokkos_Parallel.hpp
Lines changed: 9 additions & 6 deletions
diff --git a/core/src/Kokkos_Parallel_Reduce.hpp b/core/src/Kokkos_Parallel_Reduce.hpp
Lines changed: 5 additions & 3 deletions
diff --git a/core/src/Kokkos_Tuners.hpp b/core/src/Kokkos_Tuners.hpp
Lines changed: 120 additions & 3 deletions
@@ -134,9 +134,10 @@ inline void parallel_for(const std::string& str, const ExecPolicy& policy,
                          const FunctorType& functor) {
   uint64_t kpID = 0;
 
-  ExecPolicy inner_policy = policy;
-  Kokkos::Tools::Impl::begin_parallel_for(inner_policy, functor, str, kpID);
-
+  /** Request a tuned policy from the tools subsystem */
+  const auto& response =
+      Kokkos::Tools::Impl::begin_parallel_for(policy, functor, str, kpID);
+  const auto& inner_policy = response.policy;
   Kokkos::Impl::shared_allocation_tracking_disable();
   Impl::ParallelFor<FunctorType, ExecPolicy> closure(functor, inner_policy);
   Kokkos::Impl::shared_allocation_tracking_enable();
@@ -348,9 +349,11 @@ template <class ExecutionPolicy, class FunctorType,
               std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
 inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
                           const FunctorType& functor) {
-  uint64_t kpID                = 0;
-  ExecutionPolicy inner_policy = policy;
-  Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
+  uint64_t kpID = 0;
+  /** Request a tuned policy from the tools subsystem */
+  const auto& response =
+      Kokkos::Tools::Impl::begin_parallel_scan(policy, functor, str, kpID);
+  const auto& inner_policy = response.policy;
 
   Kokkos::Impl::shared_allocation_tracking_disable();
   Impl::ParallelScan<FunctorType, ExecutionPolicy> closure(functor,
 
@@ -1492,9 +1492,11 @@ struct ParallelReduceAdaptor {
     using PassedReducerType = typename return_value_adapter::reducer_type;
     uint64_t kpID           = 0;
 
-    PolicyType inner_policy = policy;
-    Kokkos::Tools::Impl::begin_parallel_reduce<PassedReducerType>(
-        inner_policy, functor, label, kpID);
+    /** Request a tuned policy from the tools subsystem */
+    auto response = Kokkos::Tools::Impl::begin_parallel_reduce<
+        typename return_value_adapter::reducer_type>(policy, functor, label,
+                                                     kpID);
+    auto& inner_policy = response.policy;
 
     using ReducerSelector =
         Kokkos::Impl::if_c<std::is_same<InvalidType, PassedReducerType>::value,
 
@@ -52,6 +52,8 @@ VariableValue make_variable_value(size_t, int64_t);
 VariableValue make_variable_value(size_t, double);
 SetOrRange make_candidate_range(double lower, double upper, double step,
                                 bool openLower, bool openUpper);
+SetOrRange make_candidate_range(int64_t lower, int64_t upper, int64_t step,
+                                bool openLower, bool openUpper);
 size_t get_new_context_id();
 void begin_context(size_t context_id);
 void end_context(size_t context_id);
@@ -419,10 +421,11 @@ class TeamSizeTuner : public ExtendableTunerMixin<TeamSizeTuner> {
   template <typename ViableConfigurationCalculator, typename Functor,
             typename TagType, typename... Properties>
   TeamSizeTuner(const std::string& name,
-                Kokkos::TeamPolicy<Properties...>& policy,
+                const Kokkos::TeamPolicy<Properties...>& policy_in,
                 const Functor& functor, const TagType& tag,
                 ViableConfigurationCalculator calc) {
     using PolicyType           = Kokkos::TeamPolicy<Properties...>;
+    PolicyType policy(policy_in);
     auto initial_vector_length = policy.impl_vector_length();
     if (initial_vector_length < 1) {
       policy.impl_set_vector_length(1);
@@ -504,7 +507,8 @@ class TeamSizeTuner : public ExtendableTunerMixin<TeamSizeTuner> {
   }
 
   template <typename... Properties>
-  void tune(Kokkos::TeamPolicy<Properties...>& policy) {
+  auto tune(const Kokkos::TeamPolicy<Properties...>& policy_in) {
+    Kokkos::TeamPolicy<Properties...> policy(policy_in);
     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
       auto configuration = tuner.begin();
       auto team_size     = std::get<1>(configuration);
@@ -514,6 +518,117 @@ class TeamSizeTuner : public ExtendableTunerMixin<TeamSizeTuner> {
         policy.impl_set_vector_length(vector_length);
       }
     }
+    return policy;
+  }
+  void end() {
+    if (Kokkos::Tools::Experimental::have_tuning_tool()) {
+      tuner.end();
+    }
+  }
+
+  TunerType get_tuner() const { return tuner; }
+};
+namespace Impl {
+template <class T>
+struct tuning_type_for;
+
+template <>
+struct tuning_type_for<double> {
+  static constexpr Kokkos::Tools::Experimental::ValueType value =
+      Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
+  static double get(
+      const Kokkos::Tools::Experimental::VariableValue& value_struct) {
+    return value_struct.value.double_value;
+  }
+};
+template <>
+struct tuning_type_for<int64_t> {
+  static constexpr Kokkos::Tools::Experimental::ValueType value =
+      Kokkos::Tools::Experimental::ValueType::kokkos_value_int64;
+  static int64_t get(
+      const Kokkos::Tools::Experimental::VariableValue& value_struct) {
+    return value_struct.value.int_value;
+  }
+};
+}  // namespace Impl
+template <class Bound>
+class SingleDimensionalRangeTuner {
+  size_t id;
+  size_t context;
+  using tuning_util = Impl::tuning_type_for<Bound>;
+
+  Bound default_value;
+
+ public:
+  SingleDimensionalRangeTuner() = default;
+  SingleDimensionalRangeTuner(
+      const std::string& name,
+      Kokkos::Tools::Experimental::StatisticalCategory category,
+      Bound default_val, Bound lower, Bound upper, Bound step = (Bound)0) {
+    default_value = default_val;
+    Kokkos::Tools::Experimental::VariableInfo info;
+    info.category   = category;
+    info.candidates = make_candidate_range(
+        static_cast<Bound>(lower), static_cast<Bound>(upper),
+        static_cast<Bound>(step), false, false);
+    info.valueQuantity =
+        Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
+    info.type = tuning_util::value;
+    id        = Kokkos::Tools::Experimental::declare_output_type(name, info);
+  }
+
+  Bound begin() {
+    context = Kokkos::Tools::Experimental::get_new_context_id();
+    Kokkos::Tools::Experimental::begin_context(context);
+    auto tuned_value =
+        Kokkos::Tools::Experimental::make_variable_value(id, default_value);
+    Kokkos::Tools::Experimental::request_output_values(context, 1,
+                                                       &tuned_value);
+    return tuning_util::get(tuned_value);
+  }
+
+  void end() { Kokkos::Tools::Experimental::end_context(context); }
+
+  template <typename Functor>
+  void with_tuned_value(Functor& func) {
+    func(begin());
+    end();
+  }
+};
+
+class RangePolicyOccupancyTuner {
+ private:
+  using TunerType = SingleDimensionalRangeTuner<int64_t>;
+  TunerType tuner;
+
+ public:
+  RangePolicyOccupancyTuner()        = default;
+  RangePolicyOccupancyTuner& operator=(const RangePolicyOccupancyTuner& other) =
+      default;
+  RangePolicyOccupancyTuner(const RangePolicyOccupancyTuner& other) = default;
+  RangePolicyOccupancyTuner& operator=(RangePolicyOccupancyTuner&& other) =
+      default;
+  RangePolicyOccupancyTuner(RangePolicyOccupancyTuner&& other) = default;
+  template <typename ViableConfigurationCalculator, typename Functor,
+            typename TagType, typename... Properties>
+  RangePolicyOccupancyTuner(const std::string& name,
+                            const Kokkos::RangePolicy<Properties...>&,
+                            const Functor&, const TagType&,
+                            ViableConfigurationCalculator)
+      : tuner(TunerType(name,
+                        Kokkos::Tools::Experimental::StatisticalCategory::
+                            kokkos_value_ratio,
+                        100, 5, 100, 5)) {}
+
+  template <typename... Properties>
+  auto tune(const Kokkos::RangePolicy<Properties...>& policy_in) {
+    Kokkos::RangePolicy<Properties...> policy(policy_in);
+    if (Kokkos::Tools::Experimental::have_tuning_tool()) {
+      auto occupancy = tuner.begin();
+      policy.impl_set_desired_occupancy(
+          Kokkos::Experimental::DesiredOccupancy{static_cast<int>(occupancy)});
+    }
+    return policy;
   }
   void end() {
     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
@@ -577,11 +692,13 @@ struct MDRangeTuner : public ExtendableTunerMixin<MDRangeTuner<MDRangeRank>> {
     policy.impl_change_tile_size({std::get<Indices>(tuple)...});
   }
   template <typename... Properties>
-  void tune(Kokkos::MDRangePolicy<Properties...>& policy) {
+  auto tune(const Kokkos::MDRangePolicy<Properties...>& policy_in) {
+    Kokkos::MDRangePolicy<Properties...> policy(policy_in);
     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
       auto configuration = tuner.begin();
       set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
     }
+    return policy;
   }
   void end() {
     if (Kokkos::Tools::Experimental::have_tuning_tool()) {