20 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE 
   21 #include <Kokkos_Macros.hpp> 
   23               "Including non-public Kokkos header files is not allowed.");
 
   25 #ifndef KOKKOS_PARALLEL_HPP 
   26 #define KOKKOS_PARALLEL_HPP 
   28 #include <Kokkos_Core_fwd.hpp> 
   29 #include <Kokkos_DetectionIdiom.hpp> 
   30 #include <Kokkos_ExecPolicy.hpp> 
   31 #include <Kokkos_View.hpp> 
   33 #include <impl/Kokkos_Tools.hpp> 
   34 #include <impl/Kokkos_Tools_Generic.hpp> 
   36 #include <impl/Kokkos_Traits.hpp> 
   37 #include <impl/Kokkos_FunctorAnalysis.hpp> 
   40 #include <type_traits> 
   // Alias naming the nested `execution_space` member type of T.
   // Passed to detected_t / is_detected elsewhere in this header to probe
   // whether a type provides that member.
   50 using execution_space_t = 
typename T::execution_space;
 
   // Alias naming the nested `device_type` member type of T.
   // Passed to detected_t / is_detected elsewhere in this header to probe
   // whether a type provides that member.
   53 using device_type_t = 
typename T::device_type;
 
   // Given a Functor and an execution Policy, exposes the execution space to
   // use as the member type `execution_space`.
   //
   // The helper aliases record what each argument provides (the policy's
   // execution_space, the functor's execution_space, and the execution_space
   // of the functor's device_type). The assertions then reject any pair of
   // these that are both present but name different types.
   64 template <
class Functor, 
class Policy>
 
   65 struct FunctorPolicyExecutionSpace {
 
   66   using policy_execution_space  = detected_t<execution_space_t, Policy>;
 
   67   using functor_execution_space = detected_t<execution_space_t, Functor>;
 
   68   using functor_device_type     = detected_t<device_type_t, Functor>;
 
   69   using functor_device_type_execution_space =
 
   70       detected_t<execution_space_t, functor_device_type>;
 
   // Check 1: Policy::execution_space vs. Functor::execution_space.
   // Passes when either is absent or when both are the same type.
   73       !is_detected<execution_space_t, Policy>::value ||
 
   74           !is_detected<execution_space_t, Functor>::value ||
 
   75           std::is_same_v<policy_execution_space, functor_execution_space>,
 
   76       "A policy with an execution space and a functor with an execution space " 
   77       "are given but the execution space types do not match!");
 
   // Check 2: Policy::execution_space vs. Functor::device_type's
   // execution_space.
   78   static_assert(!is_detected<execution_space_t, Policy>::value ||
 
   79                     !is_detected<device_type_t, Functor>::value ||
 
   80                     std::is_same_v<policy_execution_space,
 
   81                                    functor_device_type_execution_space>,
 
   82                 "A policy with an execution space and a functor with a device " 
   83                 "type are given but the execution space types do not match!");
 
   // Check 3: the functor's own device_type and execution_space must agree
   // with each other when it declares both.
   84   static_assert(!is_detected<device_type_t, Functor>::value ||
 
   85                     !is_detected<execution_space_t, Functor>::value ||
 
   86                     std::is_same_v<functor_device_type_execution_space,
 
   87                                    functor_execution_space>,
 
   88                 "A functor with both an execution space and device type is " 
   89                 "given but their execution space types do not match!");
 
   // NOTE(review): some lines of this expression are missing from this view.
   // The visible pieces suggest the precedence is: Policy::execution_space,
   // else Functor::execution_space, else the execution_space of
   // Functor::device_type, else Kokkos::DefaultExecutionSpace -- confirm
   // against the full header.
   91   using execution_space = detected_or_t<
 
   94               is_detected<device_type_t, Functor>::value,
 
   95               detected_t<execution_space_t, detected_t<device_type_t, Functor>>,
 
   96               Kokkos::DefaultExecutionSpace>,
 
   97           execution_space_t, Functor>,
 
   98       execution_space_t, Policy>;
 
  // parallel_for overload taking a label, an execution policy and a functor.
  //
  // The defaulted `Enable` parameter removes this overload from consideration
  // unless is_execution_policy<ExecPolicy>::value is true.
  //
  // str     - label forwarded to the Kokkos::Tools begin/end hooks.
  // policy  - execution policy supplied by the caller.
  // functor - functor handed to the Impl::ParallelFor closure.
  131     class ExecPolicy, 
class FunctorType,
 
  132     class Enable = std::enable_if_t<is_execution_policy<ExecPolicy>::value>>
 
  133 inline void parallel_for(
const std::string& str, 
const ExecPolicy& policy,
 
  134                          const FunctorType& functor) {
 
  // The begin hook returns a response object; the policy it carries, not the
  // caller's `policy`, is what the closure and the end hook receive below.
  // (kpID is declared on a line not shown in this view.)
  138   const auto& response =
 
  139       Kokkos::Tools::Impl::begin_parallel_for(policy, functor, str, kpID);
 
  140   const auto& inner_policy = response.policy;
 
  // Build the Impl::ParallelFor closure through
  // construct_with_shared_allocation_tracking_disabled.
  143       Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
 
  144           Impl::ParallelFor<FunctorType, ExecPolicy>>(functor, inner_policy);
 
  148   Kokkos::Tools::Impl::end_parallel_for(inner_policy, functor, str, kpID);
 
  // Unlabeled parallel_for taking an execution policy and a functor.
  // Forwards to the labeled overload with an empty label "".
  // The unnamed trailing pointer parameter (defaulting to nullptr) is
  // well-formed only when is_execution_policy<ExecPolicy>::value is true,
  // which restricts this overload to execution policies.
  151 template <
class ExecPolicy, 
class FunctorType>
 
  152 inline void parallel_for(
 
  153     const ExecPolicy& policy, 
const FunctorType& functor,
 
  154     std::enable_if_t<is_execution_policy<ExecPolicy>::value>* = 
nullptr) {
 
  155   Kokkos::parallel_for(
"", policy, functor);
 
  // Labeled parallel_for taking a work count instead of a policy.
  //
  // str        - label forwarded to the policy-based overload.
  // work_count - end bound of the range policy constructed below.
  // functor    - functor forwarded unchanged.
  158 template <
class FunctorType>
 
  159 inline void parallel_for(
const std::string& str, 
const size_t work_count,
 
  160                          const FunctorType& functor) {
 
  // With `void` as the policy argument, the execution space is chosen by
  // FunctorPolicyExecutionSpace from the functor alone.
  161   using execution_space =
 
  162       typename Impl::FunctorPolicyExecutionSpace<FunctorType,
 
  163                                                  void>::execution_space;
 
  164   using policy = RangePolicy<execution_space>;
 
  // Range policy constructed with the bounds (0, work_count).
  166   policy execution_policy = policy(0, work_count);
 
  167   ::Kokkos::parallel_for(str, execution_policy, functor);
 
  // Unlabeled parallel_for taking a work count and a functor.
  // Forwards to the labeled work-count overload with an empty label "".
  170 template <
class FunctorType>
 
  171 inline void parallel_for(
const size_t work_count, 
const FunctorType& functor) {
 
  172   ::Kokkos::parallel_for(
"", work_count, functor);
 
  177 #include <Kokkos_Parallel_Reduce.hpp> 
  // parallel_scan overload taking a label, an execution policy and a functor
  // (no return value).
  //
  // The unnamed defaulted template parameter removes this overload from
  // consideration unless is_execution_policy<ExecutionPolicy>::value is true.
  //
  // str     - label forwarded to the Kokkos::Tools begin/end hooks.
  // policy  - execution policy supplied by the caller.
  // functor - functor handed to the Impl::ParallelScan closure.
  348 template <
class ExecutionPolicy, 
class FunctorType,
 
  350               std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
 
  351 inline void parallel_scan(
const std::string& str, 
const ExecutionPolicy& policy,
 
  352                           const FunctorType& functor) {
 
  // The begin hook returns a response object; the policy it carries is the
  // one passed to the end hook below.
  // (kpID is declared on a line not shown in this view.)
  355   const auto& response =
 
  356       Kokkos::Tools::Impl::begin_parallel_scan(policy, functor, str, kpID);
 
  357   const auto& inner_policy = response.policy;
 
  // Build the Impl::ParallelScan closure through
  // construct_with_shared_allocation_tracking_disabled; the remaining
  // constructor arguments are on lines not shown in this view.
  360       Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
 
  361           Impl::ParallelScan<FunctorType, ExecutionPolicy>>(functor,
 
  366   Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
 
  // Unlabeled parallel_scan taking an execution policy and a functor.
  // Forwards to the labeled overload with an empty label "".
  // The unnamed trailing pointer parameter (defaulting to nullptr) is
  // well-formed only when is_execution_policy<ExecutionPolicy>::value is
  // true, which restricts this overload to execution policies.
  369 template <
class ExecutionPolicy, 
class FunctorType>
 
  370 inline void parallel_scan(
 
  371     const ExecutionPolicy& policy, 
const FunctorType& functor,
 
  372     std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = 
nullptr) {
 
  373   ::Kokkos::parallel_scan(
"", policy, functor);
 
  // Labeled parallel_scan taking a work count instead of a policy.
  //
  // str        - label forwarded to the policy-based overload.
  // work_count - end bound of the policy constructed below.
  // functor    - functor forwarded unchanged.
  376 template <
class FunctorType>
 
  377 inline void parallel_scan(
const std::string& str, 
const size_t work_count,
 
  378                           const FunctorType& functor) {
 
  // NOTE(review): the lines naming the trait and defining `policy` are
  // missing from this view; presumably they mirror the work-count
  // parallel_for overload (FunctorPolicyExecutionSpace<FunctorType, void>
  // and RangePolicy<execution_space>) -- confirm against the full header.
  379   using execution_space =
 
  381                                                          void>::execution_space;
 
  // Policy constructed with the bounds (0, work_count).
  385   policy execution_policy(0, work_count);
 
  // NOTE(review): this call is unqualified, unlike the ::Kokkos::-qualified
  // calls in the sibling overloads, so argument-dependent lookup can take
  // part in overload resolution here.
  386   parallel_scan(str, execution_policy, functor);
 
  // Unlabeled parallel_scan taking a work count and a functor.
  // Forwards to the labeled work-count overload with an empty label "".
  389 template <
class FunctorType>
 
  390 inline void parallel_scan(
const size_t work_count, 
const FunctorType& functor) {
 
  391   ::Kokkos::parallel_scan(
"", work_count, functor);
 
  // parallel_scan overload that also delivers a result through
  // `return_value` (its parameter declaration is on a line not shown in this
  // view).
  //
  // The unnamed defaulted template parameter removes this overload from
  // consideration unless is_execution_policy<ExecutionPolicy>::value is true.
  //
  // str     - label forwarded to the Kokkos::Tools begin/end hooks.
  // policy  - execution policy; copied into inner_policy before use.
  // functor - functor handed to the Impl::ParallelScanWithTotal closure.
  394 template <
class ExecutionPolicy, 
class FunctorType, 
class ReturnType,
 
  396               std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
 
  397 inline void parallel_scan(
const std::string& str, 
const ExecutionPolicy& policy,
 
  398                           const FunctorType& functor,
 
  // Work on a local copy of the policy; this copy is what the tools hooks
  // and the closure receive. (kpID is declared on a line not shown.)
  401   ExecutionPolicy inner_policy = policy;
 
  402   Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
 
  // ReturnType is a Kokkos view: pass return_value directly to the closure,
  // which is instantiated with the view's value_type.
  404   if constexpr (Kokkos::is_view<ReturnType>::value) {
 
  406         Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
 
  407             Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
 
  408                                         typename ReturnType::value_type>>(
 
  409             functor, inner_policy, return_value);
 
  // ReturnType is not a view: wrap the address of return_value in a
  // HostSpace View. The rest of this branch is not shown in this view.
  412     Kokkos::View<ReturnType, Kokkos::HostSpace> view(&return_value);
 
  414         Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
 
  415             Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
 
  421   Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
 
  // When the result is a plain value rather than a view, fence the policy's
  // execution space before returning.
  // NOTE(review): this is a runtime `if` on a compile-time constant, whereas
  // the dispatch above uses `if constexpr`.
  423   if (!Kokkos::is_view<ReturnType>::value)
 
  424     policy.space().fence(
 
  425         "Kokkos::parallel_scan: fence due to result being a value, not a view");
 
  // Unlabeled parallel_scan taking an execution policy, a functor and a
  // return value (the return_value parameter declaration is on a line not
  // shown in this view).
  // Forwards to the labeled overload with an empty label "".
  // The unnamed trailing pointer parameter (defaulting to nullptr) is
  // well-formed only when is_execution_policy<ExecutionPolicy>::value is
  // true, which restricts this overload to execution policies.
  428 template <
class ExecutionPolicy, 
class FunctorType, 
class ReturnType>
 
  429 inline void parallel_scan(
 
  430     const ExecutionPolicy& policy, 
const FunctorType& functor,
 
  432     std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = 
nullptr) {
 
  433   ::Kokkos::parallel_scan(
"", policy, functor, return_value);
 
  // Labeled parallel_scan taking a work count, a functor and a return value
  // (the return_value parameter declaration is on a line not shown in this
  // view).
  //
  // str        - label forwarded to the policy-based overload.
  // work_count - end bound of the policy constructed below.
  // functor    - functor forwarded unchanged.
  436 template <
class FunctorType, 
class ReturnType>
 
  437 inline void parallel_scan(
const std::string& str, 
const size_t work_count,
 
  438                           const FunctorType& functor,
 
  // NOTE(review): the lines naming the trait and defining `policy` are
  // missing from this view; presumably FunctorPolicyExecutionSpace<
  // FunctorType, void> and RangePolicy<execution_space> as in the work-count
  // parallel_for overload -- confirm against the full header.
  440   using execution_space =
 
  442                                                          void>::execution_space;
 
  // Policy constructed with the bounds (0, work_count).
  446   policy execution_policy(0, work_count);
 
  // NOTE(review): this call is unqualified, unlike the ::Kokkos::-qualified
  // calls in the sibling overloads, so argument-dependent lookup can take
  // part in overload resolution here.
  447   parallel_scan(str, execution_policy, functor, return_value);
 
  // Unlabeled parallel_scan taking a work count, a functor and a return value
  // (the return_value parameter declaration is on a line not shown in this
  // view).
  // Forwards to the labeled work-count overload with an empty label "".
  450 template <
class FunctorType, 
class ReturnType>
 
  451 inline void parallel_scan(
const size_t work_count, 
const FunctorType& functor,
 
  453   ::Kokkos::parallel_scan(
"", work_count, functor, return_value);
 
  // Primary template of FunctorTeamShmemSize.
  //
  // The two bool parameters default to whether FunctorType has a
  // `team_shmem_size` member and whether it has a `shmem_size` member
  // (has_member_team_shmem_size / has_member_shmem_size). Specializations on
  // those flags appear later in this header.
  //
  // value() here takes the functor and an int but names neither parameter,
  // so it cannot use them. Its body is on lines not shown in this view.
  464 template <
class FunctorType,
 
  465           bool HasTeamShmemSize =
 
  466               has_member_team_shmem_size<FunctorType>::value,
 
  467           bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
 
  468 struct FunctorTeamShmemSize {
 
  469   KOKKOS_INLINE_FUNCTION 
static size_t value(
const FunctorType&, 
int) {
 
  // Specialization for functors that have `team_shmem_size` but not
  // `shmem_size` (flags <true, false>).
  //
  // value(f, team_size) returns f.team_shmem_size(team_size).
  474 template <
class FunctorType>
 
  475 struct FunctorTeamShmemSize<FunctorType, true, false> {
 
  476   static inline size_t value(
const FunctorType& f, 
int team_size) {
 
  477     return f.team_shmem_size(team_size);
 
  // Specialization for functors that have `shmem_size` but not
  // `team_shmem_size` (flags <false, true>).
  //
  // value(f, team_size) returns f.shmem_size(team_size).
  481 template <
class FunctorType>
 
  482 struct FunctorTeamShmemSize<FunctorType, false, true> {
 
  483   static inline size_t value(
const FunctorType& f, 
int team_size) {
 
  484     return f.shmem_size(team_size);
 
  487 template <
class FunctorType>
 
  488 struct FunctorTeamShmemSize<FunctorType, true, true> {
 
  489   static inline size_t value(
const FunctorType& , 
int ) {
 
  491         "Functor with both team_shmem_size and shmem_size defined is " 
Given a Functor and an Execution Policy, query an execution space. 
 
Execution policy for work over a range of an integral type.