17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE 
   18 #include <Kokkos_Macros.hpp> 
   20               "Including non-public Kokkos header files is not allowed.");
 
   22 #ifndef KOKKOS_EXECPOLICY_HPP 
   23 #define KOKKOS_EXECPOLICY_HPP 
   25 #include <Kokkos_Core_fwd.hpp> 
   26 #include <impl/Kokkos_Traits.hpp> 
   27 #include <impl/Kokkos_Error.hpp> 
   28 #include <impl/Kokkos_AnalyzePolicy.hpp> 
   29 #include <Kokkos_BitManipulation.hpp> 
   30 #include <Kokkos_Concepts.hpp> 
   31 #include <Kokkos_TypeInfo.hpp> 
   32 #ifndef KOKKOS_ENABLE_IMPL_TYPEINFO 
   // Three empty tag types, one each for the "for", "scan" and "reduce"
   // parallel patterns. They have no members and no state.
   // NOTE(review): presumably used for compile-time dispatch; their users are
   // not visible in this excerpt — confirm.
   41 struct ParallelForTag {};
 
   42 struct ParallelScanTag {};
 
   43 struct ParallelReduceTag {};
 
   // Explicit constructor: copies value_ into the `value` member. Because it
   // is `explicit`, an int is not implicitly converted to ChunkSize here.
   47   explicit ChunkSize(
int value_) : value(value_) {}
 
   48 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 
   // Deprecated, non-explicit constructor template. The #ifdef directly above
   // compiles it only when KOKKOS_ENABLE_DEPRECATED_CODE_4 is defined.
   // The template parameter T defaults to void and does not appear in the
   // parameter list; the body stores value_ in `value`.
   49   template <
typename T = 
void>
 
   50   KOKKOS_DEPRECATED_WITH_COMMENT(
"ChunkSize should be constructed explicitly.")
 
   51   ChunkSize(
int value_) : value(value_) {}
 
   76 template <
class... Properties>
 
   77 class RangePolicy : 
public Impl::PolicyTraits<Properties...> {
 
   79   using traits = Impl::PolicyTraits<Properties...>;
 
   82   typename traits::execution_space m_space;
 
   83   typename traits::index_type m_begin;
 
   84   typename traits::index_type m_end;
 
   85   typename traits::index_type m_granularity;
 
   86   typename traits::index_type m_granularity_mask;
 
   88   template <
class... OtherProperties>
 
   94   using member_type      = 
typename traits::index_type;
 
   95   using index_type       = 
typename traits::index_type;
 
   97   KOKKOS_INLINE_FUNCTION 
const typename traits::execution_space& space()
 const {
 
  // Returns the stored m_begin bound by value (as member_type).
  100   KOKKOS_INLINE_FUNCTION member_type begin()
 const { 
return m_begin; }
 
  // Returns the stored m_end bound by value (as member_type).
  101   KOKKOS_INLINE_FUNCTION member_type end()
 const { 
return m_end; }
 
  // Call operator taking a const int& with an empty body: calling it does
  // nothing.
  // NOTE(review): why the policy exposes a no-op call operator is not visible
  // in this excerpt — confirm before changing or removing it.
  108   void operator()(
const int&)
 const {}
 
  110   template <
class... OtherProperties>
 
  111   RangePolicy(
const RangePolicy<OtherProperties...>& p)
 
  116         m_granularity(p.m_granularity),
 
  117         m_granularity_mask(p.m_granularity_mask) {}
 
  124         m_granularity_mask(0) {}
 
  127   template <
typename IndexType1, 
typename IndexType2,
 
  128             std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
 
  129                               std::is_convertible_v<IndexType2, member_type>),
 
  131   inline RangePolicy(
const IndexType1 work_begin, 
const IndexType2 work_end)
 
  132       : 
RangePolicy(
typename traits::execution_space(), work_begin, work_end) {}
 
  135   template <
typename IndexType1, 
typename IndexType2,
 
  136             std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
 
  137                               std::is_convertible_v<IndexType2, member_type>),
 
  139   inline RangePolicy(
const typename traits::execution_space& work_space,
 
  140                      const IndexType1 work_begin, 
const IndexType2 work_end)
 
  141       : m_space(work_space),
 
  145         m_granularity_mask(0) {
 
  146     check_conversion_safety(work_begin);
 
  147     check_conversion_safety(work_end);
 
  148     check_bounds_validity();
 
  149     set_auto_chunk_size();
 
  152   template <
typename IndexType1, 
typename IndexType2,
 
  153             std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
 
  154                               std::is_convertible_v<IndexType2, member_type>),
 
  156   RangePolicy(
const typename traits::execution_space& work_space,
 
  157               const IndexType1 work_begin, 
const IndexType2 work_end,
 
  159       : m_space(work_space),
 
  163         m_granularity_mask(0) {
 
  164     check_conversion_safety(work_begin);
 
  165     check_conversion_safety(work_end);
 
  166     check_bounds_validity();
 
  171   template <
typename IndexType1, 
typename IndexType2, 
typename... Args,
 
  172             std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
 
  173                               std::is_convertible_v<IndexType2, member_type>),
 
  175   RangePolicy(
const IndexType1 work_begin, 
const IndexType2 work_end,
 
  176               const ChunkSize chunk_size)
 
  177       : 
RangePolicy(
typename traits::execution_space(), work_begin, work_end,
 
  181 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 
  182   KOKKOS_DEPRECATED_WITH_COMMENT(
"Use set_chunk_size instead")
 
  183   inline 
void set(ChunkSize chunksize) {
 
  184     m_granularity      = chunksize.value;
 
  185     m_granularity_mask = m_granularity - 1;
 
  // Returns the current m_granularity value by value (as member_type).
  191   inline member_type 
chunk_size()
 const { 
return m_granularity; }
 
  196     m_granularity_mask = m_granularity - 1;
 
  202   inline void set_auto_chunk_size() {
 
  203 #ifdef KOKKOS_ENABLE_SYCL 
  204     if (std::is_same_v<typename traits::execution_space, Kokkos::SYCL>) {
 
  208       m_granularity_mask = 0;
 
  212     auto concurrency = 
static_cast<int64_t
>(m_space.concurrency());
 
  213     if (concurrency == 0) concurrency = 1;
 
  215     if (m_granularity > 0 &&
 
  216         !Kokkos::has_single_bit(static_cast<unsigned>(m_granularity))) {
 
  217       Kokkos::abort(
"RangePolicy blocking granularity must be power of two");
 
  220     int64_t new_chunk_size = 1;
 
  221     while (new_chunk_size * 100 * concurrency <
 
  222            static_cast<int64_t>(m_end - m_begin))
 
  224     if (new_chunk_size < 128) {
 
  226       while ((new_chunk_size * 40 * concurrency <
 
  227               static_cast<int64_t>(m_end - m_begin)) &&
 
  228              (new_chunk_size < 128))
 
  231     m_granularity      = new_chunk_size;
 
  232     m_granularity_mask = m_granularity - 1;
 
  235   void check_bounds_validity() {
 
  236     if (m_end < m_begin) {
 
  237       std::string msg = 
"Kokkos::RangePolicy bounds error: The lower bound (" +
 
  238                         std::to_string(m_begin) +
 
  239                         ") is greater than the upper bound (" +
 
  240                         std::to_string(m_end) + 
").\n";
 
  241 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4 
  242       Kokkos::abort(msg.c_str());
 
  246 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS 
  247       Kokkos::Impl::log_warning(msg);
 
  253   template <
typename IndexType>
 
  254   static void check_conversion_safety([[maybe_unused]] 
const IndexType bound) {
 
  256     if constexpr (std::is_convertible_v<member_type, IndexType>) {
 
  257 #if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) || \ 
  258     defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS) 
  261       if constexpr (std::is_arithmetic_v<member_type> &&
 
  262                     (std::is_signed_v<IndexType> !=
 
  263                      std::is_signed_v<member_type>)) {
 
  265         if constexpr (std::is_signed_v<IndexType>)
 
  266           warn |= (bound < static_cast<IndexType>(
 
  267                                std::numeric_limits<member_type>::min()));
 
  270         if constexpr (std::is_signed_v<member_type>)
 
  271           warn |= (bound > static_cast<IndexType>(
 
  272                                std::numeric_limits<member_type>::max()));
 
  277           (static_cast<IndexType>(static_cast<member_type>(bound)) != bound);
 
  281             "Kokkos::RangePolicy bound type error: an unsafe implicit " 
  282             "conversion is performed on a bound (" +
 
  283             std::to_string(bound) +
 
  284             "), which may not preserve its original value.\n";
 
  286 #ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4 
  287         Kokkos::abort(msg.c_str());
 
  290 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS 
  291         Kokkos::Impl::log_warning(msg);
 
  304     using work_tag    = 
typename RangePolicy<Properties...>::work_tag;
 
  305     using member_type = 
typename RangePolicy<Properties...>::member_type;
 
  // Returns this object's m_begin by value.
  // NOTE(review): the enclosing type's header is not visible in this excerpt.
  307     KOKKOS_INLINE_FUNCTION member_type begin()
 const { 
return m_begin; }
 
  // Returns this object's m_end by value.
  // NOTE(review): the enclosing type's header is not visible in this excerpt.
  308     KOKKOS_INLINE_FUNCTION member_type end()
 const { 
return m_end; }
 
  314     KOKKOS_INLINE_FUNCTION
 
  317         : m_begin(0), m_end(0) {
 
  320         const member_type work_part =
 
  321             ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
 
  322              range.m_granularity_mask) &
 
  323             ~member_type(range.m_granularity_mask);
 
  325         m_begin = range.begin() + work_part * part_rank;
 
  326         m_end   = m_begin + work_part;
 
  328         if (range.end() < m_begin) m_begin = range.end();
 
  329         if (range.end() < m_end) m_end = range.end();
 
  // Deduction guides for RangePolicy when no template arguments are written.
  // Each form below deduces RangePolicy<> (no properties):
  //   - no constructor arguments,
  //   - two int64_t bounds,
  //   - two int64_t bounds plus a ChunkSize,
  //   - a DefaultExecutionSpace instance plus two int64_t bounds.
  341 RangePolicy() -> RangePolicy<>;
 
  343 RangePolicy(int64_t, int64_t) -> RangePolicy<>;
 
  344 RangePolicy(int64_t, int64_t, ChunkSize 
const&) -> RangePolicy<>;
 
  346 RangePolicy(DefaultExecutionSpace 
const&, int64_t, int64_t) -> RangePolicy<>;
 
  347 RangePolicy(DefaultExecutionSpace 
const&, int64_t, int64_t, ChunkSize 
const&)
 
  // Deduction guide: an execution-space instance plus two int64_t bounds
  // deduces RangePolicy<ES>. The defaulted second template parameter removes
  // this guide from consideration unless is_execution_space_v<ES> is true.
  350 template <
typename ES, 
typename = std::enable_if_t<is_execution_space_v<ES>>>
 
  351 RangePolicy(ES 
const&, int64_t, int64_t) -> RangePolicy<ES>;
 
  // Deduction guide: an execution-space instance, two int64_t bounds and a
  // ChunkSize deduce RangePolicy<ES>. As with the guide without ChunkSize, it
  // only participates when is_execution_space_v<ES> is true.
  353 template <
typename ES, 
typename = std::enable_if_t<is_execution_space_v<ES>>>
 
  354 RangePolicy(ES 
const&, int64_t, int64_t, ChunkSize 
const&) -> RangePolicy<ES>;
 
  365 template <
class ExecSpace, 
class... Properties>
 
  366 class TeamPolicyInternal : 
public Impl::PolicyTraits<Properties...> {
 
  368   using traits = Impl::PolicyTraits<Properties...>;
 
  371   using index_type = 
typename traits::index_type;
 
  // Team-size query declarations (no definitions appear in this excerpt).
  //
  // team_size_max: static; takes a functor by const reference, returns int.
  // NOTE(review): presumably the largest team size usable with that functor —
  // semantics are not shown here; confirm against the implementation.
  384   template <
class FunctorType>
 
  385   static int team_size_max(
const FunctorType&);
 
  // team_size_recommended (functor only): static, returns int.
  397   template <
class FunctorType>
 
  398   static int team_size_recommended(
const FunctorType&);
 
  // team_size_recommended (functor, const int&): static, returns int.
  // NOTE(review): the meaning of the int argument is not visible here.
  400   template <
class FunctorType>
 
  401   static int team_size_recommended(
const FunctorType&, 
const int&);
 
  // team_size_recommended (functor, vector_length): unlike the two overloads
  // above, this one is declared as a NON-static member function.
  403   template <
class FunctorType>
 
  404   int team_size_recommended(
const FunctorType& functor,
 
  405                             const int vector_length);
 
  // Constructor declarations (definitions are not part of this excerpt).
  // All four take a league size and default vector_length_request to 1.
  //
  // (execution space, league size, explicit team size[, vector length])
  409   TeamPolicyInternal(
const typename traits::execution_space&,
 
  410                      int league_size_request, 
int team_size_request,
 
  411                      int vector_length_request = 1);
 
  // (execution space, league size, Kokkos::AUTO_t in place of the team size
  //  [, vector length])
  413   TeamPolicyInternal(
const typename traits::execution_space&,
 
  414                      int league_size_request, 
const Kokkos::AUTO_t&,
 
  415                      int vector_length_request = 1);
 
  // Same as the first form, but without an execution-space argument.
  419   TeamPolicyInternal(
int league_size_request, 
int team_size_request,
 
  420                      int vector_length_request = 1);
 
  // Same as the second form, but without an execution-space argument.
  422   TeamPolicyInternal(
int league_size_request, 
const Kokkos::AUTO_t&,
 
  423                      int vector_length_request = 1);
 
  // Accessor/mutator declarations; none of them is defined in this excerpt,
  // so the notes below describe signatures only.
  //
  // league_size(): const, returns int.
  434   KOKKOS_INLINE_FUNCTION 
int league_size() 
const;
 
  // team_size(): const, returns int.
  441   KOKKOS_INLINE_FUNCTION 
int team_size() 
const;
 
  // impl_auto_team_size(): const, returns bool.
  // NOTE(review): presumably true when the team size was requested as AUTO —
  // confirm against the implementation.
  445   inline bool impl_auto_team_size() 
const;
 
  // impl_auto_vector_length(): const, returns bool (see note above; same
  // caveat applies for the vector length).
  448   inline bool impl_auto_vector_length() 
const;
 
  // vector_length_max(): static, returns int.
  450   static int vector_length_max();
 
  // impl_vector_length(): const, returns int.
  452   KOKKOS_INLINE_FUNCTION 
int impl_vector_length() 
const;
 
  // chunk_size(): const, returns traits::index_type.
  454   inline typename traits::index_type chunk_size() 
const;
 
  // set_chunk_size(int): returns a reference to TeamPolicyInternal, so calls
  // can be chained.
  456   inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
 
  // Declarations of per-team-member operations. They are indented one level
  // deeper than the policy members; the header of the enclosing nested type
  // is not visible in this excerpt. Signatures only — no definitions here.
  //
  // team_shmem(): const, returns the execution space's scratch_memory_space.
  463     KOKKOS_INLINE_FUNCTION
 
  464     typename traits::execution_space::scratch_memory_space team_shmem() 
const;
 
  // league_rank(): const, returns int.
  467     KOKKOS_INLINE_FUNCTION 
int league_rank() 
const;
 
  // league_size(): const, returns int.
  470     KOKKOS_INLINE_FUNCTION 
int league_size() 
const;
 
  // team_rank(): const, returns int.
  473     KOKKOS_INLINE_FUNCTION 
int team_rank() 
const;
 
  // team_size(): const, returns int.
  476     KOKKOS_INLINE_FUNCTION 
int team_size() 
const;
 
  // team_barrier(): const, returns void.
  // NOTE(review): presumably synchronizes the threads of a team — confirm.
  479     KOKKOS_INLINE_FUNCTION 
void team_barrier() 
const;
 
  // team_reduce(value, join_op): takes a JoinOp::value_type by value and a
  // JoinOp by const reference; returns JoinOp::value_type.
  483     template <
class JoinOp>
 
  484     KOKKOS_INLINE_FUNCTION 
typename JoinOp::value_type team_reduce(
 
  485         const typename JoinOp::value_type, 
const JoinOp&) 
const;
 
  // team_scan(value): takes Type by const reference, returns Type.
  // NOTE(review): inclusive vs. exclusive scan is not determinable from this
  // declaration — confirm.
  492     template <
typename Type>
 
  493     KOKKOS_INLINE_FUNCTION Type team_scan(
const Type& value) 
const;
 
  // team_scan(value, global_accum): as above, with an extra Type* const
  // argument. NOTE(review): how global_accum is read/written is not shown.
  504     template <
typename Type>
 
  505     KOKKOS_INLINE_FUNCTION Type team_scan(
const Type& value,
 
  506                                           Type* 
const global_accum) 
const;
 
  510 struct PerTeamValue {
 
  512   PerTeamValue(
size_t arg);
 
  515 struct PerThreadValue {
 
  517   PerThreadValue(
size_t arg);
 
  520 template <
class iType, 
class... Args>
 
  521 struct ExtractVectorLength {
 
  522   static inline iType value(
 
  523       std::enable_if_t<std::is_integral_v<iType>, iType> val, Args...) {
 
  526   static inline std::enable_if_t<!std::is_integral_v<iType>, 
int> value(
 
  527       std::enable_if_t<!std::is_integral_v<iType>, iType>, Args...) {
 
  532 template <
class iType, 
class... Args>
 
  533 inline std::enable_if_t<std::is_integral_v<iType>, iType> extract_vector_length(
 
  534     iType val, Args...) {
 
  538 template <
class iType, 
class... Args>
 
  539 inline std::enable_if_t<!std::is_integral_v<iType>, 
int> extract_vector_length(
 
  // Factory declarations: each takes a size_t by const reference and returns
  // the corresponding Impl wrapper type by value (Impl::PerTeamValue or
  // Impl::PerThreadValue). Definitions are not part of this excerpt.
  546 Impl::PerTeamValue PerTeam(
const size_t& arg);
 
  547 Impl::PerThreadValue PerThread(
const size_t& arg);
 
  549 struct ScratchRequest {
 
  555   inline ScratchRequest(
const int& level_,
 
  556                         const Impl::PerTeamValue& team_value) {
 
  558     per_team   = team_value.value;
 
  562   inline ScratchRequest(
const int& level_,
 
  563                         const Impl::PerThreadValue& thread_value) {
 
  566     per_thread = thread_value.value;
 
  569   inline ScratchRequest(
const int& level_, 
const Impl::PerTeamValue& team_value,
 
  570                         const Impl::PerThreadValue& thread_value) {
 
  572     per_team   = team_value.value;
 
  573     per_thread = thread_value.value;
 
  576   inline ScratchRequest(
const int& level_,
 
  577                         const Impl::PerThreadValue& thread_value,
 
  578                         const Impl::PerTeamValue& team_value) {
 
  580     per_team   = team_value.value;
 
  581     per_thread = thread_value.value;
 
  // Declaration only: takes a scratch `level` and returns nothing.
  // NOTE(review): which levels are accepted and how an invalid one is reported
  // is defined elsewhere — confirm against the definition.
  586 void team_policy_check_valid_storage_level_argument(
int level);
 
  614 template <
class... Properties>
 
  616     : 
public Impl::TeamPolicyInternal<
 
  617           typename Impl::PolicyTraits<Properties...>::execution_space,
 
  619   using internal_policy = Impl::TeamPolicyInternal<
 
  620       typename Impl::PolicyTraits<Properties...>::execution_space,
 
  623   template <
class... OtherProperties>
 
  627   using traits = Impl::PolicyTraits<Properties...>;
 
  635              int league_size_request, 
int team_size_request,
 
  636              int vector_length_request = 1)
 
  637       : internal_policy(space_, league_size_request, team_size_request,
 
  638                         vector_length_request) {}
 
  // Constructs from (execution space, league size, AUTO team size
  // [, vector length = 1]). Pure forwarding: all arguments go to the
  // internal_policy base constructor, with Kokkos::AUTO() passed in place of
  // the unnamed AUTO_t parameter. The body is empty.
  640   TeamPolicy(
const typename traits::execution_space& space_,
 
  641              int league_size_request, 
const Kokkos::AUTO_t&,
 
  642              int vector_length_request = 1)
 
  643       : internal_policy(space_, league_size_request, Kokkos::AUTO(),
 
  644                         vector_length_request) {}
 
  646   TeamPolicy(
const typename traits::execution_space& space_,
 
  647              int league_size_request, 
const Kokkos::AUTO_t&,
 
  648              const Kokkos::AUTO_t&)
 
  649       : internal_policy(space_, league_size_request, Kokkos::AUTO(),
 
  651   TeamPolicy(
const typename traits::execution_space& space_,
 
  652              int league_size_request, 
const int team_size_request,
 
  653              const Kokkos::AUTO_t&)
 
  654       : internal_policy(space_, league_size_request, team_size_request,
 
  659              int vector_length_request = 1)
 
  660       : internal_policy(league_size_request, team_size_request,
 
  661                         vector_length_request) {}
 
  // Constructs from (league size, AUTO team size[, vector length = 1]) without
  // an execution-space argument. Forwards to the matching internal_policy
  // constructor, substituting Kokkos::AUTO() for the unnamed AUTO_t parameter.
  663   TeamPolicy(
int league_size_request, 
const Kokkos::AUTO_t&,
 
  664              int vector_length_request = 1)
 
  665       : internal_policy(league_size_request, Kokkos::AUTO(),
 
  666                         vector_length_request) {}
 
  // Constructs from (league size, AUTO team size, AUTO vector length) without
  // an execution-space argument. Both unnamed AUTO_t parameters are replaced
  // by Kokkos::AUTO() when forwarding to internal_policy.
  668   TeamPolicy(
int league_size_request, 
const Kokkos::AUTO_t&,
 
  669              const Kokkos::AUTO_t&)
 
  670       : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
 
  671   TeamPolicy(
int league_size_request, 
const int team_size_request,
 
  672              const Kokkos::AUTO_t&)
 
  673       : internal_policy(league_size_request, team_size_request,
 
  676   template <
class... OtherProperties>
 
  677   TeamPolicy(
const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
 
  680     internal_policy::traits::operator=(p);
 
  // Non-explicit converting constructor: builds a TeamPolicy by copy-
  // constructing its internal_policy base from p.
  684   TeamPolicy(
const internal_policy& p) : internal_policy(p) {}
 
  687   inline TeamPolicy& set_chunk_size(
int chunk) {
 
  689         std::is_same_v<decltype(internal_policy::set_chunk_size(chunk)),
 
  691         "internal set_chunk_size should return a reference");
 
  692     return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
 
  695   inline TeamPolicy& set_scratch_size(
const int& level,
 
  696                                       const Impl::PerTeamValue& per_team) {
 
  697     static_assert(std::is_same_v<decltype(internal_policy::set_scratch_size(
 
  700                   "internal set_chunk_size should return a reference");
 
  702     team_policy_check_valid_storage_level_argument(level);
 
  703     return static_cast<TeamPolicy&
>(
 
  704         internal_policy::set_scratch_size(level, per_team));
 
  706   inline TeamPolicy& set_scratch_size(
const int& level,
 
  707                                       const Impl::PerThreadValue& per_thread) {
 
  708     team_policy_check_valid_storage_level_argument(level);
 
  709     return static_cast<TeamPolicy&
>(
 
  710         internal_policy::set_scratch_size(level, per_thread));
 
  712   inline TeamPolicy& set_scratch_size(
const int& level,
 
  713                                       const Impl::PerTeamValue& per_team,
 
  714                                       const Impl::PerThreadValue& per_thread) {
 
  715     team_policy_check_valid_storage_level_argument(level);
 
  716     return static_cast<TeamPolicy&
>(
 
  717         internal_policy::set_scratch_size(level, per_team, per_thread));
 
  719   inline TeamPolicy& set_scratch_size(
const int& level,
 
  720                                       const Impl::PerThreadValue& per_thread,
 
  721                                       const Impl::PerTeamValue& per_team) {
 
  722     team_policy_check_valid_storage_level_argument(level);
 
  723     return static_cast<TeamPolicy&
>(
 
  724         internal_policy::set_scratch_size(level, per_team, per_thread));
 
  730 TeamPolicy() -> TeamPolicy<>;
 
  732 TeamPolicy(
int, 
int) -> TeamPolicy<>;
 
  733 TeamPolicy(
int, 
int, 
int) -> TeamPolicy<>;
 
  734 TeamPolicy(
int, Kokkos::AUTO_t 
const&) -> TeamPolicy<>;
 
  735 TeamPolicy(
int, Kokkos::AUTO_t 
const&, 
int) -> TeamPolicy<>;
 
  736 TeamPolicy(
int, Kokkos::AUTO_t 
const&, Kokkos::AUTO_t 
const&) -> TeamPolicy<>;
 
  737 TeamPolicy(
int, 
int, Kokkos::AUTO_t 
const&) -> TeamPolicy<>;
 
  741 TeamPolicy(DefaultExecutionSpace 
const&, 
int, 
int) -> TeamPolicy<>;
 
  742 TeamPolicy(DefaultExecutionSpace 
const&, 
int, 
int, 
int) -> TeamPolicy<>;
 
  743 TeamPolicy(DefaultExecutionSpace 
const&, 
int, Kokkos::AUTO_t 
const&)
 
  745 TeamPolicy(DefaultExecutionSpace 
const&, 
int, Kokkos::AUTO_t 
const&, 
int)
 
  747 TeamPolicy(DefaultExecutionSpace 
const&, 
int, Kokkos::AUTO_t 
const&,
 
  748            Kokkos::AUTO_t 
const&) -> TeamPolicy<>;
 
  749 TeamPolicy(DefaultExecutionSpace 
const&, 
int, 
int, Kokkos::AUTO_t 
const&)
 
  754 template <
typename ES,
 
  755           typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
 
  756 TeamPolicy(ES 
const&, 
int, 
int) -> TeamPolicy<ES>;
 
  758 template <
typename ES,
 
  759           typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
 
  760 TeamPolicy(ES 
const&, 
int, 
int, 
int) -> TeamPolicy<ES>;
 
  762 template <
typename ES,
 
  763           typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
 
  764 TeamPolicy(ES 
const&, 
int, Kokkos::AUTO_t 
const&) -> TeamPolicy<ES>;
 
  766 template <
typename ES,
 
  767           typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
 
  768 TeamPolicy(ES 
const&, 
int, Kokkos::AUTO_t 
const&, 
int) -> TeamPolicy<ES>;
 
  770 template <
typename ES,
 
  771           typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
 
  772 TeamPolicy(ES 
const&, 
int, Kokkos::AUTO_t 
const&, Kokkos::AUTO_t 
const&)
 
  775 template <
typename ES,
 
  776           typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
 
  777 TeamPolicy(ES 
const&, 
int, 
int, Kokkos::AUTO_t 
const&) -> TeamPolicy<ES>;
 
  781 template <
typename iType, 
class TeamMemberType>
 
  782 struct TeamThreadRangeBoundariesStruct {
 
  784   KOKKOS_INLINE_FUNCTION 
static iType ibegin(
const iType& arg_begin,
 
  785                                              const iType& arg_end,
 
  786                                              const iType& arg_rank,
 
  787                                              const iType& arg_size) {
 
  789            ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
 
  792   KOKKOS_INLINE_FUNCTION 
static iType iend(
const iType& arg_begin,
 
  793                                            const iType& arg_end,
 
  794                                            const iType& arg_rank,
 
  795                                            const iType& arg_size) {
 
  798         ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
 
  799     return end_ < arg_end ? end_ : arg_end;
 
  803   using index_type = iType;
 
  806   enum { increment = 1 };
 
  807   const TeamMemberType& member;
 
  // Count-based constructor: the range is [0, arg_count). `start` and `end`
  // are computed by ibegin/iend from the calling thread's team_rank() and the
  // team_size(). `member` is a reference member bound to arg_thread, so
  // arg_thread must outlive this object.
  809   KOKKOS_INLINE_FUNCTION
 
  810   TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
 
  811                                   const iType& arg_count)
 
  812       : start(ibegin(0, arg_count, arg_thread.team_rank(),
 
  813                      arg_thread.team_size())),
 
  814         end(iend(0, arg_count, arg_thread.team_rank(), arg_thread.team_size())),
 
  815         member(arg_thread) {}
 
  // Begin/end constructor: same as the count-based form but over
  // [arg_begin, arg_end). `start`/`end` come from ibegin/iend using the
  // thread's team_rank() and team_size(); `member` is bound by reference to
  // arg_thread, which must therefore outlive this object.
  817   KOKKOS_INLINE_FUNCTION
 
  818   TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
 
  819                                   const iType& arg_begin, 
const iType& arg_end)
 
  820       : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
 
  821                      arg_thread.team_size())),
 
  822         end(iend(arg_begin, arg_end, arg_thread.team_rank(),
 
  823                  arg_thread.team_size())),
 
  824         member(arg_thread) {}
 
  827 template <
typename iType, 
class TeamMemberType>
 
  828 struct TeamVectorRangeBoundariesStruct {
 
  830   KOKKOS_INLINE_FUNCTION 
static iType ibegin(
const iType& arg_begin,
 
  831                                              const iType& arg_end,
 
  832                                              const iType& arg_rank,
 
  833                                              const iType& arg_size) {
 
  835            ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
 
  838   KOKKOS_INLINE_FUNCTION 
static iType iend(
const iType& arg_begin,
 
  839                                            const iType& arg_end,
 
  840                                            const iType& arg_rank,
 
  841                                            const iType& arg_size) {
 
  844         ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
 
  845     return end_ < arg_end ? end_ : arg_end;
 
  849   using index_type = iType;
 
  852   enum { increment = 1 };
 
  853   const TeamMemberType& member;
 
  // Count-based constructor: the range is [0, arg_count). `start` and `end`
  // are computed by ibegin/iend from the calling thread's team_rank() and the
  // team_size(). `member` is a reference member bound to arg_thread, so
  // arg_thread must outlive this object.
  855   KOKKOS_INLINE_FUNCTION
 
  856   TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
 
  857                                   const iType& arg_count)
 
  858       : start(ibegin(0, arg_count, arg_thread.team_rank(),
 
  859                      arg_thread.team_size())),
 
  860         end(iend(0, arg_count, arg_thread.team_rank(), arg_thread.team_size())),
 
  861         member(arg_thread) {}
 
  // Begin/end constructor: same as the count-based form but over
  // [arg_begin, arg_end). `start`/`end` come from ibegin/iend using the
  // thread's team_rank() and team_size(); `member` is bound by reference to
  // arg_thread, which must therefore outlive this object.
  863   KOKKOS_INLINE_FUNCTION
 
  864   TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
 
  865                                   const iType& arg_begin, 
const iType& arg_end)
 
  866       : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
 
  867                      arg_thread.team_size())),
 
  868         end(iend(arg_begin, arg_end, arg_thread.team_rank(),
 
  869                  arg_thread.team_size())),
 
  870         member(arg_thread) {}
 
  873 template <
typename iType, 
class TeamMemberType>
 
  874 struct ThreadVectorRangeBoundariesStruct {
 
  875   using index_type = iType;
 
  876   const index_type start;
 
  877   const index_type end;
 
  878   enum { increment = 1 };
 
  // Count-based constructor: sets start to 0 and end to arg_count. The team
  // member argument is taken by value, is unnamed and is not used; the range
  // does not depend on it. Declared constexpr and noexcept.
  880   KOKKOS_INLINE_FUNCTION
 
  881   constexpr ThreadVectorRangeBoundariesStruct(
 
  882       const TeamMemberType, 
const index_type& arg_count) noexcept
 
  883       : start(static_cast<index_type>(0)), end(arg_count) {}
 
  // Begin/end constructor: sets start to arg_begin and end to arg_end
  // unchanged. The team member argument is taken by value, is unnamed and is
  // not used. Declared constexpr and noexcept.
  885   KOKKOS_INLINE_FUNCTION
 
  886   constexpr ThreadVectorRangeBoundariesStruct(
 
  887       const TeamMemberType, 
const index_type& arg_begin,
 
  888       const index_type& arg_end) noexcept
 
  889       : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
 
  892 template <
class TeamMemberType>
 
  893 struct ThreadSingleStruct {
 
  894   const TeamMemberType& team_member;
 
  // Non-explicit constructor: binds the reference member team_member to
  // team_member_. No copy is made, so the referenced team member must outlive
  // this object.
  895   KOKKOS_INLINE_FUNCTION
 
  896   ThreadSingleStruct(
const TeamMemberType& team_member_)
 
  897       : team_member(team_member_) {}
 
  900 template <
class TeamMemberType>
 
  901 struct VectorSingleStruct {
 
  902   const TeamMemberType& team_member;
 
  // Non-explicit constructor: binds the reference member team_member to
  // team_member_. No copy is made, so the referenced team member must outlive
  // this object.
  903   KOKKOS_INLINE_FUNCTION
 
  904   VectorSingleStruct(
const TeamMemberType& team_member_)
 
  905       : team_member(team_member_) {}
 
  // Deleted placeholder declaration of TeamThreadRange(team, count).
  // `_never_use_this_overload` appears in no function parameter, so it cannot
  // be deduced from a call, and the function is `= delete`d anyway.
  // NOTE(review): presumably exists so the name is declared in this header and
  // usable overloads are supplied elsewhere — confirm.
  917 template <
typename iType, 
class TeamMemberType, 
class _never_use_this_overload>
 
  918 KOKKOS_INLINE_FUNCTION_DELETED
 
  919     Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
 
  920     TeamThreadRange(
const TeamMemberType&, 
const iType& count) = 
delete;
 
  // Deleted placeholder declaration of TeamThreadRange(team, begin, end).
  // The declared return type uses std::common_type_t of the two index types.
  // `_never_use_this_overload` appears in no function parameter, so it cannot
  // be deduced from a call, and the function is `= delete`d anyway.
  929 template <
typename iType1, 
typename iType2, 
class TeamMemberType,
 
  930           class _never_use_this_overload>
 
  931 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
 
  932     std::common_type_t<iType1, iType2>, TeamMemberType>
 
  933 TeamThreadRange(
const TeamMemberType&, 
const iType1& begin,
 
  934                 const iType2& end) = 
delete;
 
  943 template <
typename iType, 
class TeamMemberType, 
class _never_use_this_overload>
 
  944 KOKKOS_INLINE_FUNCTION_DELETED
 
  945     Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
 
  946     TeamVectorRange(
const TeamMemberType&, 
const iType& count) = 
delete;
 
  955 template <
typename iType1, 
typename iType2, 
class TeamMemberType,
 
  956           class _never_use_this_overload>
 
  957 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
 
  958     std::common_type_t<iType1, iType2>, TeamMemberType>
 
  959 TeamVectorRange(
const TeamMemberType&, 
const iType1& begin,
 
  960                 const iType2& end) = 
delete;
 
  // Deleted placeholder declaration of ThreadVectorRange(team, count).
  // `_never_use_this_overload` appears in no function parameter, so it cannot
  // be deduced from a call, and the function is `= delete`d anyway.
  969 template <
typename iType, 
class TeamMemberType, 
class _never_use_this_overload>
 
  970 KOKKOS_INLINE_FUNCTION_DELETED
 
  971     Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
 
  972     ThreadVectorRange(
const TeamMemberType&, 
const iType& count) = 
delete;
 
  // Deleted placeholder declaration of ThreadVectorRange(team, begin, end).
  // The declared return type uses std::common_type_t of the two index types.
  // `_never_use_this_overload` appears in no function parameter, so it cannot
  // be deduced from a call, and the function is `= delete`d anyway.
  974 template <
typename iType1, 
typename iType2, 
class TeamMemberType,
 
  975           class _never_use_this_overload>
 
  976 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
 
  977     std::common_type_t<iType1, iType2>, TeamMemberType>
 
  978 ThreadVectorRange(
const TeamMemberType&, 
const iType1& arg_begin,
 
  979                   const iType2& arg_end) = 
delete;
 
  // Four scoped enums with `bool` as the underlying type. In each one the
  // first enumerator has value 0 (false) and the second has value 1 (true),
  // so every enum is a named yes/no flag:
  //   TeamMDRangeLastNestLevel   - NotLastNestLevel / LastNestLevel
  //   TeamMDRangeParThread       - NotParThread / ParThread
  //   TeamMDRangeParVector       - NotParVector / ParVector
  //   TeamMDRangeThreadAndVector - NotBoth / Both
  983 enum class TeamMDRangeLastNestLevel : bool { NotLastNestLevel, LastNestLevel };
 
  984 enum class TeamMDRangeParThread : bool { NotParThread, ParThread };
 
  985 enum class TeamMDRangeParVector : bool { NotParVector, ParVector };
 
  986 enum class TeamMDRangeThreadAndVector : bool { NotBoth, Both };
 
  // Forward declarations only; the definitions are not part of this excerpt.
  // HostBasedNestLevel and AcceleratorBasedNestLevel are parameterized on a
  // Rank type and a TeamMDRangeThreadAndVector flag; ThreadAndVectorNestLevel
  // additionally takes an execution-space type.
  // NOTE(review): presumably ThreadAndVectorNestLevel selects between the
  // host and accelerator variants per execution space — confirm.
  988 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
 
  989 struct HostBasedNestLevel;
 
  991 template <
typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
 
  992 struct AcceleratorBasedNestLevel;
 
 1002 template <
typename Rank, 
typename ExecSpace,
 
 1003           TeamMDRangeThreadAndVector ThreadAndVector>
 
 1004 struct ThreadAndVectorNestLevel;
 
 // Empty tag type with no members.
 // NOTE(review): presumably passed where a reduction value would otherwise go
 // to signal "no reduction" — its users are not visible here; confirm.
 1006 struct NoReductionTag {};
 
 // Declaration only (the definition is not part of this excerpt).
 // Takes the policy and the lambda by const reference and the reduction value
 // as a forwarding reference; returns void. `Rank` is not deducible from the
 // arguments and must be supplied explicitly by the caller.
 1008 template <
typename Rank, 
typename TeamMDPolicy, 
typename Lambda,
 
 1009           typename ReductionValueType>
 
 1010 KOKKOS_INLINE_FUNCTION 
void md_parallel_impl(TeamMDPolicy 
const& policy,
 
 1011                                              Lambda 
const& lambda,
 
 1012                                              ReductionValueType&& val);
 
 1015 template <
typename Rank, 
typename TeamHandle>
 
 1016 struct TeamThreadMDRange;
 
 1018 template <
unsigned N, Iterate OuterDir, Iterate InnerDir, 
typename TeamHandle>
 
 1019 struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
 
 1020   using NestLevelType  = int;
 
 1021   using BoundaryType   = int;
 
 1022   using TeamHandleType = TeamHandle;
 
 1023   using ExecutionSpace = 
typename TeamHandleType::execution_space;
 
 1024   using ArrayLayout    = 
typename ExecutionSpace::array_layout;
 
 1026   static constexpr NestLevelType total_nest_level =
 
 1027       Rank<N, OuterDir, InnerDir>::rank;
 
 1028   static constexpr Iterate iter    = OuterDir;
 
 1029   static constexpr 
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
 
 1030   static constexpr 
auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
 
 1032   static constexpr Iterate direction =
 
 1033       OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
 
 1034                                          ArrayLayout>::outer_iteration_pattern
 
 1037   template <
class... Args>
 
 1038   KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType 
const& team_, Args&&... args)
 
 1039       : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
 
 1040     static_assert(
sizeof...(Args) == total_nest_level);
 
 1043   TeamHandleType 
const& team;
 
 1044   BoundaryType boundaries[total_nest_level];
 
 // Deduction guide: constructing TeamThreadMDRange from a team handle plus
 // N extents deduces a rank equal to the number of extents (sizeof...(Args)),
 // with Iterate::Default as the outer direction, and the handle's type as
 // TeamHandle.
 1047 template <
typename TeamHandle, 
typename... Args>
 
 1048 KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle 
const&, Args&&...)
 
 1049     -> TeamThreadMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
 
 1051 template <
typename Rank, 
typename TeamHandle>
 
 1052 struct ThreadVectorMDRange;
 
 // Partial specialization of ThreadVectorMDRange for Rank<N, OuterDir, InnerDir>.
 // It is tagged par_thread = NotParThread and par_vector = ParVector.
 // NOTE(review): this listing omits some numbered lines (1071-1072, 1075,
 // 1078-1079, 1082 onward), so the else-branch of `direction`, the end of the
 // constructor's parameter list and the closing braces are not visible here.
 1054 template <
unsigned N, Iterate OuterDir, Iterate InnerDir, 
typename TeamHandle>
 
 1055 struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
 
 1056   using NestLevelType  = int;
 
 1057   using BoundaryType   = int;
 
 1058   using TeamHandleType = TeamHandle;
 
 1059   using ExecutionSpace = 
typename TeamHandleType::execution_space;
 
 1060   using ArrayLayout    = 
typename ExecutionSpace::array_layout;
 
 // Nesting depth, read from the `rank` member of the Rank type.
 1062   static constexpr NestLevelType total_nest_level =
 
 1063       Rank<N, OuterDir, InnerDir>::rank;
 
 1064   static constexpr Iterate iter    = OuterDir;
 
 1065   static constexpr 
auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
 
 1066   static constexpr 
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
 
 // When OuterDir is Iterate::Default the direction is looked up from the
 // execution space's array layout via Impl::layout_iterate_type_selector.
 1068   static constexpr Iterate direction =
 
 1069       OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
 
 1070                                          ArrayLayout>::outer_iteration_pattern
 
 // Constructor: binds `team` to the given handle and converts every trailing
 // argument to BoundaryType to fill `boundaries`. The number of trailing
 // arguments must equal total_nest_level (checked at compile time).
 1073   template <
class... Args>
 
 1074   KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType 
const& team_,
 
 1076       : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
 
 1077     static_assert(
sizeof...(Args) == total_nest_level);
 
 // Held by reference, so the handle passed to the constructor must outlive
 // this policy object.
 1080   TeamHandleType 
const& team;
 
 1081   BoundaryType boundaries[total_nest_level];
 
 // Deduction guide for ThreadVectorMDRange: a team handle followed by N further
 // arguments deduces ThreadVectorMDRange<Rank<N, Iterate::Default>, TeamHandle>.
 1084 template <
typename TeamHandle, 
typename... Args>
 
 1085 KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle 
const&, Args&&...)
 
 1086     -> ThreadVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
 
 // Primary template, declared but not defined here; it is parameterized on a
 // Rank type and a team handle type.
 1088 template <
typename Rank, 
typename TeamHandle>
 
 1089 struct TeamVectorMDRange;
 
 // Partial specialization of TeamVectorMDRange for Rank<N, OuterDir, InnerDir>.
 // It is tagged par_thread = ParThread and par_vector = ParVector.
 // NOTE(review): this listing omits some numbered lines (1108-1109, 1112,
 // 1115-1116, 1119 onward), so the else-branch of `direction`, the end of the
 // constructor's parameter list and the closing braces are not visible here.
 1091 template <
unsigned N, Iterate OuterDir, Iterate InnerDir, 
typename TeamHandle>
 
 1092 struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
 
 1093   using NestLevelType  = int;
 
 1094   using BoundaryType   = int;
 
 1095   using TeamHandleType = TeamHandle;
 
 1096   using ExecutionSpace = 
typename TeamHandleType::execution_space;
 
 1097   using ArrayLayout    = 
typename ExecutionSpace::array_layout;
 
 // Nesting depth, read from the `rank` member of the Rank type.
 1099   static constexpr NestLevelType total_nest_level =
 
 1100       Rank<N, OuterDir, InnerDir>::rank;
 
 1101   static constexpr Iterate iter    = OuterDir;
 
 1102   static constexpr 
auto par_thread = Impl::TeamMDRangeParThread::ParThread;
 
 1103   static constexpr 
auto par_vector = Impl::TeamMDRangeParVector::ParVector;
 
 // When `iter` (which equals OuterDir) is Iterate::Default the direction is
 // looked up from the execution space's array layout via
 // Impl::layout_iterate_type_selector.
 1105   static constexpr Iterate direction =
 
 1106       iter == Iterate::Default ? Impl::layout_iterate_type_selector<
 
 1107                                      ArrayLayout>::outer_iteration_pattern
 
 // Constructor: binds `team` to the given handle and converts every trailing
 // argument to BoundaryType to fill `boundaries`. The number of trailing
 // arguments must equal total_nest_level (checked at compile time).
 1110   template <
class... Args>
 
 1111   KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType 
const& team_,
 
 1113       : team(team_), boundaries{
static_cast<BoundaryType
>(args)...} {
 
 1114     static_assert(
sizeof...(Args) == total_nest_level);
 
 // Held by reference, so the handle passed to the constructor must outlive
 // this policy object.
 1117   TeamHandleType 
const& team;
 
 1118   BoundaryType boundaries[total_nest_level];
 
 // Deduction guide for TeamVectorMDRange: a team handle followed by N further
 // arguments deduces TeamVectorMDRange<Rank<N, Iterate::Default>, TeamHandle>.
 1121 template <
typename TeamHandle, 
typename... Args>
 
 1122 KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle 
const&, Args&&...)
 
 1123     -> TeamVectorMDRange<Rank<
sizeof...(Args), Iterate::Default>, TeamHandle>;
 
 // parallel_reduce overload for a TeamThreadMDRange policy.
 // Resets `val` to a value-initialized ReducerValueType, runs
 // Impl::md_parallel_impl<Rank> with the lambda and `val`, and then calls
 // team_reduce on the policy's team with a Kokkos::Sum reducer wrapping `val`.
 // NOTE(review): listing lines 1130, 1135 and 1140 onward are not visible, so
 // the opening of the compile-time check and the closing brace are missing
 // from this extract.
 1125 template <
typename Rank, 
typename TeamHandle, 
typename Lambda,
 
 1126           typename ReducerValueType>
 
 1127 KOKKOS_INLINE_FUNCTION 
void parallel_reduce(
 
 1128     TeamThreadMDRange<Rank, TeamHandle> 
const& policy, Lambda 
const& lambda,
 
 1129     ReducerValueType& val) {
 
 // Visible conditions: ReducerValueType must not be an array, a pointer or a
 // Kokkos reducer.
 1131                 !std::is_array_v<ReducerValueType> &&
 
 1132                     !std::is_pointer_v<ReducerValueType> &&
 
 1133                     !Kokkos::is_reducer_v<ReducerValueType>,
 
 1134                 "Only scalar return types are allowed!");
 
 // Whatever the caller stored in `val` is discarded before the loop runs.
 1136   val = ReducerValueType{};
 
 1137   Impl::md_parallel_impl<Rank>(policy, lambda, val);
 
 1138   policy.team.team_reduce(
 
 1139       Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
 
 // parallel_for overload for a TeamThreadMDRange policy: forwards the policy
 // and lambda to Impl::md_parallel_impl<Rank>, passing Impl::NoReductionTag()
 // in the position where the reduce overloads pass their result value.
 1142 template <
typename Rank, 
typename TeamHandle, 
typename Lambda>
 
 1143 KOKKOS_INLINE_FUNCTION 
void parallel_for(
 
 1144     TeamThreadMDRange<Rank, TeamHandle> 
const& policy, Lambda 
const& lambda) {
 
 1145   Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
 
 // parallel_reduce overload for a ThreadVectorMDRange policy.
 // Resets `val` to a value-initialized ReducerValueType and runs
 // Impl::md_parallel_impl<Rank> with the lambda and `val`; a vector_reduce
 // call on the policy's team with a Kokkos::Sum reducer follows.
 // NOTE(review): listing lines 1153, 1158, 1161, 1164, 1167, 1170-1172 and
 // 1175 onward are not visible, so the condition that guards vector_reduce,
 // the types compared per backend and the reducer's argument cannot be read
 // from this extract.
 1148 template <
typename Rank, 
typename TeamHandle, 
typename Lambda,
 
 1149           typename ReducerValueType>
 
 1150 KOKKOS_INLINE_FUNCTION 
void parallel_reduce(
 
 1151     ThreadVectorMDRange<Rank, TeamHandle> 
const& policy, Lambda 
const& lambda,
 
 1152     ReducerValueType& val) {
 
 // Visible conditions: ReducerValueType must not be an array, a pointer or a
 // Kokkos reducer.
 1154                 !std::is_array_v<ReducerValueType> &&
 
 1155                     !std::is_pointer_v<ReducerValueType> &&
 
 1156                     !Kokkos::is_reducer_v<ReducerValueType>,
 
 1157                 "Only a scalar return types are allowed!");
 
 // Whatever the caller stored in `val` is discarded before the loop runs.
 1159   val = ReducerValueType{};
 
 1160   Impl::md_parallel_impl<Rank>(policy, lambda, val);
 
 // Exactly one of the CUDA / HIP / SYCL branches (first one enabled) adds an
 // `|| std::is_same_v<typename TeamHandle::execution_space, ...>` term to the
 // surrounding condition.
 1162 #ifdef KOKKOS_ENABLE_CUDA
 
 1163                 || std::is_same_v<
typename TeamHandle::execution_space,
 
 1165 #elif defined(KOKKOS_ENABLE_HIP)
 
 1166                 || std::is_same_v<
typename TeamHandle::execution_space,
 
 1168 #elif defined(KOKKOS_ENABLE_SYCL)
 
 1169                 || std::is_same_v<
typename TeamHandle::execution_space,
 
 1173     policy.team.vector_reduce(
 
 1174         Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
 
 // parallel_for overload for a ThreadVectorMDRange policy: forwards the policy
 // and lambda to Impl::md_parallel_impl<Rank>, passing Impl::NoReductionTag()
 // in the position where the reduce overloads pass their result value.
 1178 template <
typename Rank, 
typename TeamHandle, 
typename Lambda>
 
 1179 KOKKOS_INLINE_FUNCTION 
void parallel_for(
 
 1180     ThreadVectorMDRange<Rank, TeamHandle> 
const& policy, Lambda 
const& lambda) {
 
 1181   Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
 
 // parallel_reduce overload for a TeamVectorMDRange policy.
 // Resets `val` to a value-initialized ReducerValueType and runs
 // Impl::md_parallel_impl<Rank> with the lambda and `val`. A vector_reduce
 // call and then a team_reduce call on the policy's team follow, each with a
 // Kokkos::Sum reducer.
 // NOTE(review): listing lines 1189, 1194, 1197, 1200, 1203, 1206-1208, 1211
 // and 1214 onward are not visible, so the condition that guards
 // vector_reduce, the types compared per backend and the vector reducer's
 // argument cannot be read from this extract.
 1184 template <
typename Rank, 
typename TeamHandle, 
typename Lambda,
 
 1185           typename ReducerValueType>
 
 1186 KOKKOS_INLINE_FUNCTION 
void parallel_reduce(
 
 1187     TeamVectorMDRange<Rank, TeamHandle> 
const& policy, Lambda 
const& lambda,
 
 1188     ReducerValueType& val) {
 
 // Visible conditions: ReducerValueType must not be an array, a pointer or a
 // Kokkos reducer.
 1190                 !std::is_array_v<ReducerValueType> &&
 
 1191                     !std::is_pointer_v<ReducerValueType> &&
 
 1192                     !Kokkos::is_reducer_v<ReducerValueType>,
 
 1193                 "Only a scalar return types are allowed!");
 
 // Whatever the caller stored in `val` is discarded before the loop runs.
 1195   val = ReducerValueType{};
 
 1196   Impl::md_parallel_impl<Rank>(policy, lambda, val);
 
 // Exactly one of the CUDA / HIP / SYCL branches (first one enabled) adds an
 // `|| std::is_same_v<typename TeamHandle::execution_space, ...>` term to the
 // surrounding condition.
 1198 #ifdef KOKKOS_ENABLE_CUDA
 
 1199                 || std::is_same_v<
typename TeamHandle::execution_space,
 
 1201 #elif defined(KOKKOS_ENABLE_HIP)
 
 1202                 || std::is_same_v<
typename TeamHandle::execution_space,
 
 1204 #elif defined(KOKKOS_ENABLE_SYCL)
 
 1205                 || std::is_same_v<
typename TeamHandle::execution_space,
 
 1209     policy.team.vector_reduce(
 
 1210         Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
 
 // The team-level sum over `val` is issued after the vector-level step.
 1212   policy.team.team_reduce(
 
 1213       Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
 
 // parallel_for overload for a TeamVectorMDRange policy: forwards the policy
 // and lambda to Impl::md_parallel_impl<Rank>, passing Impl::NoReductionTag()
 // in the position where the reduce overloads pass their result value.
 1216 template <
typename Rank, 
typename TeamHandle, 
typename Lambda>
 
 1217 KOKKOS_INLINE_FUNCTION 
void parallel_for(
 
 1218     TeamVectorMDRange<Rank, TeamHandle> 
const& policy, Lambda 
const& lambda) {
 
 1219   Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
 
 // Primary template, declared only. HasTag defaults to true when TagType is
 // not void and to false when it is void.
 1224 template <
typename FunctorType, 
typename TagType,
 
 1225           bool HasTag = !std::is_void_v<TagType>>
 
 1226 struct ParallelConstructName;
 
 // Specialization for HasTag == true. Keeps a reference to the caller's label
 // and, when that label is empty, builds a fallback name of the form
 // "<functor type name>/<tag type name>".
 // NOTE(review): listing lines 1233, 1236 and 1239-1241 are not visible; the
 // directive separating the TypeInfo variant from the typeid variant is
 // presumably an #else -- confirm against the full header.
 1228 template <
typename FunctorType, 
typename TagType>
 
 1229 struct ParallelConstructName<FunctorType, TagType, true> {
 
 1230   ParallelConstructName(std::string 
const& label) : label_ref(label) {
 
 1231     if (label.empty()) {
 
 // With KOKKOS_ENABLE_IMPL_TYPEINFO the names come from TypeInfo<...>::name(),
 // with const removed from FunctorType first.
 1232 #ifdef KOKKOS_ENABLE_IMPL_TYPEINFO 
 1234           std::string(TypeInfo<std::remove_const_t<FunctorType>>::name()) +
 
 1235           "/" + std::string(TypeInfo<TagType>::name());
 
 // Variant based on typeid(...).name() for both the functor and the tag.
 1237       default_name = std::string(
typeid(FunctorType).name()) + 
"/" +
 
 1238                      typeid(TagType).name();
 
 // Returns default_name when the stored label is empty, otherwise the label.
 1242   std::string 
const& 
get() {
 
 1243     return (label_ref.empty()) ? default_name : label_ref;
 
 // Reference member: the string passed to the constructor must outlive this
 // object.
 1245   std::string 
const& label_ref;
 
 1246   std::string default_name;
 
 // Specialization for HasTag == false. Keeps a reference to the caller's label
 // and, when that label is empty, uses the functor's type name alone as the
 // fallback name.
 // NOTE(review): listing lines 1255 and 1257-1259 are not visible; the
 // directive separating the TypeInfo variant from the typeid variant is
 // presumably an #else -- confirm against the full header.
 1249 template <
typename FunctorType, 
typename TagType>
 
 1250 struct ParallelConstructName<FunctorType, TagType, false> {
 
 1251   ParallelConstructName(std::string 
const& label) : label_ref(label) {
 
 1252     if (label.empty()) {
 
 // With KOKKOS_ENABLE_IMPL_TYPEINFO the name comes from TypeInfo<...>::name(),
 // with const removed from FunctorType first.
 1253 #ifdef KOKKOS_ENABLE_IMPL_TYPEINFO 
 1254       default_name = TypeInfo<std::remove_const_t<FunctorType>>::name();
 
 // Variant based on typeid(FunctorType).name().
 1256       default_name = 
typeid(FunctorType).name();
 
 // Returns default_name when the stored label is empty, otherwise the label.
 1260   std::string 
const& 
get() {
 
 1261     return (label_ref.empty()) ? default_name : label_ref;
 
 // Reference member: the string passed to the constructor must outlive this
 // object.
 1263   std::string 
const& label_ref;
 
 1264   std::string default_name;
 
 // Primary template, declared only: takes a pattern tag plus the argument pack
 // that is forwarded to the matching Parallel* template.
 1275 template <
class PatternTag, 
class... Args>
 
 1276 struct PatternImplSpecializationFromTag;
 
 // Kokkos::ParallelForTag with Args... maps to ParallelFor<Args...>, carried
 // by the type_identity base.
 1278 template <
class... Args>
 
 1279 struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
 
 1280     : type_identity<ParallelFor<Args...>> {};
 
 // Kokkos::ParallelReduceTag with Args... maps to ParallelReduce<Args...>,
 // carried by the type_identity base.
 1282 template <
class... Args>
 
 1283 struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
 
 1284     : type_identity<ParallelReduce<Args...>> {};
 
 // Kokkos::ParallelScanTag with Args... maps to ParallelScan<Args...>, carried
 // by the type_identity base.
 1286 template <
class... Args>
 
 1287 struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
 
 1288     : type_identity<ParallelScan<Args...>> {};
 
 // Primary template, declared only: the reverse mapping, from a Parallel*
 // specialization to its pattern tag.
 1290 template <
class PatternImpl>
 
 1291 struct PatternTagFromImplSpecialization;
 
 // Any ParallelFor<Args...> maps to ParallelForTag, carried by the
 // type_identity base.
 1293 template <
class... Args>
 
 1294 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
 
 1295     : type_identity<ParallelForTag> {};
 
 // Any ParallelReduce<Args...> maps to ParallelReduceTag, carried by the
 // type_identity base.
 1297 template <
class... Args>
 
 1298 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
 
 1299     : type_identity<ParallelReduceTag> {};
 
 // Any ParallelScan<Args...> maps to ParallelScanTag, carried by the
 // type_identity base.
 1301 template <
class... Args>
 
 1302 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
 
 1303     : type_identity<ParallelScanTag> {};
 
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space. 
 
RangePolicy(const typename traits::execution_space &work_space, const IndexType1 work_begin, const IndexType2 work_end)
Total range. 
 
member_type chunk_size() const 
Returns the chunk size. 
 
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space. 
 
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end, const ChunkSize chunk_size)
Total range. 
 
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size. 
 
RangePolicy & set_chunk_size(int chunk_size)
Sets the chunk size to a discrete value. 
 
Execution policy for work over a range of an integral type. 
 
Subrange for a partition's rank and size. 
 
Execution policy for parallel work over a league of teams of threads. 
 
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
Total range. 
 
Parallel execution of a functor calls the functor once with each member of the execution policy...