17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE 
   18 #include <Kokkos_Macros.hpp> 
   20               "Including non-public Kokkos header files is not allowed.");
 
   22 #ifndef KOKKOS_WORKGRAPHPOLICY_HPP 
   23 #define KOKKOS_WORKGRAPHPOLICY_HPP 
   25 #include <impl/Kokkos_AnalyzePolicy.hpp> 
   26 #include <Kokkos_Crs.hpp> 
   31 template <
class functor_type, 
class execution_space, 
class... policy_args>
 
   39 template <
class... Properties>
 
   40 class WorkGraphPolicy : 
public Kokkos::Impl::PolicyTraits<Properties...> {
 
   // Public type aliases. execution_policy and self_type both name this
   // exact specialization; traits names the PolicyTraits base class.
   42   using execution_policy = WorkGraphPolicy<Properties...>;
 
   43   using self_type        = WorkGraphPolicy<Properties...>;
 
   44   using traits           = Kokkos::Impl::PolicyTraits<Properties...>;
 
   45   using index_type       = 
typename traits::index_type;
 
   // member_type is simply the policy's index_type.
   46   using member_type      = index_type;
 
   47   using execution_space  = 
typename traits::execution_space;
 
   // Memory space is taken from the execution space selected by the traits.
   48   using memory_space     = 
typename execution_space::memory_space;
 
   // Rank-1 View of 32-bit signed integers living in memory_space.
   // NOTE(review): m_queue is presumably declared with this type; its
   // declaration is not among the visible lines -- confirm.
   58   using ints_type = Kokkos::View<std::int32_t*, memory_space>;
 
   // Graph the policy operates on. The methods below query it through
   // numRows(), row_map and entries. graph_type itself is declared outside
   // the visible lines.
   65   graph_type 
const m_graph;
 
   // push_work: append work item `w` to the ready queue.
   //
   // With N = m_graph.numRows(), the ready queue occupies m_queue[0 .. N) and
   // the end-of-queue counter lives at m_queue[2*N + 1]. A slot index j is
   // reserved by atomically fetching-and-incrementing that counter, then `w`
   // is atomically exchanged into ready_queue[j].
   //
   // Calls Kokkos::abort if the reserved index is not below N, or if the slot
   // did not previously hold END_TOKEN (the value the TagInit operator writes
   // into the first numRows() entries of m_queue).
   //
   // NOTE(review): the lines following the abort call (closing braces and any
   // trailing statements) are not visible in this excerpt.
   68   KOKKOS_INLINE_FUNCTION
 
   69   void push_work(
const std::int32_t w) 
const noexcept {
 
   70     const std::int32_t N = m_graph.numRows();
 
   72     std::int32_t* 
const ready_queue = &m_queue[0];
 
   73     std::int32_t* 
const end_hint    = &m_queue[2 * N + 1];
 
   // Reserve the next free slot; j is the counter value before the increment.
   76     const std::int32_t j = atomic_fetch_add(end_hint, 1);
 
   // The exchange both publishes `w` and reports what the slot held before.
   78     if ((N <= j) || (END_TOKEN != atomic_exchange(ready_queue + j, w))) {
 
   80       Kokkos::abort(
"WorkGraphPolicy push_work error");
 
  // pop_work: try to claim one work item from the ready queue.
  //
  // With N = m_graph.numRows(), the ready queue is m_queue[0 .. N) and the
  // scan start position is read atomically from m_queue[2*N]. Each slot from
  // that position up to N is loaded atomically and examined:
  //   * a slot equal to END_TOKEN takes the first branch, whose body is not
  //     visible in this excerpt;
  //   * a slot that is not BEGIN_TOKEN is claimed by compare-exchanging it
  //     from the observed value `w` to BEGIN_TOKEN; when the exchange
  //     returns `w` the claim succeeded and the start position at
  //     m_queue[2*N] is atomically incremented.
  // The visible tail of the function returns COMPLETED_TOKEN.
  //
  // NOTE(review): the return statements inside the two branches are not
  // visible here. Presumably the claimed `w` is returned on success, and
  // COMPLETED_TOKEN is reached only when the scan finds nothing -- confirm
  // against the full file.
  101   KOKKOS_INLINE_FUNCTION
 
  102   std::int32_t pop_work() const noexcept {
 
  103     const std::int32_t N = m_graph.numRows();
 
  105     std::int32_t* 
const ready_queue = &m_queue[0];
 
  106     std::int32_t* 
const begin_hint  = &m_queue[2 * N];
 
  // Start scanning at the position read from begin_hint rather than at 0.
  111     for (std::int32_t i = Kokkos::atomic_load(begin_hint); i < N; ++i) {
 
  112       const std::int32_t w = Kokkos::atomic_load(&ready_queue[i]);
 
  114       if (w == END_TOKEN) {
 
  // Skip slots already marked BEGIN_TOKEN; otherwise attempt to mark this
  // slot BEGIN_TOKEN, succeeding only if it still holds the value we loaded.
  118       if ((w != BEGIN_TOKEN) &&
 
  119           (w == atomic_compare_exchange(ready_queue + i, w,
 
  120                                         (std::int32_t)BEGIN_TOKEN))) {
 
  123         atomic_inc(begin_hint);
 
  129     return COMPLETED_TOKEN;
 
  // completed_work: record that work item `w` has finished.
  //
  // Issues Kokkos::memory_fence() first, then walks row `w` of m_graph:
  // entries at positions [row_map(w), row_map(w + 1)). For each entry j it
  // atomically adds -1 to count_queue[j], where count_queue starts at
  // m_queue[N] and N = m_graph.numRows().
  //
  // The final visible branch is taken when the value before the decrement
  // was 1, i.e. this call brought that count to zero.
  //
  // NOTE(review): the body of that branch is not visible in this excerpt;
  // presumably it makes j available as ready work -- confirm.
  132   KOKKOS_INLINE_FUNCTION
 
  133   void completed_work(std::int32_t w) 
const noexcept {
 
  134     Kokkos::memory_fence();
 
  138     const std::int32_t N = m_graph.numRows();
 
  140     std::int32_t* 
const count_queue = &m_queue[N];
 
  // B and E delimit the entries belonging to row w.
  142     const std::int32_t B = m_graph.row_map(w);
 
  143     const std::int32_t E = m_graph.row_map(w + 1);
 
  145     for (std::int32_t i = B; i < E; ++i) {
 
  146       const std::int32_t j = m_graph.entries(i);
 
  // atomic_fetch_add returns the prior value, so 1 means "now zero".
  147       if (1 == atomic_fetch_add(count_queue + j, -1)) {
 
  // TagInit kernel: initialise element i of m_queue.
  // Indices below m_graph.numRows() (the ready-queue region read by
  // push_work/pop_work) are set to END_TOKEN; every other index is set to 0.
  163   KOKKOS_INLINE_FUNCTION
 
  164   void operator()(
const TagInit, 
int i) 
const noexcept {
 
  165     m_queue[i] = i < m_graph.numRows() ? END_TOKEN : 0;
 
  // TagCount kernel: for graph entry i, atomically increment the counter of
  // the row that entry names. Counters start at m_queue[numRows()], so after
  // this has run for every entry, count_queue[r] holds the number of times r
  // occurs in m_graph.entries.
  168   KOKKOS_INLINE_FUNCTION
 
  169   void operator()(
const TagCount, 
int i) 
const noexcept {
 
  170     std::int32_t* 
const count_queue = &m_queue[m_graph.numRows()];
 
  172     atomic_inc(count_queue + m_graph.entries[i]);
 
  // TagReady kernel: if the counter for `w` (stored at
  // m_queue[numRows() + w]) is zero, hand `w` to push_work. The counter is
  // only read here, hence the pointer-to-const.
  175   KOKKOS_INLINE_FUNCTION
 
  176   void operator()(
const TagReady, 
int w) 
const noexcept {
 
  177     std::int32_t 
const* 
const count_queue = &m_queue[m_graph.numRows()];
 
  179     if (0 == count_queue[w]) push_work(w);
 
  // Returns a default-constructed execution_space instance; no state of the
  // policy object is consulted.
  182   execution_space space()
 const { 
return execution_space(); }
 
  // Constructs the policy from `arg_graph`.
  //
  // m_graph is copy-initialised from the argument. m_queue is allocated with
  // label "queue", without initialisation, and 2 * numRows() + 2 elements.
  // Per the indexing used by the member functions, that is: numRows()
  // ready-queue slots, numRows() counters, then the begin hint at [2N] and
  // the end hint at [2N + 1].
  //
  // Three RangePolicy types are then set up in sequence, tagged TagInit,
  // TagCount and TagReady, over [0, m_queue.size()),
  // [0, m_graph.entries.size()) and [0, m_graph.numRows()) respectively.
  // Each is wrapped in a closure built from *this and followed by a fence on
  // a default-constructed execution_space.
  //
  // NOTE(review): the closure_type declarations, the calls that actually run
  // each closure, the remainder of each fence message and the enclosing
  // scope braces are not visible in this excerpt -- confirm against the full
  // file.
  184   WorkGraphPolicy(
const graph_type& arg_graph)
 
  185       : m_graph(arg_graph),
 
  186         m_queue(view_alloc(
"queue", WithoutInitializing),
 
  187                 arg_graph.numRows() * 2 + 2) {
 
  // Phase 1: TagInit over every element of m_queue.
  189       using policy_type  = RangePolicy<std::int32_t, execution_space, TagInit>;
 
  191       const closure_type closure(*
this, policy_type(0, m_queue.size()));
 
  193       execution_space().fence(
 
  194           "Kokkos::WorkGraphPolicy::WorkGraphPolicy: fence after executing " 
  // Phase 2: TagCount over every graph entry.
  199       using policy_type  = RangePolicy<std::int32_t, execution_space, TagCount>;
 
  201       const closure_type closure(*
this, policy_type(0, m_graph.entries.size()));
 
  203       execution_space().fence(
 
  204           "Kokkos::WorkGraphPolicy::WorkGraphPolicy: fence after executing " 
  // Phase 3: TagReady over every graph row.
  209       using policy_type  = RangePolicy<std::int32_t, execution_space, TagReady>;
 
  211       const closure_type closure(*
this, policy_type(0, m_graph.numRows()));
 
  213       execution_space().fence(
 
  214           "Kokkos::WorkGraphPolicy::WorkGraphPolicy: fence after executing " 
  222 #ifdef KOKKOS_ENABLE_SERIAL 
  223 #include "Serial/Kokkos_Serial_WorkGraphPolicy.hpp" 
  226 #ifdef KOKKOS_ENABLE_OPENMP 
  227 #include "OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp" 
  230 #ifdef KOKKOS_ENABLE_CUDA 
  231 #include "Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp" 
  234 #ifdef KOKKOS_ENABLE_HIP 
  235 #include "HIP/Kokkos_HIP_WorkGraphPolicy.hpp" 
  238 #ifdef KOKKOS_ENABLE_THREADS 
  239 #include "Threads/Kokkos_Threads_WorkGraphPolicy.hpp" 
  242 #ifdef KOKKOS_ENABLE_HPX 
  243 #include "HPX/Kokkos_HPX_WorkGraphPolicy.hpp" 
Compressed row storage array. 
 
Implementation of the ParallelFor operator that has a partial specialization for the device...