17 #ifndef KOKKOS_BITSET_HPP 
   18 #define KOKKOS_BITSET_HPP 
   19 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE 
   20 #define KOKKOS_IMPL_PUBLIC_INCLUDE 
   21 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET 
   24 #include <Kokkos_Core.hpp> 
   25 #include <Kokkos_BitManipulation.hpp> 
   26 #include <Kokkos_Functional.hpp> 
   28 #include <impl/Kokkos_Bitset_impl.hpp> 
   32 template <
typename Device = Kokkos::DefaultExecutionSpace>
 
   35 template <
typename Device = Kokkos::DefaultExecutionSpace>
 
   38 template <
typename DstDevice, 
typename SrcDevice>
 
   41 template <
typename DstDevice, 
typename SrcDevice>
 
   44 template <
typename DstDevice, 
typename SrcDevice>
 
   48 template <
typename Device>
 
   51   using execution_space = 
typename Device::execution_space;
 
   52   using size_type       = 
unsigned int;
 
   54   static constexpr 
unsigned BIT_SCAN_REVERSE   = 1u;
 
   55   static constexpr 
unsigned MOVE_HINT_BACKWARD = 2u;
 
   57   static constexpr 
unsigned BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u;
 
   58   static constexpr 
unsigned BIT_SCAN_REVERSE_MOVE_HINT_FORWARD =
 
   60   static constexpr 
unsigned BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD =
 
   62   static constexpr 
unsigned BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD =
 
   63       BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
 
   66   static constexpr 
unsigned block_size = 
sizeof(unsigned) * CHAR_BIT;
 
   67   static constexpr 
unsigned block_mask = block_size - 1u;
 
   68   static constexpr 
unsigned block_shift =
 
   69       Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
 
   73   using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
 
   79   Bitset(
unsigned arg_size) : 
Bitset(Kokkos::view_alloc(), arg_size) {}
 
   82   Bitset(
const Impl::ViewCtorProp<P...>& arg_prop, 
unsigned arg_size)
 
   83       : m_size(arg_size), m_last_block_mask(0u) {
 
   85     using alloc_prop_t = std::decay_t<decltype(arg_prop)>;
 
   86     static_assert(alloc_prop_t::initialize,
 
   87                   "Allocation property 'initialize' should be true.");
 
   89         !alloc_prop_t::has_pointer,
 
   90         "Allocation properties should not contain the 'pointer' property.");
 
   93     const auto prop_copy =
 
   94         Impl::with_properties_if_unset(arg_prop, std::string(
"Bitset"));
 
   96         block_view_type(prop_copy, ((m_size + block_mask) >> block_shift));
 
   98     for (
int i = 0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
 
   99       m_last_block_mask |= 1u << i;
 
  103   KOKKOS_DEFAULTED_FUNCTION
 
  106   KOKKOS_DEFAULTED_FUNCTION
 
  109   KOKKOS_DEFAULTED_FUNCTION
 
  112   KOKKOS_DEFAULTED_FUNCTION
 
  115   KOKKOS_DEFAULTED_FUNCTION
 
  120   KOKKOS_FORCEINLINE_FUNCTION
 
  121   unsigned size()
 const { 
return m_size; }
 
  126     Impl::BitsetCount<Bitset<Device>> f(*
this);
 
  133     Kokkos::deep_copy(m_blocks, ~0u);
 
  135     if (m_last_block_mask) {
 
  137       auto last_block = Kokkos::subview(m_blocks, m_blocks.extent(0) - 1u);
 
  138       Kokkos::deep_copy(
typename Device::execution_space{}, last_block,
 
  141           "Bitset::set: fence after clearing unused bits copying from " 
  148   void reset() { Kokkos::deep_copy(m_blocks, 0u); }
 
  152   void clear() { Kokkos::deep_copy(m_blocks, 0u); }
 
  156   KOKKOS_FORCEINLINE_FUNCTION
 
  157   bool set(
unsigned i)
 const {
 
  159       unsigned* block_ptr = &m_blocks[i >> block_shift];
 
  160       const unsigned mask = 1u << static_cast<int>(i & block_mask);
 
  162       return !(atomic_fetch_or(block_ptr, mask) & mask);
 
  169   KOKKOS_FORCEINLINE_FUNCTION
 
  172       unsigned* block_ptr = &m_blocks[i >> block_shift];
 
  173       const unsigned mask = 1u << static_cast<int>(i & block_mask);
 
  175       return atomic_fetch_and(block_ptr, ~mask) & mask;
 
  182   KOKKOS_FORCEINLINE_FUNCTION
 
  185 #ifdef KOKKOS_ENABLE_SYCL 
  186       const unsigned block = Kokkos::atomic_load(&m_blocks[i >> block_shift]);
 
  188       const unsigned block = volatile_load(&m_blocks[i >> block_shift]);
 
  190       const unsigned mask = 1u << static_cast<int>(i & block_mask);
 
  199   KOKKOS_FORCEINLINE_FUNCTION
 
  200   unsigned max_hint()
 const { 
return m_blocks.extent(0); }
 
  206   KOKKOS_INLINE_FUNCTION
 
  209       unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD)
 const {
 
  210     const unsigned block_idx =
 
  211         (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0;
 
  212     const unsigned offset = hint & block_mask;
 
  213 #ifdef KOKKOS_ENABLE_SYCL 
  214     unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
 
  216     unsigned block = volatile_load(&m_blocks[block_idx]);
 
  218     block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
 
  220                 : block & m_last_block_mask;
 
  222     return find_any_helper(block_idx, offset, block, scan_direction);
 
  229   KOKKOS_INLINE_FUNCTION
 
  232       unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD)
 const {
 
  233     const unsigned block_idx = hint >> block_shift;
 
  234     const unsigned offset    = hint & block_mask;
 
  235 #ifdef KOKKOS_ENABLE_SYCL 
  236     unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
 
  238     unsigned block = volatile_load(&m_blocks[block_idx]);
 
  240     block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
 
  242                 : ~block & m_last_block_mask;
 
  244     return find_any_helper(block_idx, offset, block, scan_direction);
 
  247   KOKKOS_INLINE_FUNCTION constexpr 
bool is_allocated()
 const {
 
  248     return m_blocks.is_allocated();
 
  252   KOKKOS_FORCEINLINE_FUNCTION
 
  254                                                unsigned offset, 
unsigned block,
 
  255                                                unsigned scan_direction)
 const {
 
  259       result.second = update_hint(block_idx, offset, scan_direction);
 
  262           scan_block((block_idx << block_shift), offset, block, scan_direction);
 
  267   KOKKOS_FORCEINLINE_FUNCTION
 
  268   unsigned scan_block(
unsigned block_start, 
int offset, 
unsigned block,
 
  269                       unsigned scan_direction)
 const {
 
  270     offset = !(scan_direction & BIT_SCAN_REVERSE)
 
  272                  : (offset + block_mask) & block_mask;
 
  273     block  = Experimental::rotr_builtin(block, offset);
 
  274     return (((!(scan_direction & BIT_SCAN_REVERSE)
 
  275                   ? Experimental::countr_zero_builtin(block)
 
  276                   : Experimental::bit_width_builtin(block) - 1) +
 
  282   KOKKOS_FORCEINLINE_FUNCTION
 
  283   unsigned update_hint(
long long block_idx, 
unsigned offset,
 
  284                        unsigned scan_direction)
 const {
 
  285     block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
 
  286     block_idx = block_idx >= 0 ? block_idx : m_blocks.extent(0) - 1;
 
  288         block_idx < static_cast<long long>(m_blocks.extent(0)) ? block_idx : 0;
 
  290     return static_cast<unsigned>(block_idx) * block_size + offset;
 
  295   unsigned m_last_block_mask = 0;
 
  296   block_view_type m_blocks;
 
  299   template <
typename DDevice>
 
  302   template <
typename DDevice>
 
  303   friend class ConstBitset;
 
  305   template <
typename Bitset>
 
  306   friend struct Impl::BitsetCount;
 
  308   template <
typename DstDevice, 
typename SrcDevice>
 
  309   friend void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> 
const& src);
 
  311   template <
typename DstDevice, 
typename SrcDevice>
 
  312   friend void deep_copy(Bitset<DstDevice>& dst,
 
  313                         ConstBitset<SrcDevice> 
const& src);
 
  318 template <
typename Device>
 
  321   using execution_space = 
typename Device::execution_space;
 
  322   using size_type       = 
unsigned int;
 
  323   using block_view_type = 
typename Bitset<Device>::block_view_type::const_type;
 
  326   static constexpr 
unsigned block_size = 
sizeof(unsigned) * CHAR_BIT;
 
  327   static constexpr 
unsigned block_mask = block_size - 1u;
 
  328   static constexpr 
unsigned block_shift =
 
  329       Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
 
  334   ConstBitset() : m_size(0) {}
 
  337   ConstBitset(Bitset<Device> 
const& rhs)
 
  338       : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
 
  341   ConstBitset(ConstBitset<Device> 
const& rhs)
 
  342       : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
 
  345   ConstBitset<Device>& operator=(Bitset<Device> 
const& rhs) {
 
  346     this->m_size   = rhs.m_size;
 
  347     this->m_blocks = rhs.m_blocks;
 
  353   ConstBitset<Device>& operator=(ConstBitset<Device> 
const& rhs) {
 
  354     this->m_size   = rhs.m_size;
 
  355     this->m_blocks = rhs.m_blocks;
 
  360   KOKKOS_FORCEINLINE_FUNCTION
 
  361   unsigned size()
 const { 
return m_size; }
 
  363   unsigned count()
 const {
 
  364     Impl::BitsetCount<ConstBitset<Device>> f(*
this);
 
  368   KOKKOS_FORCEINLINE_FUNCTION
 
  369   bool test(
unsigned i)
 const {
 
  371       const unsigned block = m_blocks[i >> block_shift];
 
  372       const unsigned mask  = 1u << static_cast<int>(i & block_mask);
 
  380   block_view_type m_blocks;
 
  383   template <
typename DDevice>
 
  384   friend class ConstBitset;
 
  386   template <
typename Bitset>
 
  387   friend struct Impl::BitsetCount;
 
  389   template <
typename DstDevice, 
typename SrcDevice>
 
  390   friend void deep_copy(Bitset<DstDevice>& dst,
 
  391                         ConstBitset<SrcDevice> 
const& src);
 
  393   template <
typename DstDevice, 
typename SrcDevice>
 
  394   friend void deep_copy(ConstBitset<DstDevice>& dst,
 
  395                         ConstBitset<SrcDevice> 
const& src);
 
  398 template <
typename DstDevice, 
typename SrcDevice>
 
  399 void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> 
const& src) {
 
  400   if (dst.size() != src.size()) {
 
  401     Kokkos::Impl::throw_runtime_exception(
 
  402         "Error: Cannot deep_copy bitsets of different sizes!");
 
  404   Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
 
  407 template <
typename DstDevice, 
typename SrcDevice>
 
  408 void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> 
const& src) {
 
  409   if (dst.size() != src.size()) {
 
  410     Kokkos::Impl::throw_runtime_exception(
 
  411         "Error: Cannot deep_copy bitsets of different sizes!");
 
  413   Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
 
  416 template <
typename DstDevice, 
typename SrcDevice>
 
  417 void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> 
const& src) {
 
  418   if (dst.size() != src.size()) {
 
  419     Kokkos::Impl::throw_runtime_exception(
 
  420         "Error: Cannot deep_copy bitsets of different sizes!");
 
  422   Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
 
  427 #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET 
  428 #undef KOKKOS_IMPL_PUBLIC_INCLUDE 
  429 #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET 
  431 #endif  // KOKKOS_BITSET_HPP 
A thread safe view to a bitset. 
 
KOKKOS_FORCEINLINE_FUNCTION unsigned max_hint() const 
 
Replacement for std::pair that works on CUDA devices. 
 
Bitset(const Impl::ViewCtorProp< P...> &arg_prop, unsigned arg_size)
 
KOKKOS_FORCEINLINE_FUNCTION bool reset(unsigned i) const 
 
KOKKOS_FORCEINLINE_FUNCTION bool test(unsigned i) const 
 
KOKKOS_FORCEINLINE_FUNCTION unsigned size() const 
 
KOKKOS_INLINE_FUNCTION Kokkos::pair< bool, unsigned > find_any_set_near(unsigned hint, unsigned scan_direction=BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const 
 
Bitset(unsigned arg_size)
arg_size := number of bit in set 
 
KOKKOS_FORCEINLINE_FUNCTION bool set(unsigned i) const 
 
KOKKOS_INLINE_FUNCTION Kokkos::pair< bool, unsigned > find_any_unset_near(unsigned hint, unsigned scan_direction=BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const