17 #ifndef KOKKOS_BIT_MANIPULATION_HPP 
   18 #define KOKKOS_BIT_MANIPULATION_HPP 
   20 #include <Kokkos_Macros.hpp> 
   21 #include <Kokkos_NumericTraits.hpp> 
   24 #include <type_traits> 
   26 namespace Kokkos::Impl {
 
   29 KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) {
 
   30   if constexpr (
sizeof(T) > 1) {
 
   31     using U = std::make_unsigned_t<T>;
 
   33     size_t shift = CHAR_BIT * (
sizeof(T) - 1);
 
   35     U lo_mask = 
static_cast<unsigned char>(~0);
 
   36     U hi_mask = lo_mask << shift;
 
   40     for (
size_t i = 0; i < 
sizeof(T) / 2; ++i) {
 
   41       U lo_val = val & lo_mask;
 
   42       U hi_val = val & hi_mask;
 
   44       val = (val & ~lo_mask) | (hi_val >> shift);
 
   45       val = (val & ~hi_mask) | (lo_val << shift);
 
   50       shift -= 
static_cast<size_t>(2) * CHAR_BIT;
 
   59 KOKKOS_FUNCTION constexpr 
int countl_zero_fallback(T x) {
 
   62   using ::Kokkos::Experimental::digits_v;
 
   64   int c = digits_v<T> / 2;
 
   73   return n - 
static_cast<int>(x);
 
   77 KOKKOS_FUNCTION constexpr 
int countr_zero_fallback(T x) {
 
   78   using ::Kokkos::Experimental::digits_v;
 
   79   return digits_v<T> - countl_zero_fallback(static_cast<T>(
 
   80                            static_cast<T>(~x) & static_cast<T>(x - 1)));
 
   84 KOKKOS_FUNCTION constexpr 
int popcount_fallback(T x) {
 
   86   for (; x != 0; x &= x - 1) {
 
   93 inline constexpr 
bool is_standard_unsigned_integer_type_v =
 
   94     std::is_same_v<T, unsigned char> || std::is_same_v<T, unsigned short> ||
 
   95     std::is_same_v<T, unsigned int> || std::is_same_v<T, unsigned long> ||
 
   96     std::is_same_v<T, unsigned long long>;
 
  103 template <
class To, 
class From>
 
  104 KOKKOS_FUNCTION std::enable_if_t<
sizeof(To) == 
sizeof(From) &&
 
  105                                      std::is_trivially_copyable_v<To> &&
 
  106                                      std::is_trivially_copyable_v<From>,
 
  108 bit_cast(From 
const& from) noexcept {
 
  109 #if defined(KOKKOS_ENABLE_SYCL) 
  110   return sycl::bit_cast<To>(from);
 
  113   memcpy(static_cast<void*>(&to), static_cast<const void*>(&from), 
sizeof(To));
 
  121 KOKKOS_FUNCTION constexpr std::enable_if_t<std::is_integral_v<T>, T> byteswap(
 
  123   return Impl::byteswap_fallback(value);
 
  129 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  130     Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  131 countl_zero(T x) noexcept {
 
  132   using ::Kokkos::Experimental::digits_v;
 
  133   if (x == 0) 
return digits_v<T>;
 
  135   return Impl::countl_zero_fallback(x);
 
  139 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  140     Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  141 countl_one(T x) noexcept {
 
  142   using ::Kokkos::Experimental::digits_v;
 
  143   using ::Kokkos::Experimental::finite_max_v;
 
  144   if (x == finite_max_v<T>) 
return digits_v<T>;
 
  145   return countl_zero(static_cast<T>(~x));
 
  149 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  150     Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  151 countr_zero(T x) noexcept {
 
  152   using ::Kokkos::Experimental::digits_v;
 
  153   if (x == 0) 
return digits_v<T>;
 
  155   return Impl::countr_zero_fallback(x);
 
  159 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  160     Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  161 countr_one(T x) noexcept {
 
  162   using ::Kokkos::Experimental::digits_v;
 
  163   using ::Kokkos::Experimental::finite_max_v;
 
  164   if (x == finite_max_v<T>) 
return digits_v<T>;
 
  165   return countr_zero(static_cast<T>(~x));
 
  169 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  170     Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  171 popcount(T x) noexcept {
 
  172   if (x == 0) 
return 0;
 
  174   return Impl::popcount_fallback(x);
 
  180 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  181     Impl::is_standard_unsigned_integer_type_v<T>, 
bool>
 
  182 has_single_bit(T x) noexcept {
 
  183   return x != 0 && (((x & (x - 1)) == 0));
 
  187 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  188     Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  189 bit_ceil(T x) noexcept {
 
  190   if (x <= 1) 
return 1;
 
  191   using ::Kokkos::Experimental::digits_v;
 
  192   return T{1} << (digits_v<T> - countl_zero(static_cast<T>(x - 1)));
 
  196 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  197     Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  198 bit_floor(T x) noexcept {
 
  199   if (x == 0) 
return 0;
 
  200   using ::Kokkos::Experimental::digits_v;
 
  201   return T{1} << (digits_v<T> - 1 - countl_zero(x));
 
  205 KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  206     Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  207 bit_width(T x) noexcept {
 
  208   if (x == 0) 
return 0;
 
  209   using ::Kokkos::Experimental::digits_v;
 
  210   return digits_v<T> - countl_zero(x);
 
  216 [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  217     Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  218 rotl(T x, 
int s) noexcept {
 
  219   using Experimental::digits_v;
 
  220   constexpr 
auto dig = digits_v<T>;
 
  221   int const rem      = s % dig;
 
  222   if (rem == 0) 
return x;
 
  223   if (rem > 0) 
return (x << rem) | (x >> ((dig - rem) % dig));
 
  224   return (x >> -rem) | (x << ((dig + rem) % dig));  
 
  228 [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
 
  229     Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  230 rotr(T x, 
int s) noexcept {
 
  231   using Experimental::digits_v;
 
  232   constexpr 
auto dig = digits_v<T>;
 
  233   int const rem      = s % dig;
 
  234   if (rem == 0) 
return x;
 
  235   if (rem > 0) 
return (x >> rem) | (x << ((dig - rem) % dig));
 
  236   return (x << -rem) | (x >> ((dig + rem) % dig));  
 
  242 namespace Kokkos::Impl {
 
  244 #if defined(KOKKOS_COMPILER_CLANG) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \ 
  245     defined(KOKKOS_COMPILER_GNU) 
  246 #define KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS 
  250 KOKKOS_IMPL_DEVICE_FUNCTION T byteswap_builtin_device(T x) noexcept {
 
  251   return byteswap_fallback(x);
 
  255 KOKKOS_IMPL_HOST_FUNCTION T byteswap_builtin_host(T x) noexcept {
 
  256 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS 
  257   if constexpr (
sizeof(T) == 1) {
 
  259   } 
else if constexpr (
sizeof(T) == 2) {
 
  260     return __builtin_bswap16(x);
 
  261   } 
else if constexpr (
sizeof(T) == 4) {
 
  262     return __builtin_bswap32(x);
 
  263   } 
else if constexpr (
sizeof(T) == 8) {
 
  264     return __builtin_bswap64(x);
 
  265   } 
else if constexpr (
sizeof(T) == 16) {
 
  266 #if defined(__has_builtin) 
  267 #if __has_builtin(__builtin_bswap128) 
  268     return __builtin_bswap128(x);
 
  271     return (__builtin_bswap64(x >> 64) |
 
  272             (static_cast<T>(__builtin_bswap64(x)) << 64));
 
  276   return byteswap_fallback(x);
 
  280 KOKKOS_IMPL_DEVICE_FUNCTION
 
  281     std::enable_if_t<is_standard_unsigned_integer_type_v<T>, 
int>
 
  282     countl_zero_builtin_device(T x) noexcept {
 
  283 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) 
  284   if constexpr (
sizeof(T) == 
sizeof(
long long int))
 
  285     return __clzll(reinterpret_cast<long long int&>(x));
 
  286   if constexpr (
sizeof(T) == 
sizeof(
int))
 
  287     return __clz(reinterpret_cast<
int&>(x));
 
  288   using ::Kokkos::Experimental::digits_v;
 
  289   constexpr 
int shift = digits_v<
unsigned int> - digits_v<T>;
 
  290   return __clz(x) - shift;
 
  291 #elif defined(KOKKOS_ENABLE_SYCL) 
  294   return countl_zero_fallback(x);
 
  299 KOKKOS_IMPL_HOST_FUNCTION
 
  300     std::enable_if_t<is_standard_unsigned_integer_type_v<T>, 
int>
 
  301     countl_zero_builtin_host(T x) noexcept {
 
  302   using ::Kokkos::Experimental::digits_v;
 
  303   if (x == 0) 
return digits_v<T>;
 
  304 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS 
  305   if constexpr (std::is_same_v<T, unsigned long long>) {
 
  306     return __builtin_clzll(x);
 
  307   } 
else if constexpr (std::is_same_v<T, unsigned long>) {
 
  308     return __builtin_clzl(x);
 
  309   } 
else if constexpr (std::is_same_v<T, unsigned int>) {
 
  310     return __builtin_clz(x);
 
  312     constexpr 
int shift = digits_v<unsigned int> - digits_v<T>;
 
  313     return __builtin_clz(x) - shift;
 
  316   return countl_zero_fallback(x);
 
  321 KOKKOS_IMPL_DEVICE_FUNCTION
 
  322     std::enable_if_t<is_standard_unsigned_integer_type_v<T>, 
int>
 
  323     countr_zero_builtin_device(T x) noexcept {
 
  324   using ::Kokkos::Experimental::digits_v;
 
  325   if (x == 0) 
return digits_v<T>;
 
  326 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) 
  327   if constexpr (
sizeof(T) == 
sizeof(
long long int))
 
  328     return __ffsll(reinterpret_cast<long long int&>(x)) - 1;
 
  329   return __ffs(reinterpret_cast<int&>(x)) - 1;
 
  330 #elif defined(KOKKOS_ENABLE_SYCL) 
  333   return countr_zero_fallback(x);
 
  338 KOKKOS_IMPL_HOST_FUNCTION
 
  339     std::enable_if_t<is_standard_unsigned_integer_type_v<T>, 
int>
 
  340     countr_zero_builtin_host(T x) noexcept {
 
  341   using ::Kokkos::Experimental::digits_v;
 
  342   if (x == 0) 
return digits_v<T>;
 
  343 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS 
  344   if constexpr (std::is_same_v<T, unsigned long long>) {
 
  345     return __builtin_ctzll(x);
 
  346   } 
else if constexpr (std::is_same_v<T, unsigned long>) {
 
  347     return __builtin_ctzl(x);
 
  349     return __builtin_ctz(x);
 
  352   return countr_zero_fallback(x);
 
  357 KOKKOS_IMPL_DEVICE_FUNCTION
 
  358     std::enable_if_t<is_standard_unsigned_integer_type_v<T>, 
int>
 
  359     popcount_builtin_device(T x) noexcept {
 
  360 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) 
  361   if constexpr (
sizeof(T) == 
sizeof(
long long int)) 
return __popcll(x);
 
  363 #elif defined(KOKKOS_ENABLE_SYCL) 
  364   return sycl::popcount(x);
 
  366   return popcount_fallback(x);
 
  371 KOKKOS_IMPL_HOST_FUNCTION
 
  372     std::enable_if_t<is_standard_unsigned_integer_type_v<T>, 
int>
 
  373     popcount_builtin_host(T x) noexcept {
 
  374 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS 
  375   if constexpr (std::is_same_v<T, unsigned long long>) {
 
  376     return __builtin_popcountll(x);
 
  377   } 
else if constexpr (std::is_same_v<T, unsigned long>) {
 
  378     return __builtin_popcountl(x);
 
  380     return __builtin_popcount(x);
 
  383   return popcount_fallback(x);
 
  387 #undef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS 
  391 namespace Kokkos::Experimental {
 
  393 template <
class To, 
class From>
 
  394 KOKKOS_FUNCTION std::enable_if_t<
sizeof(To) == 
sizeof(From) &&
 
  395                                      std::is_trivially_copyable_v<To> &&
 
  396                                      std::is_trivially_copyable_v<From>,
 
  398 bit_cast_builtin(From 
const& from) noexcept {
 
  400   return Kokkos::bit_cast<To>(from);  
 
  404 KOKKOS_FUNCTION std::enable_if_t<std::is_integral_v<T>, T> byteswap_builtin(
 
  406   KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::byteswap_builtin_device(x);))
 
  407   KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::byteswap_builtin_host(x);))
 
  409 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC) 
  415 KOKKOS_FUNCTION std::enable_if_t<
 
  416     ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  417 countl_zero_builtin(T x) noexcept {
 
  418   KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countl_zero_builtin_device(x);))
 
  419   KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countl_zero_builtin_host(x);))
 
  421 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC) 
  427 KOKKOS_FUNCTION std::enable_if_t<
 
  428     ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  429 countl_one_builtin(T x) noexcept {
 
  430   if (x == finite_max_v<T>) 
return digits_v<T>;
 
  431   return countl_zero_builtin(static_cast<T>(~x));
 
  435 KOKKOS_FUNCTION std::enable_if_t<
 
  436     ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  437 countr_zero_builtin(T x) noexcept {
 
  438   KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countr_zero_builtin_device(x);))
 
  439   KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countr_zero_builtin_host(x);))
 
  441 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC) 
  447 KOKKOS_FUNCTION std::enable_if_t<
 
  448     ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  449 countr_one_builtin(T x) noexcept {
 
  450   if (x == finite_max_v<T>) 
return digits_v<T>;
 
  451   return countr_zero_builtin(static_cast<T>(~x));
 
  455 KOKKOS_FUNCTION std::enable_if_t<
 
  456     ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, 
int>
 
  457 popcount_builtin(T x) noexcept {
 
  458   KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::popcount_builtin_device(x);))
 
  459   KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::popcount_builtin_host(x);))
 
  461 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC) 
  467 KOKKOS_FUNCTION std::enable_if_t<
 
  468     ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, 
bool>
 
  469 has_single_bit_builtin(T x) noexcept {
 
  470   return has_single_bit(x);  
 
  475     std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  476     bit_ceil_builtin(T x) noexcept {
 
  477   if (x <= 1) 
return 1;
 
  478   return T{1} << (digits_v<T> - countl_zero_builtin(static_cast<T>(x - 1)));
 
  483     std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  484     bit_floor_builtin(T x) noexcept {
 
  485   if (x == 0) 
return 0;
 
  486   return T{1} << (digits_v<T> - 1 - countl_zero_builtin(x));
 
  491     std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  492     bit_width_builtin(T x) noexcept {
 
  493   if (x == 0) 
return 0;
 
  494   return digits_v<T> - countl_zero_builtin(x);
 
  498 [[nodiscard]] KOKKOS_FUNCTION
 
  499     std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  500     rotl_builtin(T x, 
int s) noexcept {
 
  505 [[nodiscard]] KOKKOS_FUNCTION
 
  506     std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
 
  507     rotr_builtin(T x, 
int s) noexcept {