16 #ifndef Intrepid2_DataCombiners_hpp
17 #define Intrepid2_DataCombiners_hpp
28 #include "Intrepid2_ScalarView.hpp"
31 template<
class DataScalar,
typename DeviceType>
34 template<
class BinaryOperator,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType,
35 class ArgExtractorThis,
class ArgExtractorA,
class ArgExtractorB,
bool includeInnerLoop=
false>
39 ThisUnderlyingViewType this_underlying_;
40 AUnderlyingViewType A_underlying_;
41 BUnderlyingViewType B_underlying_;
42 BinaryOperator binaryOperator_;
// Constructor overload that takes no inner-loop size: copies the three underlying views and
// the binary operator into the functor's members.
// NOTE(review): the initializer list ends with a trailing comma in this listing, so at least
// one further member initializer sits on a line that is not shown here — confirm against the header.
45 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
46 BinaryOperator binaryOperator)
48 this_underlying_(this_underlying),
49 A_underlying_(A_underlying),
50 B_underlying_(B_underlying),
51 binaryOperator_(binaryOperator),
// This overload receives no loop extent, so it rejects instantiations that ask for an inner
// loop: std::invalid_argument is raised through the macro when includeInnerLoop is true.
54 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,
"If includeInnerLoop is true, must specify the size of the inner loop");
57 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
58 BinaryOperator binaryOperator,
int innerLoopSize)
60 this_underlying_(this_underlying),
61 A_underlying_(A_underlying),
62 B_underlying_(B_underlying),
63 binaryOperator_(binaryOperator),
64 innerLoopSize_(innerLoopSize)
66 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,
"If includeInnerLoop is true, must specify the size of the inner loop");
// Call operator overload that performs exactly one combination per invocation: the index pack
// args is forwarded unchanged to all three extractors.
// NOTE(review): the return-type line between KOKKOS_INLINE_FUNCTION and operator() is not visible
// in this listing; presumably it constrains this overload on M — confirm against the header.
69 template<
class ...IntArgs,
bool M=includeInnerLoop>
70 KOKKOS_INLINE_FUNCTION
72 operator()(
const IntArgs&... args)
const
// Fetch the destination entry (by non-const reference) and the two operand entries, each through
// its own ArgExtractor policy so that the containers may interpret the indices differently.
74 auto & result = ArgExtractorThis::get( this_underlying_, args... );
75 const auto & A_val = ArgExtractorA::get( A_underlying_, args... );
76 const auto & B_val = ArgExtractorB::get( B_underlying_, args... );
// Store the combined value in place.
78 result = binaryOperator_(A_val,B_val);
// Call operator overload that loops over one additional trailing index: for every iFinal in
// [0, innerLoopSize_) the index pack args is extended with iFinal before being handed to the
// extractors.
// NOTE(review): the return-type line between KOKKOS_INLINE_FUNCTION and operator() is not visible
// in this listing; presumably it constrains this overload on M — confirm against the header.
81 template<
class ...IntArgs,
bool M=includeInnerLoop>
82 KOKKOS_INLINE_FUNCTION
84 operator()(
const IntArgs&... args)
const
// Use the type of the first index argument for the loop counter, so the appended index has the
// same type as the leading one; innerLoopSize_ is cast to that type for the comparison.
86 using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
87 for (int_type iFinal=0; iFinal<static_cast<int_type>(innerLoopSize_); iFinal++)
// Same fetch-combine-store sequence as the single-entry overload, with iFinal appended.
89 auto & result = ArgExtractorThis::get( this_underlying_, args..., iFinal );
90 const auto & A_val = ArgExtractorA::get( A_underlying_, args..., iFinal );
91 const auto & B_val = ArgExtractorB::get( B_underlying_, args..., iFinal );
93 result = binaryOperator_(A_val,B_val);
99 template<
class BinaryOperator,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType>
103 ThisUnderlyingViewType this_underlying_;
104 AUnderlyingViewType A_underlying_;
105 BUnderlyingViewType B_underlying_;
106 BinaryOperator binaryOperator_;
109 AUnderlyingViewType A_underlying,
110 BUnderlyingViewType B_underlying,
111 BinaryOperator binaryOperator)
113 this_underlying_(this_underlying),
114 A_underlying_(A_underlying),
115 B_underlying_(B_underlying),
116 binaryOperator_(binaryOperator)
118 INTREPID2_TEST_FOR_EXCEPTION(this_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
119 INTREPID2_TEST_FOR_EXCEPTION(A_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
120 INTREPID2_TEST_FOR_EXCEPTION(B_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
123 KOKKOS_INLINE_FUNCTION
124 void operator()(
const int arg0)
const
126 auto & result = this_underlying_(0);
127 const auto & A_val = A_underlying_(0);
128 const auto & B_val = B_underlying_(0);
130 result = binaryOperator_(A_val,B_val);
135 template<
bool passThroughBlockDiagonalArgs>
138 template<
class ViewType,
class ...IntArgs>
139 static KOKKOS_INLINE_FUNCTION
typename ViewType::reference_type
get(
const ViewType &view,
const IntArgs&... intArgs)
141 return view.getWritableEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
146 template<
bool passThroughBlockDiagonalArgs>
149 template<
class ViewType,
class ...IntArgs>
150 static KOKKOS_INLINE_FUNCTION
typename ViewType::const_reference_type
get(
const ViewType &view,
const IntArgs&... intArgs)
152 return view.getEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
157 template <
class DataScalar,
typename DeviceType,
class BinaryOperator>
160 using reference_type =
typename ScalarView<DataScalar,DeviceType>::reference_type;
161 using const_reference_type =
typename ScalarView<const DataScalar,DeviceType>::reference_type;
164 template<
class PolicyType,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType,
165 class ArgExtractorThis,
class ArgExtractorA,
class ArgExtractorB>
167 AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying,
168 BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
171 Functor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
172 Kokkos::parallel_for(
"compute in-place", policy, functor);
178 enable_if_t<rank != 7, void>
181 auto policy = thisData.template dataExtentRangePolicy<rank>();
210 const auto & variationTypes = data.getVariationTypes();
211 for (
int d=0; d<rank; d++)
213 if (variationTypes[d] == GENERAL)
223 auto thisAE = constArg;
226 auto & this_underlying = thisData.template getUnderlyingView<1>();
227 auto & A_underlying = A.template getUnderlyingView<1>();
228 auto & B_underlying = B.template getUnderlyingView<1>();
229 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
231 else if (this_full && A_full && B_full)
233 auto thisAE = fullArgs;
237 auto & this_underlying = thisData.template getUnderlyingView<rank>();
238 auto & A_underlying = A.template getUnderlyingView<rank>();
239 auto & B_underlying = B.template getUnderlyingView<rank>();
241 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
246 auto & A_underlying = A.template getUnderlyingView<1>();
249 auto thisAE = fullArgs;
250 auto & this_underlying = thisData.template getUnderlyingView<rank>();
255 auto & B_underlying = B.template getUnderlyingView<rank>();
256 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
260 auto BAE = fullArgsData;
267 if (B_1D && (get1DArgIndex(B) != -1) )
270 const int argIndex = get1DArgIndex(B);
271 auto & B_underlying = B.template getUnderlyingView<1>();
272 auto & this_underlying = thisData.template getUnderlyingView<1>();
275 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0);
break;
276 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1);
break;
277 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2);
break;
278 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3);
break;
279 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4);
break;
280 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5);
break;
281 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
287 auto thisAE = fullArgsWritable;
288 auto BAE = fullArgsData;
296 auto & B_underlying = B.template getUnderlyingView<1>();
299 auto thisAE = fullArgs;
300 auto & this_underlying = thisData.template getUnderlyingView<rank>();
304 auto & A_underlying = A.template getUnderlyingView<rank>();
306 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
311 auto AAE = fullArgsData;
318 if (A_1D && (get1DArgIndex(A) != -1) )
321 const int argIndex = get1DArgIndex(A);
322 auto & A_underlying = A.template getUnderlyingView<1>();
323 auto & this_underlying = thisData.template getUnderlyingView<1>();
326 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE);
break;
327 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE);
break;
328 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE);
break;
329 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE);
break;
330 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE);
break;
331 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE);
break;
332 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
338 auto thisAE = fullArgsWritable;
339 auto AAE = fullArgsData;
346 if (this_1D && (get1DArgIndex(thisData) != -1))
353 const int argThis = get1DArgIndex(thisData);
354 const int argA = get1DArgIndex(A);
355 const int argB = get1DArgIndex(B);
357 auto & A_underlying = A.template getUnderlyingView<1>();
358 auto & B_underlying = B.template getUnderlyingView<1>();
359 auto & this_underlying = thisData.template getUnderlyingView<1>();
360 if ((argA != -1) && (argB != -1))
362 #ifdef INTREPID2_HAVE_DEBUG
363 INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error,
"Unexpected 1D arg combination.");
364 INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error,
"Unexpected 1D arg combination.");
368 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0);
break;
369 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1);
break;
370 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2);
break;
371 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3);
break;
372 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4);
break;
373 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5);
break;
374 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
382 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsData);
break;
383 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsData);
break;
384 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsData);
break;
385 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsData);
break;
386 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsData);
break;
387 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsData);
break;
388 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
396 case 0:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsData, arg0);
break;
397 case 1:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsData, arg1);
break;
398 case 2:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsData, arg2);
break;
399 case 3:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsData, arg3);
break;
400 case 4:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsData, arg4);
break;
401 case 5:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsData, arg5);
break;
402 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
409 auto & this_underlying = thisData.template getUnderlyingView<rank>();
410 auto thisAE = fullArgs;
414 auto & A_underlying = A.template getUnderlyingView<rank>();
417 if (B_1D && (get1DArgIndex(B) != -1))
419 const int argIndex = get1DArgIndex(B);
420 auto & B_underlying = B.template getUnderlyingView<1>();
423 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0);
break;
424 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1);
break;
425 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2);
break;
426 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3);
break;
427 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4);
break;
428 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5);
break;
429 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
442 if (A_1D && (get1DArgIndex(A) != -1))
444 const int argIndex = get1DArgIndex(A);
445 auto & A_underlying = A.template getUnderlyingView<1>();
448 auto & B_underlying = B.template getUnderlyingView<rank>();
452 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE);
break;
453 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE);
break;
454 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE);
break;
455 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE);
break;
456 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE);
break;
457 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE);
break;
458 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
463 auto BAE = fullArgsData;
466 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE);
break;
467 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE);
break;
468 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE);
break;
469 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE);
break;
470 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE);
break;
471 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE);
break;
472 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
479 auto AAE = fullArgsData;
480 auto BAE = fullArgsData;
488 auto thisAE = fullArgsWritable;
489 auto AAE = fullArgsData;
490 auto BAE = fullArgsData;
499 enable_if_t<rank == 7, void>
502 auto policy = thisData.template dataExtentRangePolicy<rank>();
510 const bool includeInnerLoop =
true;
512 Functor functor(thisData, A, B, binaryOperator, dim6);
513 Kokkos::parallel_for(
"compute in-place", policy, functor);
518 using ExecutionSpace =
typename DeviceType::execution_space;
520 #ifdef INTREPID2_HAVE_DEBUG
522 for (
int d=0; d<rank_; d++)
524 INTREPID2_TEST_FOR_EXCEPTION(A.
extent_int(d) != thisData.
extent_int(d), std::invalid_argument,
"A, B, and this must agree on all logical extents");
525 INTREPID2_TEST_FOR_EXCEPTION(B.
extent_int(d) != thisData.
extent_int(d), std::invalid_argument,
"A, B, and this must agree on all logical extents");
536 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,1);
538 auto this_underlying = thisData.template getUnderlyingView<1>();
539 auto A_underlying = A.template getUnderlyingView<1>();
540 auto B_underlying = B.template getUnderlyingView<1>();
542 using ConstantCaseFunctor = InPlaceCombinationFunctorConstantCase<decltype(binaryOperator), decltype(this_underlying),
543 decltype(A_underlying), decltype(B_underlying)>;
545 ConstantCaseFunctor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
546 Kokkos::parallel_for(
"compute in-place", policy,functor);
550 switch (thisData.
rank())
552 case 1: storeInPlaceCombination<1>(thisData, A, B, binaryOperator);
break;
553 case 2: storeInPlaceCombination<2>(thisData, A, B, binaryOperator);
break;
554 case 3: storeInPlaceCombination<3>(thisData, A, B, binaryOperator);
break;
555 case 4: storeInPlaceCombination<4>(thisData, A, B, binaryOperator);
break;
556 case 5: storeInPlaceCombination<5>(thisData, A, B, binaryOperator);
break;
557 case 6: storeInPlaceCombination<6>(thisData, A, B, binaryOperator);
break;
558 case 7: storeInPlaceCombination<7>(thisData, A, B, binaryOperator);
break;
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewRank() const
returns the rank of the View that stores the unique data
static enable_if_t< rank==7, void > storeInPlaceCombination(Data< DataScalar, DeviceType > &thisData, const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank of 7. (Not optimized; expect...
#define INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(test, x, msg)
Defines the Data class, a wrapper around a Kokkos::View that allows data that is constant or repeatin...
KOKKOS_INLINE_FUNCTION bool underlyingMatchesLogical() const
Returns true if the underlying container has exactly the same rank and extents as the logical contain...
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
static void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying, AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying, BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
storeInPlaceCombination implementation for rank < 7, with compile-time underlying views and argument ...
Defines functors for use with Data objects: so far, we include simple arithmetical functors for sum...
functor definition for the constant-data case.
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewSize() const
returns the number of entries in the View that stores the unique data
KOKKOS_INLINE_FUNCTION unsigned rank() const
Returns the logical rank of the Data container.
KOKKOS_INLINE_FUNCTION int extent_int(const int &r) const
Returns the logical extent in the specified dimension.
Defines DataVariationType enum that specifies the types of variation possible within a Data object...
static enable_if_t< rank!=7, void > storeInPlaceCombination(Data< DataScalar, DeviceType > &thisData, const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank < 7.
KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
returns the true extent of the data corresponding to the logical dimension provided; if the data does...