#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "Kokkos_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"
namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar,
          bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert(static_cast<int>(WVector::rank) == 1,
                "WVector must be a rank 1 View.");
  static_assert(static_cast<int>(DVector::rank) == 1,
                "DVector must be a rank 1 View.");
  static_assert(static_cast<int>(BVector::rank) == 1,
                "BVector must be a rank 1 View.");
  static_assert(static_cast<int>(XVector::rank) == 1,
                "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;
  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;
  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor(const Scalar& alpha_, const WVector& w_,
                                    const DVector& d_, const BVector& b_,
                                    const AMatrix& A_, const XVector& x_,
                                    const Scalar& beta_,
                                    const int rows_per_team_)
      : alpha(alpha_), m_w(w_), m_d(d_), m_b(b_), m_A(A_), m_x(x_),
        beta(beta_), rows_per_team(rows_per_team_) {
    const size_t numRows = m_A.numRows();
    const size_t numCols = m_A.numCols();

    // Sanity-check that the row-Map views and the matrix dimensions agree.
    TEUCHOS_ASSERT(numRows == size_t(m_w.extent(0)));
    TEUCHOS_ASSERT(numRows == size_t(m_d.extent(0)));
    TEUCHOS_ASSERT(numRows == size_t(m_b.extent(0)));
    TEUCHOS_ASSERT(numCols <= size_t(m_x.extent(0)));
  }
  KOKKOS_INLINE_FUNCTION
  void operator()(const team_member& dev) const {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = Kokkos::ArithTraits<residual_value_type>;

    Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
                         [&](const LO& loop) {
                           const LO lclRow =
                               static_cast<LO>(dev.league_rank()) * rows_per_team + loop;
                           if (lclRow >= m_A.numRows()) {
                             return;
                           }
                           const auto A_row = m_A.rowConst(lclRow);
                           const LO row_length = static_cast<LO>(A_row.length);
                           residual_value_type A_x = KAT::zero();

                           // The thread's vector lanes reduce A(lclRow,:) * x.
                           Kokkos::parallel_reduce(
                               Kokkos::ThreadVectorRange(dev, row_length),
                               [&](const LO iEntry, residual_value_type& lsum) {
                                 const auto A_val = A_row.value(iEntry);
                                 lsum += A_val * m_x(A_row.colidx(iEntry));
                               },
                               A_x);

                           // One lane per thread writes the scaled, damped result.
                           Kokkos::single(Kokkos::PerThread(dev),
                                          [&]() {
                                            const auto alpha_D_res =
                                                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
                                            if (use_beta) {
                                              m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
                                            } else {
                                              m_w(lclRow) = alpha_D_res;
                                            }
                                          });
                         });
  }
};
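// For reference, a serial sketch of what the functor above computes for one
// local row i (names here are illustrative, not members of the class):
//
//   r_i  = b(i) - sum_j A(i,j) * x(j);               // residual for row i
//   w(i) = use_beta ? beta * w(i) + alpha * d(i) * r_i
//                   : alpha * d(i) * r_i;
//
// The team/thread/vector hierarchy simply parallelizes this loop: each league
// member handles rows_per_team rows, each thread one row, and the vector
// lanes reduce the sparse dot product for that row.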
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar>
static void
scaled_damped_residual_vector(const Scalar& alpha,
                              const WVector& w,
                              const DVector& d,
                              const BVector& b,
                              const AMatrix& A,
                              const XVector& x,
                              const Scalar& beta) {
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows() == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent(0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Schedule;
  using Kokkos::Static;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic(1, 1);
  policy_type_static policyStatic(1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static(worksets, Kokkos::AUTO, vector_length);
  } else {
    policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
    policyStatic = policy_type_static(worksets, team_size, vector_length);
  }
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
  if (beta == Kokkos::ArithTraits<Scalar>::zero()) {
    constexpr bool use_beta = false;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    // Use a dynamic schedule only for large matrices, where load imbalance
    // across teams is more likely to matter.
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  } else {
    constexpr bool use_beta = true;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  }
}

}  // namespace Impl
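// A minimal sketch of how the kernel above might be invoked directly on local
// (device) data, assuming Tpetra objects named A, D_inv, B, X, and W that
// already exist; fusedCase() below does essentially this:
//
//   auto A_lcl = A.getLocalMatrixDevice();
//   auto d = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly),
//                            Kokkos::ALL(), 0);
//   auto b = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly),
//                            Kokkos::ALL(), 0);
//   auto x = Kokkos::subview(X.getLocalViewDevice(Tpetra::Access::ReadOnly),
//                            Kokkos::ALL(), 0);
//   auto w = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll),
//                            Kokkos::ALL(), 0);
//   Impl::scaled_damped_residual_vector(alpha, w, d, b, A_lcl, x, beta);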
template <class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
    ScaledDampedResidual(const Teuchos::RCP<const operator_type>& A) {
  setMatrix(A);
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    setMatrix(const Teuchos::RCP<const operator_type>& A) {
  if (A_op_.get() != A.get()) {
    A_op_ = A;

    // These get (re)allocated on demand.
    X_colMap_ = std::unique_ptr<vector_type>(nullptr);
    V1_ = std::unique_ptr<multivector_type>(nullptr);
    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
        rcp_dynamic_cast<const crs_matrix_type>(A);
    if (A_crs.is_null()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    } else {
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph();
      imp_ = G->getImporter();
      exp_ = G->getExporter();
    }
  }
}
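// Note: the importer extracted here (if any) is what importVector() below
// uses to bring a domain-Map X into A's column Map before the fused kernel.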
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    compute(multivector_type& W,
            const SC& alpha,
            vector_type& D_inv,
            multivector_type& B,
            multivector_type& X,
            const SC& beta) {
  if (canFuse(B)) {
    W_vec_ = W.getVectorNonConst(0);
    B_vec_ = B.getVectorNonConst(0);
    X_vec_ = X.getVectorNonConst(0);
    fusedCase(*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  } else {
    unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
  }
}
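// Roughly, the two paths taken by compute() differ as follows:
//
//   fused:    a single Kokkos kernel computes W := alpha*D*(B - A*X) + beta*W
//             directly from the local CrsMatrix; this requires a
//             single-column B and an explicit crs_matrix_type (see canFuse()).
//   unfused:  a temporary V1 = B - A*X is formed with Operator::apply(),
//             then W.elementWiseMultiply() applies the diagonal scaling.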
template <class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
    importVector(vector_type& X_domMap) {
  if (imp_.is_null()) {
    return X_domMap;
  } else {
    if (X_colMap_.get() == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V>(new V(imp_->getTargetMap()));
    }
    X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}
template <class TpetraOperatorType>
bool ScaledDampedResidual<TpetraOperatorType>::
    canFuse(const multivector_type& B) const {
  return B.getNumVectors() == size_t(1) &&
         !A_crs_.is_null() &&
         exp_.is_null();
}
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    unfusedCase(multivector_type& W,
                const SC& alpha,
                vector_type& D_inv,
                multivector_type& B,
                const operator_type& A,
                multivector_type& X,
                const SC& beta) {
  if (V1_.get() == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors();
    V1_ = std::unique_ptr<MV>(new MV(B.getMap(), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one();

  // V1 := B - A*X.
  Tpetra::deep_copy(*V1_, B);
  A.apply(X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W.
  W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
}
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    fusedCase(vector_type& W,
              const SC& alpha,
              vector_type& D_inv,
              vector_type& B,
              const crs_matrix_type& A,
              vector_type& X,
              const SC& beta) {
  vector_type& X_colMap = importVector(X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice();
  auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto X_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  if (beta == STS::zero()) {
    // beta == 0: W is completely overwritten, so OverwriteAll access suffices.
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  } else {
    // beta != 0 reads the old W, so ReadWrite access is required.
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  }
}

}  // namespace Details
}  // namespace Ifpack2
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC, LO, GO, NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;
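// A rough usage sketch (names and setup are illustrative, not taken from this
// file): given a fill-complete Tpetra matrix wrapped as an operator, an
// inverse-diagonal vector D_inv, and multivectors B, X, W, one damped
// residual update might look like
//
//   using op_type = Tpetra::Operator<SC, LO, GO, NT>;
//   Ifpack2::Details::ScaledDampedResidual<op_type> sdr(A);  // A: RCP<const op_type>
//   sdr.compute(W, alpha, D_inv, B, X, beta);  // W := alpha*D_inv*(B - A*X) + beta*W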
#endif  // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP