Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
// @HEADER
// *****************************************************************************
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
//
// Copyright 2009 NTESS and the Ifpack2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "Kokkos_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

// Functor for computing W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar,
          bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert(static_cast<int>(WVector::rank) == 1,
                "WVector must be a rank 1 View.");
  static_assert(static_cast<int>(DVector::rank) == 1,
                "DVector must be a rank 1 View.");
  static_assert(static_cast<int>(BVector::rank) == 1,
                "BVector must be a rank 1 View.");
  static_assert(static_cast<int>(XVector::rank) == 1,
                "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor(const Scalar& alpha_,
                                    const WVector& m_w_,
                                    const DVector& m_d_,
                                    const BVector& m_b_,
                                    const AMatrix& m_A_,
                                    const XVector& m_x_,
                                    const Scalar& beta_,
                                    const int rows_per_team_)
    : alpha(alpha_)
    , m_w(m_w_)
    , m_d(m_d_)
    , m_b(m_b_)
    , m_A(m_A_)
    , m_x(m_x_)
    , beta(beta_)
    , rows_per_team(rows_per_team_) {
    const size_t numRows = m_A.numRows();
    const size_t numCols = m_A.numCols();

    TEUCHOS_ASSERT(m_w.extent(0) == m_d.extent(0));
    TEUCHOS_ASSERT(m_w.extent(0) == m_b.extent(0));
    TEUCHOS_ASSERT(numRows == size_t(m_w.extent(0)));
    TEUCHOS_ASSERT(numCols <= size_t(m_x.extent(0)));
  }

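  // Each team handles a contiguous block of rows_per_team rows: the
  // TeamThreadRange assigns one row per thread, the ThreadVectorRange
  // reduces that row's dot product A(row,:) * x across the vector lanes,
  // and Kokkos::single writes the scaled, damped residual for the row.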
  KOKKOS_INLINE_FUNCTION
  void operator()(const team_member& dev) const {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = Kokkos::ArithTraits<residual_value_type>;

    Kokkos::parallel_for(Kokkos::TeamThreadRange(dev, 0, rows_per_team),
                         [&](const LO& loop) {
                           const LO lclRow =
                               static_cast<LO>(dev.league_rank()) * rows_per_team + loop;
                           if (lclRow >= m_A.numRows()) {
                             return;
                           }
                           const auto A_row = m_A.rowConst(lclRow);
                           const LO row_length = static_cast<LO>(A_row.length);
                           residual_value_type A_x = KAT::zero();

                           Kokkos::parallel_reduce(
                               Kokkos::ThreadVectorRange(dev, row_length),
                               [&](const LO iEntry, residual_value_type& lsum) {
                                 const auto A_val = A_row.value(iEntry);
                                 lsum += A_val * m_x(A_row.colidx(iEntry));
                               },
                               A_x);

                           Kokkos::single(Kokkos::PerThread(dev),
                                          [&]() {
                                            const auto alpha_D_res =
                                                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
                                            if (use_beta) {
                                              m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
                                            } else {
                                              m_w(lclRow) = alpha_D_res;
                                            }
                                          });
                         });
  }
};

// W := alpha * D * (B - A*X) + beta * W.
template <class WVector,
          class DVector,
          class BVector,
          class AMatrix,
          class XVector,
          class Scalar>
static void
scaled_damped_residual_vector(const Scalar& alpha,
                              const WVector& w,
                              const DVector& d,
                              const BVector& b,
                              const AMatrix& A,
                              const XVector& x,
                              const Scalar& beta) {
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows() == 0) {
    return;
  }

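  // Let the KokkosSparse SpMV heuristics pick the team size, vector length,
  // and rows-per-team for this matrix; -1 means "choose automatically".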
  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  const int64_t rows_per_team =
      KokkosSparse::Impl::spmv_launch_parameters<execution_space>(
          A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent(0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Schedule;
  using Kokkos::Static;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic(1, 1);
  policy_type_static policyStatic(1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic(worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static(worksets, Kokkos::AUTO, vector_length);
  } else {
    policyDynamic = policy_type_dynamic(worksets, team_size, vector_length);
    policyStatic = policy_type_static(worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;

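  // The beta == 0 case gets its own functor instantiation (use_beta is a
  // compile-time flag) so the kernel never reads W when it only overwrites it.
  // Matrices with more than 10M stored entries use the dynamically scheduled
  // policy for better load balance; smaller ones use the static schedule.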
  if (beta == Kokkos::ArithTraits<Scalar>::zero()) {
    constexpr bool use_beta = false;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  } else {
    constexpr bool use_beta = true;
    using functor_type =
        ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                          b_vec_type, matrix_type,
                                          x_vec_type, scalar_type,
                                          use_beta>;
    functor_type func(alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz() > 10000000)
      Kokkos::parallel_for(kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for(kernel_label, policyStatic, func);
  }
}

} // namespace Impl

template <class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
    ScaledDampedResidual(const Teuchos::RCP<const operator_type>& A) {
  setMatrix(A);
}

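// Cache the matrix, plus its Import and Export (when A is a Tpetra::CrsMatrix),
// so that compute() can take the fused single-kernel path; a non-CrsMatrix
// operator falls back to the unfused path below.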
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    setMatrix(const Teuchos::RCP<const operator_type>& A) {
  if (A_op_.get() != A.get()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type>(nullptr);
    V1_ = std::unique_ptr<multivector_type>(nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
        rcp_dynamic_cast<const crs_matrix_type>(A);
    if (A_crs.is_null()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    } else {
      TEUCHOS_ASSERT(A_crs->isFillComplete());
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph();
      imp_ = G->getImporter();
      exp_ = G->getExporter();
    }
  }
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    compute(multivector_type& W,
            const SC& alpha,
            vector_type& D_inv,
            multivector_type& B,
            multivector_type& X,
            const SC& beta) {
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse(B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst(0);
    B_vec_ = B.getVectorNonConst(0);
    X_vec_ = X.getVectorNonConst(0);
    TEUCHOS_ASSERT(!A_crs_.is_null());
    fusedCase(*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  } else {
    TEUCHOS_ASSERT(!A_op_.is_null());
    unfusedCase(W, alpha, D_inv, B, *A_op_, X, beta);
  }
}

template <class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
    importVector(vector_type& X_domMap) {
  if (imp_.is_null()) {
    return X_domMap;
  } else {
    if (X_colMap_.get() == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V>(new V(imp_->getTargetMap()));
    }
    X_colMap_->doImport(X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

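// The fused path applies only when B has a single column, A is a concrete
// Tpetra::CrsMatrix, and A has no Export (i.e., its row Map equals its range
// Map), so the local kernel can write the result directly into W.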
template <class TpetraOperatorType>
bool ScaledDampedResidual<TpetraOperatorType>::
    canFuse(const multivector_type& B) const {
  return B.getNumVectors() == size_t(1) &&
         !A_crs_.is_null() &&
         exp_.is_null();
}

template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    unfusedCase(multivector_type& W,
                const SC& alpha,
                vector_type& D_inv,
                multivector_type& B,
                const operator_type& A,
                multivector_type& X,
                const SC& beta) {
  if (V1_.get() == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors();
    V1_ = std::unique_ptr<MV>(new MV(B.getMap(), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one();

  // V1 = B - A*X
  Tpetra::deep_copy(*V1_, B);
  A.apply(X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply(alpha, D_inv, *V1_, beta);
}

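// The fused case extracts rank-1 device views of W, D_inv, B, and X (the
// latter imported to the column Map if needed) and hands them, together with
// A's local matrix, to the single Impl kernel above.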
template <class TpetraOperatorType>
void ScaledDampedResidual<TpetraOperatorType>::
    fusedCase(vector_type& W,
              const SC& alpha,
              vector_type& D_inv,
              vector_type& B,
              const crs_matrix_type& A,
              vector_type& X,
              const SC& beta) {
  vector_type& X_colMap = importVector(X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice();
  auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  auto X_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
  if (beta == STS::zero()) {
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  } else { // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
    scaled_damped_residual_vector(alpha, W_lcl, Dinv_lcl,
                                  B_lcl, A_lcl, X_lcl, beta);
  }
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC, LO, GO, NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;

#endif // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
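
A minimal usage sketch (not part of the header above), assuming default Tpetra template parameters and that A, W, B, X, and D_inv were already created with compatible Maps; the include path and the helper name applyScaledDampedResidual are illustrative assumptions, not part of this file.

#include "Ifpack2_Details_ScaledDampedResidual_decl.hpp"  // declaration header (assumed name)

using SC       = double;
using crs_type = Tpetra::CrsMatrix<SC>;
using op_type  = Tpetra::Operator<SC>;
using mv_type  = Tpetra::MultiVector<SC>;
using vec_type = Tpetra::Vector<SC>;

void applyScaledDampedResidual(const Teuchos::RCP<const crs_type>& A,
                               mv_type& W, vec_type& D_inv,
                               mv_type& B, mv_type& X) {
  // Caches A plus its Import/Export so repeated calls can reuse the fused kernel.
  Ifpack2::Details::ScaledDampedResidual<op_type> residual(A);
  const SC alpha = 1.0;
  const SC beta  = 0.0;
  // W := alpha * D_inv * (B - A*X) + beta * W
  residual.compute(W, alpha, D_inv, B, X, beta);
}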