Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Relaxation_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_RELAXATION_DEF_HPP
11 #define IFPACK2_RELAXATION_DEF_HPP
12 
13 #include "Teuchos_StandardParameterEntryValidators.hpp"
14 #include "Teuchos_TimeMonitor.hpp"
15 #include "Tpetra_CrsMatrix.hpp"
16 #include "Tpetra_BlockCrsMatrix.hpp"
17 #include "Tpetra_BlockView.hpp"
18 #include "Ifpack2_Utilities.hpp"
19 #include "Ifpack2_Details_getCrsMatrix.hpp"
20 #include "MatrixMarket_Tpetra.hpp"
21 #include "Tpetra_Details_residual.hpp"
22 #include <cstdlib>
23 #include <memory>
24 #include <sstream>
25 #include "KokkosSparse_gauss_seidel.hpp"
26 
27 namespace {
28 // Validate that a given int is nonnegative.
29 class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
30  public:
31  // Constructor (does nothing).
32  NonnegativeIntValidator() {}
33 
34  // ParameterEntryValidator wants this method.
36  return Teuchos::null;
37  }
38 
39  // Actually validate the parameter's value.
40  void
41  validate(const Teuchos::ParameterEntry& entry,
42  const std::string& paramName,
43  const std::string& sublistName) const {
44  using std::endl;
45  Teuchos::any anyVal = entry.getAny(true);
46  const std::string entryName = entry.getAny(false).typeName();
47 
49  anyVal.type() != typeid(int),
51  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
52  << "\" has the wrong type." << endl
53  << "Parameter: " << paramName
54  << endl
55  << "Type specified: " << entryName << endl
56  << "Type required: int" << endl);
57 
58  const int val = Teuchos::any_cast<int>(anyVal);
61  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
62  << "\" is negative." << endl
63  << "Parameter: " << paramName
64  << endl
65  << "Value specified: " << val << endl
66  << "Required range: [0, INT_MAX]" << endl);
67  }
68 
69  // ParameterEntryValidator wants this method.
70  const std::string getXMLTypeName() const {
71  return "NonnegativeIntValidator";
72  }
73 
74  // ParameterEntryValidator wants this method.
75  void
76  printDoc(const std::string& docString,
77  std::ostream& out) const {
78  Teuchos::StrUtils::printLines(out, "# ", docString);
79  out << "#\tValidator Used: " << std::endl;
80  out << "#\t\tNonnegativeIntValidator" << std::endl;
81  }
82 };
83 
84 // A way to get a small positive number (eps() for floating-point
85 // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
86 // define it (for example, for integer values).
87 template <class Scalar, const bool isOrdinal = Teuchos::ScalarTraits<Scalar>::isOrdinal>
88 class SmallTraits {
89  public:
90  // Return eps if Scalar is a floating-point type, else return 1.
91  static const Scalar eps();
92 };
93 
94 // Partial specialization for when Scalar is not a floating-point type.
95 template <class Scalar>
96 class SmallTraits<Scalar, true> {
97  public:
98  static const Scalar eps() {
100  }
101 };
102 
103 // Partial specialization for when Scalar is a floating-point type.
104 template <class Scalar>
105 class SmallTraits<Scalar, false> {
106  public:
107  static const Scalar eps() {
109  }
110 };
111 
112 // Work-around for GitHub Issue #5269.
113 template <class Scalar,
114  const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
115 struct RealTraits {};
116 
117 template <class Scalar>
118 struct RealTraits<Scalar, false> {
119  using val_type = Scalar;
120  using mag_type = Scalar;
121  static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
122  return z;
123  }
124 };
125 
126 template <class Scalar>
127 struct RealTraits<Scalar, true> {
128  using val_type = Scalar;
129  using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
130  static KOKKOS_INLINE_FUNCTION mag_type real(const val_type& z) {
131  return Kokkos::ArithTraits<val_type>::real(z);
132  }
133 };
134 
135 template <class Scalar>
136 KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
137 getRealValue(const Scalar& z) {
138  return RealTraits<Scalar>::real(z);
139 }
140 
141 } // namespace
142 
143 namespace Ifpack2 {
144 
145 template <class MatrixType>
146 void Relaxation<MatrixType>::
147  updateCachedMultiVector(const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
148  size_t numVecs) const {
149  // Allocate a multivector if the cached one isn't perfect. Checking
150  // for map pointer equality is much cheaper than Map::isSameAs.
151  if (cachedMV_.is_null() ||
152  map.get() != cachedMV_->getMap().get() ||
153  cachedMV_->getNumVectors() != numVecs) {
154  using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
155  global_ordinal_type, node_type>;
156  cachedMV_ = Teuchos::rcp(new MV(map, numVecs, false));
157  }
158 }
159 
160 template <class MatrixType>
163  if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
164  Importer_ = Teuchos::null;
165  pointImporter_ = Teuchos::null;
166  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
167  isInitialized_ = false;
168  IsComputed_ = false;
169  diagOffsets_ = Kokkos::View<size_t*, device_type>();
170  savedDiagOffsets_ = false;
171  hasBlockCrsMatrix_ = false;
172  serialGaussSeidel_ = Teuchos::null;
173  if (!A.is_null()) {
174  IsParallel_ = (A->getRowMap()->getComm()->getSize() > 1);
175  }
176  A_ = A;
177  }
178 }
179 
180 template <class MatrixType>
183  : A_(A)
184  , IsParallel_((A.is_null() || A->getRowMap().is_null() || A->getRowMap()->getComm().is_null()) ? false : // a reasonable default if there's no communicator
185  A->getRowMap()->getComm()->getSize() > 1) {
186  this->setObjectLabel("Ifpack2::Relaxation");
187 }
188 
189 template <class MatrixType>
192  using Teuchos::Array;
194  using Teuchos::parameterList;
195  using Teuchos::RCP;
196  using Teuchos::rcp;
197  using Teuchos::rcp_const_cast;
198  using Teuchos::rcp_implicit_cast;
199  using Teuchos::setStringToIntegralParameter;
201 
202  if (validParams_.is_null()) {
203  RCP<ParameterList> pl = parameterList("Ifpack2::Relaxation");
204 
205  // Set a validator that automatically converts from the valid
206  // string options to their enum values.
207  Array<std::string> precTypes(8);
208  precTypes[0] = "Jacobi";
209  precTypes[1] = "Gauss-Seidel";
210  precTypes[2] = "Symmetric Gauss-Seidel";
211  precTypes[3] = "MT Gauss-Seidel";
212  precTypes[4] = "MT Symmetric Gauss-Seidel";
213  precTypes[5] = "Richardson";
214  precTypes[6] = "Two-stage Gauss-Seidel";
215  precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
216  Array<Details::RelaxationType> precTypeEnums(8);
217  precTypeEnums[0] = Details::JACOBI;
218  precTypeEnums[1] = Details::GS;
219  precTypeEnums[2] = Details::SGS;
220  precTypeEnums[3] = Details::MTGS;
221  precTypeEnums[4] = Details::MTSGS;
222  precTypeEnums[5] = Details::RICHARDSON;
223  precTypeEnums[6] = Details::GS2;
224  precTypeEnums[7] = Details::SGS2;
225  const std::string defaultPrecType("Jacobi");
226  setStringToIntegralParameter<Details::RelaxationType>("relaxation: type",
227  defaultPrecType, "Relaxation method", precTypes(), precTypeEnums(),
228  pl.getRawPtr());
229 
230  const int numSweeps = 1;
231  RCP<PEV> numSweepsValidator =
232  rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
233  pl->set("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
234  rcp_const_cast<const PEV>(numSweepsValidator));
235 
236  // number of 'local' outer sweeps for two-stage GS
237  const int numOuterSweeps = 1;
238  RCP<PEV> numOuterSweepsValidator =
239  rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
240  pl->set("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
241  rcp_const_cast<const PEV>(numOuterSweepsValidator));
242  // number of 'local' inner sweeps for two-stage GS
243  const int numInnerSweeps = 1;
244  RCP<PEV> numInnerSweepsValidator =
245  rcp_implicit_cast<PEV>(rcp(new NonnegativeIntValidator));
246  pl->set("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
247  rcp_const_cast<const PEV>(numInnerSweepsValidator));
248  // specify damping factor for the inner sweeps of two-stage GS
249  const scalar_type innerDampingFactor = STS::one();
250  pl->set("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
251  // specify whether to use sptrsv instead of inner-iterations for two-stage GS
252  const bool innerSpTrsv = false;
253  pl->set("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
254  // specify whether to use compact form of recurrence for two-stage GS
255  const bool compactForm = false;
256  pl->set("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
257 
258  const scalar_type dampingFactor = STS::one();
259  pl->set("relaxation: damping factor", dampingFactor);
260 
261  const bool zeroStartingSolution = true;
262  pl->set("relaxation: zero starting solution", zeroStartingSolution);
263 
264  const bool doBackwardGS = false;
265  pl->set("relaxation: backward mode", doBackwardGS);
266 
267  const bool doL1Method = false;
268  pl->set("relaxation: use l1", doL1Method);
269 
270  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
271  (STM::one() + STM::one()); // 1.5
272  pl->set("relaxation: l1 eta", l1eta);
273 
274  const scalar_type minDiagonalValue = STS::zero();
275  pl->set("relaxation: min diagonal value", minDiagonalValue);
276 
277  const bool fixTinyDiagEntries = false;
278  pl->set("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
279 
280  const bool checkDiagEntries = false;
281  pl->set("relaxation: check diagonal entries", checkDiagEntries);
282 
283  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
284  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
285 
286  const bool is_matrix_structurally_symmetric = false;
287  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
288 
289  const bool ifpack2_dump_matrix = false;
290  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
291 
292  const int cluster_size = 1;
293  pl->set("relaxation: mtgs cluster size", cluster_size);
294 
295  pl->set("relaxation: mtgs coloring algorithm", "Default");
296 
297  const int long_row_threshold = 0;
298  pl->set("relaxation: long row threshold", long_row_threshold);
299 
300  const bool timer_for_apply = true;
301  pl->set("timer for apply", timer_for_apply);
302 
303  validParams_ = rcp_const_cast<const ParameterList>(pl);
304  }
305  return validParams_;
306 }
307 
308 template <class MatrixType>
310  using Teuchos::getIntegralValue;
311  using Teuchos::getStringValue;
313  using Teuchos::RCP;
314  typedef scalar_type ST; // just to make code below shorter
315 
316  if (pl.isType<double>("relaxation: damping factor")) {
317  // Make sure that ST=complex can run with a damping factor that is
318  // a double.
319  ST df = pl.get<double>("relaxation: damping factor");
320  pl.remove("relaxation: damping factor");
321  pl.set("relaxation: damping factor", df);
322  }
323 
325 
326  const Details::RelaxationType precType =
327  getIntegralValue<Details::RelaxationType>(pl, "relaxation: type");
328  const std::string precTypeStr = getStringValue<Details::RelaxationType>(pl, "relaxation: type");
329  // We only access "relaxation: type" using strings in the rest of the code
330  pl.set<std::string>("relaxation: type", precTypeStr);
331  pl.get<std::string>("relaxation: type"); // We need to mark the parameter as "used"
332  const int numSweeps = pl.get<int>("relaxation: sweeps");
333  const ST dampingFactor = pl.get<ST>("relaxation: damping factor");
334  const bool zeroStartSol = pl.get<bool>("relaxation: zero starting solution");
335  const bool doBackwardGS = pl.get<bool>("relaxation: backward mode");
336  const bool doL1Method = pl.get<bool>("relaxation: use l1");
337  const magnitude_type l1Eta = pl.get<magnitude_type>("relaxation: l1 eta");
338  const ST minDiagonalValue = pl.get<ST>("relaxation: min diagonal value");
339  const bool fixTinyDiagEntries = pl.get<bool>("relaxation: fix tiny diagonal entries");
340  const bool checkDiagEntries = pl.get<bool>("relaxation: check diagonal entries");
341  const bool is_matrix_structurally_symmetric = pl.get<bool>("relaxation: symmetric matrix structure");
342  const bool ifpack2_dump_matrix = pl.get<bool>("relaxation: ifpack2 dump matrix");
343  const bool timer_for_apply = pl.get<bool>("timer for apply");
344  int cluster_size = 1;
345  if (pl.isParameter("relaxation: mtgs cluster size")) // optional parameter
346  cluster_size = pl.get<int>("relaxation: mtgs cluster size");
347  int long_row_threshold = 0;
348  if (pl.isParameter("relaxation: long row threshold")) // optional parameter
349  long_row_threshold = pl.get<int>("relaxation: long row threshold");
350  std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
351  // convert to lowercase
352  for (char& c : color_algo_name)
353  c = tolower(c);
354  if (color_algo_name == "default")
355  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
356  else if (color_algo_name == "serial")
357  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
358  else if (color_algo_name == "vb")
359  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
360  else if (color_algo_name == "vbbit")
361  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
362  else if (color_algo_name == "vbcs")
363  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
364  else if (color_algo_name == "vbd")
365  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
366  else if (color_algo_name == "vbdbit")
367  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
368  else if (color_algo_name == "eb")
369  this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
370  else {
371  std::ostringstream msg;
372  msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
373  msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
375  true, std::invalid_argument, msg.str());
376  }
377 
378  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type>>("relaxation: local smoothing indices");
379 
380  // for Two-stage Gauss-Seidel
381  if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
382  // Make sure that ST=complex can run with a damping factor that is
383  // a double.
384  ST df = pl.get<double>("relaxation: inner damping factor");
385  pl.remove("relaxation: inner damping factor");
386  pl.set("relaxation: inner damping factor", df);
387  }
388  // If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
389  if (long_row_threshold > 0) {
391  cluster_size != 1, std::invalid_argument,
392  "Ifpack2::Relaxation: "
393  "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
395  precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
396  std::invalid_argument,
397  "Ifpack2::Relaxation: "
398  "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
399  "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
400  }
401 
402  const ST innerDampingFactor = pl.get<ST>("relaxation: inner damping factor");
403  const int numInnerSweeps = pl.get<int>("relaxation: inner sweeps");
404  const int numOuterSweeps = pl.get<int>("relaxation: outer sweeps");
405  const bool innerSpTrsv = pl.get<bool>("relaxation: inner sparse-triangular solve");
406  const bool compactForm = pl.get<bool>("relaxation: compact form");
407 
408  // "Commit" the changes, now that we've validated everything.
409  PrecType_ = precType;
410  NumSweeps_ = numSweeps;
411  DampingFactor_ = dampingFactor;
412  ZeroStartingSolution_ = zeroStartSol;
413  DoBackwardGS_ = doBackwardGS;
414  DoL1Method_ = doL1Method;
415  L1Eta_ = l1Eta;
416  MinDiagonalValue_ = minDiagonalValue;
417  fixTinyDiagEntries_ = fixTinyDiagEntries;
418  checkDiagEntries_ = checkDiagEntries;
419  clusterSize_ = cluster_size;
420  longRowThreshold_ = long_row_threshold;
421  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
422  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
423  localSmoothingIndices_ = localSmoothingIndices;
424  // for Two-stage GS
425  NumInnerSweeps_ = numInnerSweeps;
426  NumOuterSweeps_ = numOuterSweeps;
427  InnerSpTrsv_ = innerSpTrsv;
428  InnerDampingFactor_ = innerDampingFactor;
429  CompactForm_ = compactForm;
430  TimerForApply_ = timer_for_apply;
431 }
432 
433 template <class MatrixType>
435  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
436  // but otherwise, we will get [unused] in pl
437  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
438 }
439 
440 template <class MatrixType>
444  A_.is_null(), std::runtime_error,
445  "Ifpack2::Relaxation::getComm: "
446  "The input matrix A is null. Please call setMatrix() with a nonnull "
447  "input matrix before calling this method.");
448  return A_->getRowMap()->getComm();
449 }
450 
451 template <class MatrixType>
454  return A_;
455 }
456 
457 template <class MatrixType>
458 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
459  typename MatrixType::global_ordinal_type,
460  typename MatrixType::node_type>>
463  A_.is_null(), std::runtime_error,
464  "Ifpack2::Relaxation::getDomainMap: "
465  "The input matrix A is null. Please call setMatrix() with a nonnull "
466  "input matrix before calling this method.");
467  return A_->getDomainMap();
468 }
469 
470 template <class MatrixType>
471 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
472  typename MatrixType::global_ordinal_type,
473  typename MatrixType::node_type>>
476  A_.is_null(), std::runtime_error,
477  "Ifpack2::Relaxation::getRangeMap: "
478  "The input matrix A is null. Please call setMatrix() with a nonnull "
479  "input matrix before calling this method.");
480  return A_->getRangeMap();
481 }
482 
483 template <class MatrixType>
485  return true;
486 }
487 
488 template <class MatrixType>
490  return (NumInitialize_);
491 }
492 
493 template <class MatrixType>
495  return (NumCompute_);
496 }
497 
498 template <class MatrixType>
500  return (NumApply_);
501 }
502 
503 template <class MatrixType>
505  return (InitializeTime_);
506 }
507 
508 template <class MatrixType>
510  return (ComputeTime_);
511 }
512 
513 template <class MatrixType>
515  return (ApplyTime_);
516 }
517 
518 template <class MatrixType>
520  return (ComputeFlops_);
521 }
522 
523 template <class MatrixType>
525  return (ApplyFlops_);
526 }
527 
528 template <class MatrixType>
531  A_.is_null(), std::runtime_error,
532  "Ifpack2::Relaxation::getNodeSmootherComplexity: "
533  "The input matrix A is null. Please call setMatrix() with a nonnull "
534  "input matrix, then call compute(), before calling this method.");
535  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
536  return A_->getLocalNumRows() + A_->getLocalNumEntries();
537 }
538 
539 template <class MatrixType>
541  apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
542  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
543  Teuchos::ETransp /* mode */,
544  scalar_type alpha,
545  scalar_type beta) const {
546  using Teuchos::as;
547  using Teuchos::RCP;
548  using Teuchos::rcp;
549  using Teuchos::rcpFromRef;
550  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
552  MV;
554  A_.is_null(), std::runtime_error,
555  "Ifpack2::Relaxation::apply: "
556  "The input matrix A is null. Please call setMatrix() with a nonnull "
557  "input matrix, then call compute(), before calling this method.");
559  !isComputed(),
560  std::runtime_error,
561  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
562  "preconditioner instance before you may call apply(). You may call "
563  "isComputed() to find out if compute() has been called already.");
564  TEUCHOS_TEST_FOR_EXCEPTION(
565  X.getNumVectors() != Y.getNumVectors(),
566  std::runtime_error,
567  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
568  "X has "
569  << X.getNumVectors() << " columns, but Y has "
570  << Y.getNumVectors() << " columns.");
571  TEUCHOS_TEST_FOR_EXCEPTION(
572  beta != STS::zero(), std::logic_error,
573  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
574  "implemented.");
575 
577  const std::string timerName("Ifpack2::Relaxation::apply");
578  if (TimerForApply_) {
579  timer = Teuchos::TimeMonitor::lookupCounter(timerName);
580  if (timer.is_null()) {
581  timer = Teuchos::TimeMonitor::getNewCounter(timerName);
582  }
583  }
584 
585  Teuchos::Time time = Teuchos::Time(timerName);
586  double startTime = time.wallTime();
587 
588  {
590  if (TimerForApply_)
591  timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
592 
593  // Special case: alpha == 0.
594  if (alpha == STS::zero()) {
595  // No floating-point operations, so no need to update a count.
596  Y.putScalar(STS::zero());
597  } else {
598  // If X and Y alias one another, then we need to create an
599  // auxiliary vector, Xcopy (a deep copy of X).
600  RCP<const MV> Xcopy;
601  {
602  if (X.aliases(Y)) {
603  Xcopy = rcp(new MV(X, Teuchos::Copy));
604  } else {
605  Xcopy = rcpFromRef(X);
606  }
607  }
608 
609  // Each of the following methods updates the flop count itself.
610  // All implementations handle zeroing out the starting solution
611  // (if necessary) themselves.
612  switch (PrecType_) {
613  case Ifpack2::Details::JACOBI:
614  ApplyInverseJacobi(*Xcopy, Y);
615  break;
616  case Ifpack2::Details::GS:
617  ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
618  break;
619  case Ifpack2::Details::SGS:
620  ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
621  break;
622  case Ifpack2::Details::MTGS:
623  case Ifpack2::Details::GS2:
624  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
625  break;
626  case Ifpack2::Details::MTSGS:
627  case Ifpack2::Details::SGS2:
628  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
629  break;
630  case Ifpack2::Details::RICHARDSON:
631  ApplyInverseRichardson(*Xcopy, Y);
632  break;
633 
634  default:
635  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
636  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
637  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
638  }
639  if (alpha != STS::one()) {
640  Y.scale(alpha);
641  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
642  const double numVectors = as<double>(Y.getNumVectors());
643  ApplyFlops_ += numGlobalRows * numVectors;
644  }
645  }
646  }
647  ApplyTime_ += (time.wallTime() - startTime);
648  ++NumApply_;
649 }
650 
651 template <class MatrixType>
653  applyMat(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
654  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
655  Teuchos::ETransp mode) const {
657  A_.is_null(), std::runtime_error,
658  "Ifpack2::Relaxation::applyMat: "
659  "The input matrix A is null. Please call setMatrix() with a nonnull "
660  "input matrix, then call compute(), before calling this method.");
662  !isComputed(), std::runtime_error,
663  "Ifpack2::Relaxation::applyMat: "
664  "isComputed() must be true before you may call applyMat(). "
665  "Please call compute() before calling this method.");
666  TEUCHOS_TEST_FOR_EXCEPTION(
667  X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
668  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors()
669  << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
670  A_->apply(X, Y, mode);
671 }
672 
673 template <class MatrixType>
675  const char methodName[] = "Ifpack2::Relaxation::initialize";
676 
677  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": The "
678  "input matrix A is null. Please call setMatrix() with "
679  "a nonnull input matrix before calling this method.");
680 
683 
684  double startTime = timer->wallTime();
685 
686  { // Timing of initialize starts here
687  Teuchos::TimeMonitor timeMon(*timer);
688  isInitialized_ = false;
689 
690  {
691  auto rowMap = A_->getRowMap();
692  auto comm = rowMap.is_null() ? Teuchos::null : rowMap->getComm();
693  IsParallel_ = !comm.is_null() && comm->getSize() > 1;
694  }
695 
696  // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
697  // but in that case, the domain and column Maps are the same and
698  // we don't need to Import anyway.
699  //
700  // mfh 07 May 2019: A_->getGraph() might be an
701  // OverlappingRowGraph, which doesn't have an Import object.
702  // However, in that case, the comm should have size 1.
703  Importer_ = IsParallel_ ? A_->getGraph()->getImporter() : Teuchos::null;
704 
705  {
707  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
708  hasBlockCrsMatrix_ = !A_bcrs.is_null();
709  }
710 
711  serialGaussSeidel_ = Teuchos::null;
712 
713  if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
714  PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
715  auto crsMat = Details::getCrsMatrix(A_);
716  TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
717  "Multithreaded Gauss-Seidel methods currently only work "
718  "when the input matrix is a Tpetra::CrsMatrix.");
719 
720  // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
721  // compute, not initialize, since users may change the matrix's
722  // values at any time before calling compute.
723  if (ifpack2_dump_matrix_) {
724  static int sequence_number = 0;
725  const std::string file_name = "Ifpack2_MT_GS_" +
726  std::to_string(sequence_number++) + ".mtx";
727  using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
728  writer_type::writeSparseFile(file_name, crsMat);
729  }
730 
731  this->mtKernelHandle_ = Teuchos::rcp(new mt_kernel_handle_type());
732  if (mtKernelHandle_->get_gs_handle() == nullptr) {
733  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
734  mtKernelHandle_->create_gs_handle(KokkosSparse::GS_TWOSTAGE);
735  else if (this->clusterSize_ == 1) {
736  mtKernelHandle_->create_gs_handle(KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
737  mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
738  } else
739  mtKernelHandle_->create_gs_handle(KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
740  }
741  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
742  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
743  // set parameters for two-stage GS
744  mtKernelHandle_->set_gs_set_num_inner_sweeps(NumInnerSweeps_);
745  mtKernelHandle_->set_gs_set_num_outer_sweeps(NumOuterSweeps_);
746  mtKernelHandle_->set_gs_set_inner_damp_factor(InnerDampingFactor_);
747  mtKernelHandle_->set_gs_twostage(!InnerSpTrsv_, A_->getLocalNumRows());
748  mtKernelHandle_->set_gs_twostage_compact_form(CompactForm_);
749  }
750 
751  KokkosSparse::gauss_seidel_symbolic(
752  mtKernelHandle_.getRawPtr(),
753  A_->getLocalNumRows(),
754  A_->getLocalNumCols(),
755  kcsr.graph.row_map,
756  kcsr.graph.entries,
757  is_matrix_structurally_symmetric_);
758  }
759  } // timing of initialize stops here
760 
761  InitializeTime_ += (timer->wallTime() - startTime);
762  ++NumInitialize_;
763  isInitialized_ = true;
764 }
765 
766 namespace Impl {
767 template <typename BlockDiagView>
768 struct InvertDiagBlocks {
769  typedef typename BlockDiagView::size_type Size;
770 
771  private:
772  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
773  template <typename View>
774  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
775  typename View::device_type, Unmanaged>;
776 
777  typedef typename BlockDiagView::non_const_value_type Scalar;
778  typedef typename BlockDiagView::device_type Device;
779  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
780  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
781 
782  UnmanagedView<BlockDiagView> block_diag_;
783  // TODO Use thread team and scratch memory space. In this first
784  // pass, provide workspace for each block.
785  RWrk rwrk_buf_;
786  UnmanagedView<RWrk> rwrk_;
787  IWrk iwrk_buf_;
788  UnmanagedView<IWrk> iwrk_;
789 
790  public:
791  InvertDiagBlocks(BlockDiagView& block_diag)
792  : block_diag_(block_diag) {
793  const auto blksz = block_diag.extent(1);
794  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
795  rwrk_ = rwrk_buf_;
796  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
797  iwrk_ = iwrk_buf_;
798  }
799 
800  KOKKOS_INLINE_FUNCTION
801  void operator()(const Size i, int& jinfo) const {
802  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
803  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
804  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
805  int info = 0;
806  Tpetra::GETF2(D_cur, ipiv, info);
807  if (info) {
808  ++jinfo;
809  return;
810  }
811  Tpetra::GETRI(D_cur, ipiv, work, info);
812  if (info) ++jinfo;
813  }
814 };
815 } // namespace Impl
816 
817 template <class MatrixType>
818 void Relaxation<MatrixType>::computeBlockCrs() {
819  using Kokkos::ALL;
820  using Teuchos::Array;
821  using Teuchos::ArrayRCP;
822  using Teuchos::ArrayView;
823  using Teuchos::as;
824  using Teuchos::Comm;
825  using Teuchos::RCP;
826  using Teuchos::rcp;
827  using Teuchos::rcp_dynamic_cast;
828  using Teuchos::REDUCE_MAX;
829  using Teuchos::REDUCE_MIN;
830  using Teuchos::REDUCE_SUM;
831  using Teuchos::reduceAll;
832  typedef local_ordinal_type LO;
833 
834  const std::string timerName("Ifpack2::Relaxation::computeBlockCrs");
836  if (timer.is_null()) {
837  timer = Teuchos::TimeMonitor::getNewCounter(timerName);
838  }
839  double startTime = timer->wallTime();
840  {
841  Teuchos::TimeMonitor timeMon(*timer);
842 
844  A_.is_null(), std::runtime_error,
845  "Ifpack2::Relaxation::"
846  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
847  "with a nonnull input matrix, then call initialize(), before calling "
848  "this method.");
849  auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type>(A_);
851  blockCrsA.is_null(), std::logic_error,
852  "Ifpack2::Relaxation::"
853  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
854  "got this far. Please report this bug to the Ifpack2 developers.");
855 
856  const scalar_type one = STS::one();
857 
858  // Reset state.
859  IsComputed_ = false;
860 
861  const LO lclNumMeshRows =
862  blockCrsA->getCrsGraph().getLocalNumRows();
863  const LO blockSize = blockCrsA->getBlockSize();
864 
865  if (!savedDiagOffsets_) {
866  blockDiag_ = block_diag_type(); // clear it before reallocating
867  blockDiag_ = block_diag_type("Ifpack2::Relaxation::blockDiag_",
868  lclNumMeshRows, blockSize, blockSize);
869  if (Teuchos::as<LO>(diagOffsets_.extent(0)) < lclNumMeshRows) {
870  // Clear diagOffsets_ first (by assigning an empty View to it)
871  // to save memory, before reallocating.
872  diagOffsets_ = Kokkos::View<size_t*, device_type>();
873  diagOffsets_ = Kokkos::View<size_t*, device_type>("offsets", lclNumMeshRows);
874  }
875  blockCrsA->getCrsGraph().getLocalDiagOffsets(diagOffsets_);
876  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(diagOffsets_.extent(0)) !=
877  static_cast<size_t>(blockDiag_.extent(0)),
878  std::logic_error, "diagOffsets_.extent(0) = " << diagOffsets_.extent(0) << " != blockDiag_.extent(0) = " << blockDiag_.extent(0) << ". Please report this bug to the Ifpack2 developers.");
879  savedDiagOffsets_ = true;
880  }
881  blockCrsA->getLocalDiagCopy(blockDiag_, diagOffsets_);
882 
883  // Use an unmanaged View in this method, so that when we take
884  // subviews of it (to get each diagonal block), we don't have to
885  // touch the reference count. Reference count updates are a
886  // thread scalability bottleneck and have a performance cost even
887  // without using threads.
888  unmanaged_block_diag_type blockDiag = blockDiag_;
889 
890  if (DoL1Method_ && IsParallel_) {
891  const scalar_type two = one + one;
892  const size_t maxLength = A_->getLocalMaxNumRowEntries();
893  nonconst_local_inds_host_view_type indices("indices", maxLength);
894  nonconst_values_host_view_type values_("values", maxLength * blockSize * blockSize);
895  size_t numEntries = 0;
896 
897  for (LO i = 0; i < lclNumMeshRows; ++i) {
898  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
899  blockCrsA->getLocalRowCopy(i, indices, values_, numEntries);
900  scalar_type* values = reinterpret_cast<scalar_type*>(values_.data());
901 
902  auto diagBlock = Kokkos::subview(blockDiag, i, ALL(), ALL());
903  for (LO subRow = 0; subRow < blockSize; ++subRow) {
904  magnitude_type diagonal_boost = STM::zero();
905  for (size_t k = 0; k < numEntries; ++k) {
906  if (indices[k] >= lclNumMeshRows) {
907  const size_t offset = blockSize * blockSize * k + subRow * blockSize;
908  for (LO subCol = 0; subCol < blockSize; ++subCol) {
909  diagonal_boost += STS::magnitude(values[offset + subCol] / two);
910  }
911  }
912  }
913  if (STS::magnitude(diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
914  diagBlock(subRow, subRow) += diagonal_boost;
915  }
916  }
917  }
918  }
919 
920  int info = 0;
921  {
922  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
923  typedef typename unmanaged_block_diag_type::execution_space exec_space;
924  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
925 
926  Kokkos::parallel_reduce(range_type(0, lclNumMeshRows), idb, info);
927  // FIXME (mfh 19 May 2017) Different processes may not
928  // necessarily have this error all at the same time, so it would
929  // be better just to preserve a local error state and let users
930  // check.
931  TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info << " diagonal blocks.");
932  }
933 
934  // In a debug build, do an extra test to make sure that all the
935  // factorizations were computed correctly.
936 #ifdef HAVE_IFPACK2_DEBUG
937  const int numResults = 2;
938  // Use "max = -min" trick to get min and max in a single all-reduce.
939  int lclResults[2], gblResults[2];
940  lclResults[0] = info;
941  lclResults[1] = -info;
942  gblResults[0] = 0;
943  gblResults[1] = 0;
944  reduceAll<int, int>(*(A_->getGraph()->getComm()), REDUCE_MIN,
945  numResults, lclResults, gblResults);
946  TEUCHOS_TEST_FOR_EXCEPTION(gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
947  "Ifpack2::Relaxation::compute: When processing the input "
948  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
949  "failed on one or more (MPI) processes.");
950 #endif // HAVE_IFPACK2_DEBUG
951  serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
952  } // end TimeMonitor scope
953 
954  ComputeTime_ += (timer->wallTime() - startTime);
955  ++NumCompute_;
956  IsComputed_ = true;
957 }
958 
959 template <class MatrixType>
961  using Teuchos::Array;
962  using Teuchos::ArrayRCP;
963  using Teuchos::ArrayView;
964  using Teuchos::as;
965  using Teuchos::Comm;
966  using Teuchos::RCP;
967  using Teuchos::rcp;
968  using Teuchos::REDUCE_MAX;
969  using Teuchos::REDUCE_MIN;
970  using Teuchos::REDUCE_SUM;
971  using Teuchos::reduceAll;
972  using LO = local_ordinal_type;
973  using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
975  using IST = typename vector_type::impl_scalar_type;
976  using KAT = Kokkos::ArithTraits<IST>;
977 
978  const char methodName[] = "Ifpack2::Relaxation::compute";
979  const scalar_type zero = STS::zero();
980  const scalar_type one = STS::one();
981 
982  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, methodName << ": "
983  "The input matrix A is null. Please call setMatrix() with a nonnull "
984  "input matrix, then call initialize(), before calling this method.");
985 
986  // We don't count initialization in compute() time.
987  if (!isInitialized()) {
988  initialize();
989  }
990 
991  if (hasBlockCrsMatrix_) {
992  computeBlockCrs();
993  return;
994  }
995 
998  double startTime = timer->wallTime();
999 
1000  { // Timing of compute starts here.
1001  Teuchos::TimeMonitor timeMon(*timer);
1002 
1003  // Precompute some quantities for "fixing" zero or tiny diagonal
1004  // entries. We'll only use them if this "fixing" is enabled.
1005  //
1006  // SmallTraits covers for the lack of eps() in
1007  // Teuchos::ScalarTraits when its template parameter is not a
1008  // floating-point type. (Ifpack2 sometimes gets instantiated for
1009  // integer Scalar types.)
1010  IST oneOverMinDiagVal = KAT::one() / static_cast<IST>(SmallTraits<scalar_type>::eps());
1011  if (MinDiagonalValue_ != zero)
1012  oneOverMinDiagVal = KAT::one() / static_cast<IST>(MinDiagonalValue_);
1013 
1014  // It's helpful not to have to recompute this magnitude each time.
1015  const magnitude_type minDiagValMag = STS::magnitude(MinDiagonalValue_);
1016 
1017  const LO numMyRows = static_cast<LO>(A_->getLocalNumRows());
1018 
1019  TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, methodName << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1020  "Please report this bug to the Ifpack2 developers.");
1021  IsComputed_ = false;
1022 
1023  if (Diagonal_.is_null()) {
1024  // A_->getLocalDiagCopy fills in all Vector entries, even if the
1025  // matrix has no stored entries in the corresponding rows.
1026  Diagonal_ = rcp(new vector_type(A_->getRowMap(), false));
1027  }
1028 
1029  if (checkDiagEntries_) {
1030  //
1031  // Check diagonal elements, replace zero elements with the minimum
1032  // diagonal value, and store their inverses. Count the number of
1033  // "small" and zero diagonal entries while we're at it.
1034  //
1035  size_t numSmallDiagEntries = 0; // "small" includes zero
1036  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1037  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1038  magnitude_type minMagDiagEntryMag = STM::zero();
1039  magnitude_type maxMagDiagEntryMag = STM::zero();
1040 
1041  // FIXME: We are extracting the diagonal more than once. That
1042  // isn't very efficient, but we shouldn't be checking
1043  // diagonal entries at scale anyway.
1044  A_->getLocalDiagCopy(*Diagonal_); // slow path for row matrix
1045  std::unique_ptr<vector_type> origDiag;
1046  origDiag = std::unique_ptr<vector_type>(new vector_type(*Diagonal_, Teuchos::Copy));
1047  auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1048  auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1049  // As we go, keep track of the diagonal entries with the
1050  // least and greatest magnitude. We could use the trick of
1051  // starting min with +Inf and max with -Inf, but that
1052  // doesn't work if scalar_type is a built-in integer type.
1053  // Thus, we have to start by reading the first diagonal
1054  // entry redundantly.
1055  if (numMyRows != 0) {
1056  const magnitude_type d_0_mag = KAT::abs(diag(0));
1057  minMagDiagEntryMag = d_0_mag;
1058  maxMagDiagEntryMag = d_0_mag;
1059  }
1060 
1061  // Go through all the diagonal entries. Compute counts of
1062  // small-magnitude, zero, and negative-real-part entries.
1063  // Invert the diagonal entries that aren't too small. For
1064  // those too small in magnitude, replace them with
1065  // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1066  // happens to be zero).
1067  for (LO i = 0; i < numMyRows; ++i) {
1068  const IST d_i = diag(i);
1069  const magnitude_type d_i_mag = KAT::abs(d_i);
1070  // Work-around for GitHub Issue #5269.
1071  // const magnitude_type d_i_real = KAT::real (d_i);
1072  const auto d_i_real = getRealValue(d_i);
1073 
1074  // We can't compare complex numbers, but we can compare their
1075  // real parts.
1076  if (d_i_real < STM::zero()) {
1077  ++numNegDiagEntries;
1078  }
1079  if (d_i_mag < minMagDiagEntryMag) {
1080  minMagDiagEntryMag = d_i_mag;
1081  }
1082  if (d_i_mag > maxMagDiagEntryMag) {
1083  maxMagDiagEntryMag = d_i_mag;
1084  }
1085 
1086  if (fixTinyDiagEntries_) {
1087  // <= not <, in case minDiagValMag is zero.
1088  if (d_i_mag <= minDiagValMag) {
1089  ++numSmallDiagEntries;
1090  if (d_i_mag == STM::zero()) {
1091  ++numZeroDiagEntries;
1092  }
1093  diag(i) = oneOverMinDiagVal;
1094  } else {
1095  diag(i) = KAT::one() / d_i;
1096  }
1097  } else { // Don't fix zero or tiny diagonal entries.
1098  // <= not <, in case minDiagValMag is zero.
1099  if (d_i_mag <= minDiagValMag) {
1100  ++numSmallDiagEntries;
1101  if (d_i_mag == STM::zero()) {
1102  ++numZeroDiagEntries;
1103  }
1104  }
1105  diag(i) = KAT::one() / d_i;
1106  }
1107  }
1108 
1109  // Collect global data about the diagonal entries. Only do this
1110  // (which involves O(1) all-reduces) if the user asked us to do
1111  // the extra work.
1112  //
1113  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1114  // zero rows. Fixing this requires one of two options:
1115  //
1116  // 1. Use a custom reduction operation that ignores processes
1117  // with zero diagonal entries.
1118  // 2. Split the communicator, compute all-reduces using the
1119  // subcommunicator over processes that have a nonzero number
1120  // of diagonal entries, and then broadcast from one of those
1121  // processes (if there is one) to the processes in the other
1122  // subcommunicator.
1123  const Comm<int>& comm = *(A_->getRowMap()->getComm());
1124 
1125  // Compute global min and max magnitude of entries.
1126  Array<magnitude_type> localVals(2);
1127  localVals[0] = minMagDiagEntryMag;
1128  // (- (min (- x))) is the same as (max x).
1129  localVals[1] = -maxMagDiagEntryMag;
1130  Array<magnitude_type> globalVals(2);
1131  reduceAll<int, magnitude_type>(comm, REDUCE_MIN, 2,
1132  localVals.getRawPtr(),
1133  globalVals.getRawPtr());
1134  globalMinMagDiagEntryMag_ = globalVals[0];
1135  globalMaxMagDiagEntryMag_ = -globalVals[1];
1136 
1137  Array<size_t> localCounts(3);
1138  localCounts[0] = numSmallDiagEntries;
1139  localCounts[1] = numZeroDiagEntries;
1140  localCounts[2] = numNegDiagEntries;
1141  Array<size_t> globalCounts(3);
1142  reduceAll<int, size_t>(comm, REDUCE_SUM, 3,
1143  localCounts.getRawPtr(),
1144  globalCounts.getRawPtr());
1145  globalNumSmallDiagEntries_ = globalCounts[0];
1146  globalNumZeroDiagEntries_ = globalCounts[1];
1147  globalNumNegDiagEntries_ = globalCounts[2];
1148 
1149  // Compute and save the difference between the computed inverse
1150  // diagonal, and the original diagonal's inverse.
1151  vector_type diff(A_->getRowMap());
1152  diff.reciprocal(*origDiag);
1153  diff.update(-one, *Diagonal_, one);
1154  globalDiagNormDiff_ = diff.norm2();
1155  }
1156 
1157  // Extract the diagonal entries. The CrsMatrix graph version is
1158  // faster than the row matrix version for subsequent calls to
1159  // compute(), since it caches the diagonal offsets.
1160 
1161  bool debugAgainstSlowPath = false;
1162 
1163  auto crsMat = Details::getCrsMatrix(A_);
1164 
1165  if (crsMat.get() && crsMat->isFillComplete()) {
1166  // The invDiagKernel object computes diagonal offsets if
1167  // necessary. The "compute" call extracts diagonal enties,
1168  // optionally applies the L1 method and replacement of small
1169  // entries, and then inverts.
1170  if (invDiagKernel_.is_null())
1171  invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1172  else
1173  invDiagKernel_->setMatrix(crsMat);
1174  invDiagKernel_->compute(*Diagonal_,
1175  DoL1Method_ && IsParallel_, L1Eta_,
1176  fixTinyDiagEntries_, minDiagValMag);
1177  savedDiagOffsets_ = true;
1178 
1179  // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1180  // environment variable to control this behavior at run time.
1181 #ifdef HAVE_IFPACK2_DEBUG
1182  debugAgainstSlowPath = true;
1183 #endif
1184  }
1185 
1186  if (crsMat.is_null() || !crsMat->isFillComplete() || debugAgainstSlowPath) {
1187  // We could not call the CrsMatrix version above, or want to
1188  // debug by comparing against the slow path.
1189 
1190  // FIXME: The L1 method in this code path runs on host, since we
1191  // don't have device access for row matrices.
1192 
1193  RCP<vector_type> Diagonal;
1194  if (debugAgainstSlowPath)
1195  Diagonal = rcp(new vector_type(A_->getRowMap()));
1196  else
1197  Diagonal = Diagonal_;
1198 
1199  A_->getLocalDiagCopy(*Diagonal); // slow path for row matrix
1200 
1201  // Setup for L1 Methods.
1202  // Here we add half the value of the off-processor entries in the row,
1203  // but only if diagonal isn't sufficiently large.
1204  //
1205  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1206  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1207  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1208  //
1209  if (DoL1Method_ && IsParallel_) {
1210  const row_matrix_type& A_row = *A_;
1211  auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1212  const magnitude_type two = STM::one() + STM::one();
1213  const size_t maxLength = A_row.getLocalMaxNumRowEntries();
1214  nonconst_local_inds_host_view_type indices("indices", maxLength);
1215  nonconst_values_host_view_type values("values", maxLength);
1216  size_t numEntries;
1217 
1218  for (LO i = 0; i < numMyRows; ++i) {
1219  A_row.getLocalRowCopy(i, indices, values, numEntries);
1220  magnitude_type diagonal_boost = STM::zero();
1221  for (size_t k = 0; k < numEntries; ++k) {
1222  if (indices[k] >= numMyRows) {
1223  diagonal_boost += STS::magnitude(values[k] / two);
1224  }
1225  }
1226  if (KAT::magnitude(diag(i, 0)) < L1Eta_ * diagonal_boost) {
1227  diag(i, 0) += diagonal_boost;
1228  }
1229  }
1230  }
1231 
1232  //
1233  // Invert the matrix's diagonal entries (in Diagonal).
1234  //
1235  if (fixTinyDiagEntries_) {
1236  // Go through all the diagonal entries. Invert those that
1237  // aren't too small in magnitude. For those that are too
1238  // small in magnitude, replace them with oneOverMinDiagVal.
1239  auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1240  Kokkos::parallel_for(
1241  Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1242  KOKKOS_LAMBDA(local_ordinal_type i) {
1243  auto d_i = localDiag(i, 0);
1244  const magnitude_type d_i_mag = KAT::magnitude(d_i);
1245  // <= not <, in case minDiagValMag is zero.
1246  if (d_i_mag <= minDiagValMag) {
1247  d_i = oneOverMinDiagVal;
1248  } else {
1249  // For Stokhos types, operator/ returns an expression
1250  // type. Explicitly convert to IST before returning.
1251  d_i = IST(KAT::one() / d_i);
1252  }
1253  localDiag(i, 0) = d_i;
1254  });
1255  } else { // don't fix tiny or zero diagonal entries
1256  Diagonal->reciprocal(*Diagonal);
1257  }
1258 
1259  if (debugAgainstSlowPath) {
1260  // Validate the fast-path diagonal against the slow-path diagonal.
1261  Diagonal->update(STS::one(), *Diagonal_, -STS::one());
1262  const magnitude_type err = Diagonal->normInf();
1263  // The two diagonals should be exactly the same, so their
1264  // difference should be exactly zero.
1265  TEUCHOS_TEST_FOR_EXCEPTION(err > 100 * STM::eps(), std::logic_error, methodName << ": "
1266  << "\"fast-path\" diagonal computation failed. "
1267  "\\|D1 - D2\\|_inf = "
1268  << err << ".");
1269  }
1270  }
1271 
1272  // Count floating-point operations of computing the inverse diagonal.
1273  //
1274  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1275  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1276  ComputeFlops_ += 4.0 * numMyRows;
1277  } else {
1278  ComputeFlops_ += numMyRows;
1279  }
1280 
1281  if (PrecType_ == Ifpack2::Details::MTGS ||
1282  PrecType_ == Ifpack2::Details::MTSGS ||
1283  PrecType_ == Ifpack2::Details::GS2 ||
1284  PrecType_ == Ifpack2::Details::SGS2) {
1285  // KokkosKernels GaussSeidel Initialization.
1286  using scalar_view_t = typename local_matrix_device_type::values_type;
1287 
1288  TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error, methodName << ": "
1289  "Multithreaded Gauss-Seidel methods currently only work "
1290  "when the input matrix is a Tpetra::CrsMatrix.");
1291  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
1292 
1293  // TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1294  // const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1295  auto diagView_2d = Diagonal_->getLocalViewDevice(Tpetra::Access::ReadWrite);
1296  scalar_view_t diagView_1d = Kokkos::subview(diagView_2d, Kokkos::ALL(), 0);
1297  KokkosSparse::gauss_seidel_numeric(
1298  mtKernelHandle_.getRawPtr(),
1299  A_->getLocalNumRows(),
1300  A_->getLocalNumCols(),
1301  kcsr.graph.row_map,
1302  kcsr.graph.entries,
1303  kcsr.values,
1304  diagView_1d,
1305  is_matrix_structurally_symmetric_);
1306  } else if (PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1307  if (crsMat)
1308  serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1309  else
1310  serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1311  }
1312  } // end TimeMonitor scope
1313 
1314  ComputeTime_ += (timer->wallTime() - startTime);
1315  ++NumCompute_;
1316  IsComputed_ = true;
1317 }
1318 
1319 template <class MatrixType>
1321  ApplyInverseRichardson(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1322  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1323  using Teuchos::as;
1324  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1325  const double numVectors = as<double>(X.getNumVectors());
1326  if (ZeroStartingSolution_) {
1327  // For the first Richardson sweep, if we are allowed to assume that
1328  // the initial guess is zero, then Richardson is just alpha times the RHS
1329  // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1330  Y.scale(DampingFactor_, X);
1331 
1332  // Count (global) floating-point operations. Ifpack2 represents
1333  // this as a floating-point number rather than an integer, so that
1334  // overflow (for a very large number of calls, or a very large
1335  // problem) is approximate instead of catastrophic.
1336  double flopUpdate = 0.0;
1337  if (DampingFactor_ == STS::one()) {
1338  // Y(i,j) = X(i,j): one multiply for each entry of Y.
1339  flopUpdate = numGlobalRows * numVectors;
1340  } else {
1341  // Y(i,j) = DampingFactor_ * X(i,j):
1342  // One multiplies per entry of Y.
1343  flopUpdate = numGlobalRows * numVectors;
1344  }
1345  ApplyFlops_ += flopUpdate;
1346  if (NumSweeps_ == 1) {
1347  return;
1348  }
1349  }
1350  // If we were allowed to assume that the starting guess was zero,
1351  // then we have already done the first sweep above.
1352  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1353 
1354  // Allocate a multivector if the cached one isn't perfect
1355  updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1356 
1357  for (int j = startSweep; j < NumSweeps_; ++j) {
1358  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1359  Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1360  Y.update(DampingFactor_, *cachedMV_, STS::one());
1361  }
1362 
1363  // For each column of output, for each pass over the matrix:
1364  //
1365  // - One + and one * for each matrix entry
1366  // - One / and one + for each row of the matrix
1367  // - If the damping factor is not one: one * for each row of the
1368  // matrix. (It's not fair to count this if the damping factor is
1369  // one, since the implementation could skip it. Whether it does
1370  // or not is the implementation's choice.)
1371 
1372  // Floating-point operations due to the damping factor, per matrix
1373  // row, per direction, per columm of output.
1374  const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1375  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1376  ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1377  (2.0 * numGlobalNonzeros + dampingFlops);
1378 }
1379 
1380 template <class MatrixType>
1381 void Relaxation<MatrixType>::
1382  ApplyInverseJacobi(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1383  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y) const {
1384  using Teuchos::as;
1385  if (hasBlockCrsMatrix_) {
1386  ApplyInverseJacobi_BlockCrsMatrix(X, Y);
1387  return;
1388  }
1389 
1390  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1391  const double numVectors = as<double>(X.getNumVectors());
1392  if (ZeroStartingSolution_) {
1393  // For the first Jacobi sweep, if we are allowed to assume that
1394  // the initial guess is zero, then Jacobi is just diagonal
1395  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1396  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1397  Y.elementWiseMultiply(DampingFactor_, *Diagonal_, X, STS::zero());
1398 
1399  // Count (global) floating-point operations. Ifpack2 represents
1400  // this as a floating-point number rather than an integer, so that
1401  // overflow (for a very large number of calls, or a very large
1402  // problem) is approximate instead of catastrophic.
1403  double flopUpdate = 0.0;
1404  if (DampingFactor_ == STS::one()) {
1405  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1406  flopUpdate = numGlobalRows * numVectors;
1407  } else {
1408  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1409  // Two multiplies per entry of Y.
1410  flopUpdate = 2.0 * numGlobalRows * numVectors;
1411  }
1412  ApplyFlops_ += flopUpdate;
1413  if (NumSweeps_ == 1) {
1414  return;
1415  }
1416  }
1417  // If we were allowed to assume that the starting guess was zero,
1418  // then we have already done the first sweep above.
1419  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1420 
1421  // Allocate a multivector if the cached one isn't perfect
1422  updateCachedMultiVector(Y.getMap(), as<size_t>(numVectors));
1423 
1424  for (int j = startSweep; j < NumSweeps_; ++j) {
1425  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1426  Tpetra::Details::residual(*A_, Y, X, *cachedMV_);
1427  Y.elementWiseMultiply(DampingFactor_, *Diagonal_, *cachedMV_, STS::one());
1428  }
1429 
1430  // For each column of output, for each pass over the matrix:
1431  //
1432  // - One + and one * for each matrix entry
1433  // - One / and one + for each row of the matrix
1434  // - If the damping factor is not one: one * for each row of the
1435  // matrix. (It's not fair to count this if the damping factor is
1436  // one, since the implementation could skip it. Whether it does
1437  // or not is the implementation's choice.)
1438 
1439  // Floating-point operations due to the damping factor, per matrix
1440  // row, per direction, per columm of output.
1441  const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1442  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1443  ApplyFlops_ += as<double>(NumSweeps_ - startSweep) * numVectors *
1444  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1445 }
1446 
1447 template <class MatrixType>
1448 void Relaxation<MatrixType>::
1449  ApplyInverseJacobi_BlockCrsMatrix(const Tpetra::MultiVector<scalar_type,
1450  local_ordinal_type,
1451  global_ordinal_type,
1452  node_type>& X,
1453  Tpetra::MultiVector<scalar_type,
1454  local_ordinal_type,
1455  global_ordinal_type,
1456  node_type>& Y) const {
1457  using Tpetra::BlockMultiVector;
1458  using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1459  global_ordinal_type, node_type>;
1460 
1461  const block_crs_matrix_type* blockMatConst =
1462  dynamic_cast<const block_crs_matrix_type*>(A_.getRawPtr());
1463  TEUCHOS_TEST_FOR_EXCEPTION(blockMatConst == nullptr, std::logic_error,
1464  "This method should "
1465  "never be called if the matrix A_ is not a BlockCrsMatrix. "
1466  "Please report this bug to the Ifpack2 developers.");
1467  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1468  // This is because applyBlock() is nonconst (more accurate), while
1469  // apply() is const (required by Tpetra::Operator interface, but a
1470  // lie, because it possibly allocates temporary buffers).
1471  block_crs_matrix_type* blockMat =
1472  const_cast<block_crs_matrix_type*>(blockMatConst);
1473 
1474  auto meshRowMap = blockMat->getRowMap();
1475  auto meshColMap = blockMat->getColMap();
1476  const local_ordinal_type blockSize = blockMat->getBlockSize();
1477 
1478  BMV X_blk(X, *meshColMap, blockSize);
1479  BMV Y_blk(Y, *meshRowMap, blockSize);
1480 
1481  if (ZeroStartingSolution_) {
1482  // For the first sweep, if we are allowed to assume that the
1483  // initial guess is zero, then block Jacobi is just block diagonal
1484  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1485  Y_blk.blockWiseMultiply(DampingFactor_, blockDiag_, X_blk);
1486  if (NumSweeps_ == 1) {
1487  return;
1488  }
1489  }
1490 
1491  auto pointRowMap = Y.getMap();
1492  const size_t numVecs = X.getNumVectors();
1493 
1494  // We don't need to initialize the result MV, since the sparse
1495  // mat-vec will clobber its contents anyway.
1496  BMV A_times_Y(*meshRowMap, *pointRowMap, blockSize, numVecs);
1497 
1498  // If we were allowed to assume that the starting guess was zero,
1499  // then we have already done the first sweep above.
1500  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1501 
1502  for (int j = startSweep; j < NumSweeps_; ++j) {
1503  blockMat->applyBlock(Y_blk, A_times_Y);
1504  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1505  Y_blk.blockJacobiUpdate(DampingFactor_, blockDiag_,
1506  X_blk, A_times_Y, STS::one());
1507  }
1508 }
1509 
1510 template <class MatrixType>
1511 void Relaxation<MatrixType>::
1512  ApplyInverseSerialGS(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1513  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1514  Tpetra::ESweepDirection direction) const {
1515  using this_type = Relaxation<MatrixType>;
1516  // The CrsMatrix version is faster, because it can access the sparse
1517  // matrix data directly, rather than by copying out each row's data
1518  // in turn. Thus, we check whether the RowMatrix is really a
1519  // CrsMatrix.
1520  //
1521  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1522  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1523  // will still be correct if the cast fails, but it will use an
1524  // unoptimized kernel.
1525  auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(A_);
1526  auto crsMat = Details::getCrsMatrix(A_);
1527  if (blockCrsMat.get()) {
1528  const_cast<this_type&>(*this).ApplyInverseSerialGS_BlockCrsMatrix(*blockCrsMat, X, Y, direction);
1529  } else if (crsMat.get()) {
1530  ApplyInverseSerialGS_CrsMatrix(*crsMat, X, Y, direction);
1531  } else {
1532  ApplyInverseSerialGS_RowMatrix(X, Y, direction);
1533  }
1534 }
1535 
1536 template <class MatrixType>
1537 void Relaxation<MatrixType>::
1538  ApplyInverseSerialGS_RowMatrix(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1539  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1540  Tpetra::ESweepDirection direction) const {
1541  using Teuchos::Array;
1542  using Teuchos::ArrayRCP;
1543  using Teuchos::ArrayView;
1544  using Teuchos::as;
1545  using Teuchos::RCP;
1546  using Teuchos::rcp;
1547  using Teuchos::rcpFromRef;
1548  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
1549 
1550  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1551  // starting multivector itself. The generic RowMatrix version here
1552  // does not, so we have to zero out Y here.
1553  if (ZeroStartingSolution_) {
1554  Y.putScalar(STS::zero());
1555  }
1556 
1557  size_t NumVectors = X.getNumVectors();
1558 
1559  RCP<MV> Y2;
1560  if (IsParallel_) {
1561  if (Importer_.is_null()) { // domain and column Maps are the same.
1562  updateCachedMultiVector(Y.getMap(), NumVectors);
1563  } else {
1564  updateCachedMultiVector(Importer_->getTargetMap(), NumVectors);
1565  }
1566  Y2 = cachedMV_;
1567  } else {
1568  Y2 = rcpFromRef(Y);
1569  }
1570 
1571  for (int j = 0; j < NumSweeps_; ++j) {
1572  // data exchange is here, once per sweep
1573  if (IsParallel_) {
1574  if (Importer_.is_null()) {
1575  // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1576  // clear if this is correct. Reevaluate at some point.
1577 
1578  *Y2 = Y; // just copy, since domain and column Maps are the same
1579  } else {
1580  Y2->doImport(Y, *Importer_, Tpetra::INSERT);
1581  }
1582  }
1583  serialGaussSeidel_->apply(*Y2, X, direction);
1584 
1585  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1586  if (IsParallel_) {
1587  Tpetra::deep_copy(Y, *Y2);
1588  }
1589  }
1590 
1591  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1592  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1593  const double numVectors = as<double>(X.getNumVectors());
1594  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
1595  const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
1596  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1597  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1598 }
1599 
1600 template <class MatrixType>
1601 void Relaxation<MatrixType>::
1602  ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1603  const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1604  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1605  Tpetra::ESweepDirection direction) const {
1606  using Teuchos::null;
1607  using Teuchos::RCP;
1608  using Teuchos::rcp;
1609  using Teuchos::rcp_const_cast;
1610  using Teuchos::rcpFromRef;
1611  typedef scalar_type Scalar;
1612  const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1613  const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1614 
1616  !A.isFillComplete(), std::runtime_error,
1617  prefix << "The matrix is not fill complete.");
1619  NumSweeps_ < 0, std::invalid_argument,
1620  prefix << "The number of sweeps must be nonnegative, "
1621  "but you provided numSweeps = "
1622  << NumSweeps_ << " < 0.");
1623 
1624  if (NumSweeps_ == 0) {
1625  return;
1626  }
1627 
1628  RCP<const import_type> importer = A.getGraph()->getImporter();
1629 
1630  RCP<const map_type> domainMap = A.getDomainMap();
1631  RCP<const map_type> rangeMap = A.getRangeMap();
1632  RCP<const map_type> rowMap = A.getGraph()->getRowMap();
1633  RCP<const map_type> colMap = A.getGraph()->getColMap();
1634 
1635 #ifdef HAVE_IFPACK2_DEBUG
1636  {
1637  // The relation 'isSameAs' is transitive. It's also a
1638  // collective, so we don't have to do a "shared" test for
1639  // exception (i.e., a global reduction on the test value).
1641  !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1642  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1643  "multivector X be in the domain Map of the matrix.");
1645  !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1646  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1647  "B be in the range Map of the matrix.");
1649  !Diagonal_->getMap()->isSameAs(*rowMap), std::runtime_error,
1650  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1651  "D be in the row Map of the matrix.");
1653  !rowMap->isSameAs(*rangeMap), std::runtime_error,
1654  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1655  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1657  !domainMap->isSameAs(*rangeMap), std::runtime_error,
1658  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1659  "the range Map of the matrix be the same.");
1660  }
1661 #endif
1662 
1663  // Fetch a (possibly cached) temporary column Map multivector
1664  // X_colMap, and a domain Map view X_domainMap of it. Both have
1665  // constant stride by construction. We know that the domain Map
1666  // must include the column Map, because our Gauss-Seidel kernel
1667  // requires that the row Map, domain Map, and range Map are all
1668  // the same, and that each process owns all of its own diagonal
1669  // entries of the matrix.
1670 
1671  RCP<multivector_type> X_colMap;
1672  RCP<multivector_type> X_domainMap;
1673  bool copyBackOutput = false;
1674  if (importer.is_null()) {
1675  X_colMap = Teuchos::rcpFromRef(X);
1676  X_domainMap = Teuchos::rcpFromRef(X);
1677  // Column Map and domain Map are the same, so there are no
1678  // remote entries. Thus, if we are not setting the initial
1679  // guess to zero, we don't have to worry about setting remote
1680  // entries to zero, even though we are not doing an Import in
1681  // this case.
1682  if (ZeroStartingSolution_) {
1683  X_colMap->putScalar(ZERO);
1684  }
1685  // No need to copy back to X at end.
1686  } else { // Column Map and domain Map are _not_ the same.
1687  updateCachedMultiVector(colMap, X.getNumVectors());
1688  X_colMap = cachedMV_;
1689  X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1690 
1691  if (ZeroStartingSolution_) {
1692  // No need for an Import, since we're filling with zeros.
1693  X_colMap->putScalar(ZERO);
1694  } else {
1695  // We could just copy X into X_domainMap. However, that
1696  // wastes a copy, because the Import also does a copy (plus
1697  // communication). Since the typical use case for
1698  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1699  // don't want to waste that copy. Thus, we do the Import
1700  // here, and skip the first Import in the first sweep.
1701  // Importing directly from X effects the copy into X_domainMap
1702  // (which is a view of X_colMap).
1703  X_colMap->doImport(X, *importer, Tpetra::INSERT);
1704  }
1705  copyBackOutput = true; // Don't forget to copy back at end.
1706  } // if column and domain Maps are (not) the same
1707 
1708  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1709  if (!importer.is_null() && sweep > 0) {
1710  // We already did the first Import for the zeroth sweep above,
1711  // if it was necessary.
1712  X_colMap->doImport(*X_domainMap, *importer, Tpetra::INSERT);
1713  }
1714  // Do local Gauss-Seidel (forward, backward or symmetric)
1715  serialGaussSeidel_->apply(*X_colMap, B, direction);
1716  }
1717 
1718  if (copyBackOutput) {
1719  try {
1720  deep_copy(X, *X_domainMap); // Copy result back into X.
1721  } catch (std::exception& e) {
1723  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1724  "threw an exception: "
1725  << e.what());
1726  }
1727  }
1728 
1729  // For each column of output, for each sweep over the matrix:
1730  //
1731  // - One + and one * for each matrix entry
1732  // - One / and one + for each row of the matrix
1733  // - If the damping factor is not one: one * for each row of the
1734  // matrix. (It's not fair to count this if the damping factor is
1735  // one, since the implementation could skip it. Whether it does
1736  // or not is the implementation's choice.)
1737 
1738  // Floating-point operations due to the damping factor, per matrix
1739  // row, per direction, per columm of output.
1740  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1741  const double numVectors = X.getNumVectors();
1742  const double numGlobalRows = A_->getGlobalNumRows();
1743  const double numGlobalNonzeros = A_->getGlobalNumEntries();
1744  ApplyFlops_ += NumSweeps_ * numVectors *
1745  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1746 }
1747 
1748 template <class MatrixType>
1749 void Relaxation<MatrixType>::
1750  ApplyInverseSerialGS_BlockCrsMatrix(const block_crs_matrix_type& A,
1751  const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1752  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1753  Tpetra::ESweepDirection direction) {
1754  using Teuchos::RCP;
1755  using Teuchos::rcp;
1756  using Teuchos::rcpFromRef;
1757  using Tpetra::INSERT;
1758 
1759  // FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1760  //
1761  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1762  // multiple right-hand sides, unless the input or output MultiVector
1763  // does not have constant stride. We should check for that case
1764  // here, in case it doesn't work in localGaussSeidel (which is
1765  // entirely possible).
1766  block_multivector_type yBlock(Y, *(A.getGraph()->getDomainMap()), A.getBlockSize());
1767  const block_multivector_type xBlock(X, *(A.getColMap()), A.getBlockSize());
1768 
1769  bool performImport = false;
1770  RCP<block_multivector_type> yBlockCol;
1771  if (Importer_.is_null()) {
1772  yBlockCol = rcpFromRef(yBlock);
1773  } else {
1774  if (yBlockColumnPointMap_.is_null() ||
1775  yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1776  yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1777  yBlockColumnPointMap_ =
1778  rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1779  static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1780  }
1781  yBlockCol = yBlockColumnPointMap_;
1782  if (pointImporter_.is_null()) {
1783  auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1784  auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1785  pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1786  }
1787  performImport = true;
1788  }
1789 
1790  multivector_type yBlock_mv;
1791  multivector_type yBlockCol_mv;
1792  RCP<const multivector_type> yBlockColPointDomain;
1793  if (performImport) { // create views (shallow copies)
1794  yBlock_mv = yBlock.getMultiVectorView();
1795  yBlockCol_mv = yBlockCol->getMultiVectorView();
1796  yBlockColPointDomain =
1797  yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1798  }
1799 
1800  if (ZeroStartingSolution_) {
1801  yBlockCol->putScalar(STS::zero());
1802  } else if (performImport) {
1803  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1804  }
1805 
1806  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1807  if (performImport && sweep > 0) {
1808  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1809  }
1810  serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1811  if (performImport) {
1812  Tpetra::deep_copy(Y, *yBlockColPointDomain);
1813  }
1814  }
1815 }
1816 
1817 template <class MatrixType>
1818 void Relaxation<MatrixType>::
1819  ApplyInverseMTGS_CrsMatrix(
1820  const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& B,
1821  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1822  Tpetra::ESweepDirection direction) const {
1823  using Teuchos::as;
1824  using Teuchos::null;
1825  using Teuchos::RCP;
1826  using Teuchos::rcp;
1827  using Teuchos::rcp_const_cast;
1828  using Teuchos::rcpFromRef;
1829 
1830  typedef scalar_type Scalar;
1831 
1832  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1833  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1834 
1835  auto crsMat = Details::getCrsMatrix(A_);
1836  TEUCHOS_TEST_FOR_EXCEPTION(crsMat.is_null(), std::logic_error,
1837  "Ifpack2::Relaxation::apply: "
1838  "Multithreaded Gauss-Seidel methods currently only work when the "
1839  "input matrix is a Tpetra::CrsMatrix.");
1840 
1841  // Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1842  TEUCHOS_TEST_FOR_EXCEPTION(!localSmoothingIndices_.is_null(), std::logic_error,
1843  "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1844  "implement the case where the user supplies an iteration order. "
1845  "This error used to appear as \"MT GaussSeidel ignores the given "
1846  "order\". "
1847  "I tried to add more explanation, but I didn't implement \"MT "
1848  "GaussSeidel\" [sic]. "
1849  "You'll have to ask the person who did.");
1850 
1851  TEUCHOS_TEST_FOR_EXCEPTION(!crsMat->isFillComplete(), std::runtime_error, prefix << "The "
1852  "input CrsMatrix is not fill complete. Please call fillComplete "
1853  "on the matrix, then do setup again, before calling apply(). "
1854  "\"Do setup\" means that if only the matrix's values have changed "
1855  "since last setup, you need only call compute(). If the matrix's "
1856  "structure may also have changed, you must first call initialize(), "
1857  "then call compute(). If you have not set up this preconditioner "
1858  "for this matrix before, you must first call initialize(), then "
1859  "call compute().");
1860  TEUCHOS_TEST_FOR_EXCEPTION(NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = " << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1861  "developers.");
1862  if (NumSweeps_ == 0) {
1863  return;
1864  }
1865 
1866  RCP<const import_type> importer = crsMat->getGraph()->getImporter();
1868  !crsMat->getGraph()->getExporter().is_null(), std::runtime_error,
1869  "This method's implementation currently requires that the matrix's row, "
1870  "domain, and range Maps be the same. This cannot be the case, because "
1871  "the matrix has a nontrivial Export object.");
1872 
1873  RCP<const map_type> domainMap = crsMat->getDomainMap();
1874  RCP<const map_type> rangeMap = crsMat->getRangeMap();
1875  RCP<const map_type> rowMap = crsMat->getGraph()->getRowMap();
1876  RCP<const map_type> colMap = crsMat->getGraph()->getColMap();
1877 
1878 #ifdef HAVE_IFPACK2_DEBUG
1879  {
1880  // The relation 'isSameAs' is transitive. It's also a
1881  // collective, so we don't have to do a "shared" test for
1882  // exception (i.e., a global reduction on the test value).
1884  !X.getMap()->isSameAs(*domainMap), std::runtime_error,
1885  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1886  "multivector X be in the domain Map of the matrix.");
1888  !B.getMap()->isSameAs(*rangeMap), std::runtime_error,
1889  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1890  "B be in the range Map of the matrix.");
1892  !rowMap->isSameAs(*rangeMap), std::runtime_error,
1893  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1894  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1896  !domainMap->isSameAs(*rangeMap), std::runtime_error,
1897  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1898  "the range Map of the matrix be the same.");
1899  }
1900 #else
1901  // Forestall any compiler warnings for unused variables.
1902  (void)rangeMap;
1903  (void)rowMap;
1904 #endif // HAVE_IFPACK2_DEBUG
1905 
1906  // Fetch a (possibly cached) temporary column Map multivector
1907  // X_colMap, and a domain Map view X_domainMap of it. Both have
1908  // constant stride by construction. We know that the domain Map
1909  // must include the column Map, because our Gauss-Seidel kernel
1910  // requires that the row Map, domain Map, and range Map are all
1911  // the same, and that each process owns all of its own diagonal
1912  // entries of the matrix.
1913 
1914  RCP<multivector_type> X_colMap;
1915  RCP<multivector_type> X_domainMap;
1916  bool copyBackOutput = false;
1917  if (importer.is_null()) {
1918  if (X.isConstantStride()) {
1919  X_colMap = rcpFromRef(X);
1920  X_domainMap = rcpFromRef(X);
1921 
1922  // Column Map and domain Map are the same, so there are no
1923  // remote entries. Thus, if we are not setting the initial
1924  // guess to zero, we don't have to worry about setting remote
1925  // entries to zero, even though we are not doing an Import in
1926  // this case.
1927  if (ZeroStartingSolution_) {
1928  X_colMap->putScalar(ZERO);
1929  }
1930  // No need to copy back to X at end.
1931  } else {
1932  // We must copy X into a constant stride multivector.
1933  // Just use the cached column Map multivector for that.
1934  // force=true means fill with zeros, so no need to fill
1935  // remote entries (not in domain Map) with zeros.
1936  updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1937  X_colMap = cachedMV_;
1938  // X_domainMap is always a domain Map view of the column Map
1939  // multivector. In this case, the domain and column Maps are
1940  // the same, so X_domainMap _is_ X_colMap.
1941  X_domainMap = X_colMap;
1942  if (!ZeroStartingSolution_) { // Don't copy if zero initial guess
1943  try {
1944  deep_copy(*X_domainMap, X); // Copy X into constant stride MV
1945  } catch (std::exception& e) {
1946  std::ostringstream os;
1947  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1948  "deep_copy(*X_domainMap, X) threw an exception: "
1949  << e.what() << ".";
1950  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
1951  }
1952  }
1953  copyBackOutput = true; // Don't forget to copy back at end.
1954  /*
1955  TPETRA_EFFICIENCY_WARNING(
1956  ! X.isConstantStride (),
1957  std::runtime_error,
1958  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1959  "kernel requires that X and B both have constant stride. Since X "
1960  "does not have constant stride, we had to make a copy. This is a "
1961  "limitation of the current implementation and not your fault, but we "
1962  "still report it as an efficiency warning for your information.");
1963  */
1964  }
1965  } else { // Column Map and domain Map are _not_ the same.
1966  updateCachedMultiVector(colMap, as<size_t>(X.getNumVectors()));
1967  X_colMap = cachedMV_;
1968 
1969  X_domainMap = X_colMap->offsetViewNonConst(domainMap, 0);
1970 
1971  if (ZeroStartingSolution_) {
1972  // No need for an Import, since we're filling with zeros.
1973  X_colMap->putScalar(ZERO);
1974  } else {
1975  // We could just copy X into X_domainMap. However, that
1976  // wastes a copy, because the Import also does a copy (plus
1977  // communication). Since the typical use case for
1978  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1979  // don't want to waste that copy. Thus, we do the Import
1980  // here, and skip the first Import in the first sweep.
1981  // Importing directly from X effects the copy into X_domainMap
1982  // (which is a view of X_colMap).
1983  X_colMap->doImport(X, *importer, Tpetra::CombineMode::INSERT);
1984  }
1985  copyBackOutput = true; // Don't forget to copy back at end.
1986  } // if column and domain Maps are (not) the same
1987 
1988  // The Gauss-Seidel / SOR kernel expects multivectors of constant
1989  // stride. X_colMap is by construction, but B might not be. If
1990  // it's not, we have to make a copy.
1991  RCP<const multivector_type> B_in;
1992  if (B.isConstantStride()) {
1993  B_in = rcpFromRef(B);
1994  } else {
1995  // Range Map and row Map are the same in this case, so we can
1996  // use the cached row Map multivector to store a constant stride
1997  // copy of B.
1998  RCP<multivector_type> B_in_nonconst = rcp(new multivector_type(rowMap, B.getNumVectors()));
1999  try {
2000  deep_copy(*B_in_nonconst, B);
2001  } catch (std::exception& e) {
2002  std::ostringstream os;
2003  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2004  "deep_copy(*B_in_nonconst, B) threw an exception: "
2005  << e.what() << ".";
2006  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what());
2007  }
2008  B_in = rcp_const_cast<const multivector_type>(B_in_nonconst);
2009 
2010  /*
2011  TPETRA_EFFICIENCY_WARNING(
2012  ! B.isConstantStride (),
2013  std::runtime_error,
2014  "MTGaussSeidel: The current implementation requires that B have "
2015  "constant stride. Since B does not have constant stride, we had to "
2016  "copy it into a separate constant-stride multivector. This is a "
2017  "limitation of the current implementation and not your fault, but we "
2018  "still report it as an efficiency warning for your information.");
2019  */
2020  }
2021 
2022  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice();
2023 
2024  bool update_y_vector = true;
2025  // false as it was done up already, and we dont want to zero it in each sweep.
2026  bool zero_x_vector = false;
2027 
2028  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2029  if (!importer.is_null() && sweep > 0) {
2030  // We already did the first Import for the zeroth sweep above,
2031  // if it was necessary.
2032  X_colMap->doImport(*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2033  }
2034 
2035  if (direction == Tpetra::Symmetric) {
2036  KokkosSparse::symmetric_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2037  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2038  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2039  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2040  zero_x_vector, update_y_vector, DampingFactor_, 1);
2041  } else if (direction == Tpetra::Forward) {
2042  KokkosSparse::forward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2043  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2044  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2045  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2046  zero_x_vector, update_y_vector, DampingFactor_, 1);
2047  } else if (direction == Tpetra::Backward) {
2048  KokkosSparse::backward_sweep_gauss_seidel_apply(mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2049  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2050  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2051  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2052  zero_x_vector, update_y_vector, DampingFactor_, 1);
2053  } else {
2055  true, std::invalid_argument,
2056  prefix << "The 'direction' enum does not have any of its valid "
2057  "values: Forward, Backward, or Symmetric.");
2058  }
2059  update_y_vector = false;
2060  }
2061 
2062  if (copyBackOutput) {
2063  try {
2064  deep_copy(X, *X_domainMap); // Copy result back into X.
2065  } catch (std::exception& e) {
2067  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2068  "threw an exception: "
2069  << e.what());
2070  }
2071  }
2072 
2073  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2074  const double numVectors = as<double>(X.getNumVectors());
2075  const double numGlobalRows = as<double>(A_->getGlobalNumRows());
2076  const double numGlobalNonzeros = as<double>(A_->getGlobalNumEntries());
2077  double ApplyFlops = NumSweeps_ * numVectors *
2078  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2079  if (direction == Tpetra::Symmetric)
2080  ApplyFlops *= 2.0;
2081  ApplyFlops_ += ApplyFlops;
2082 }
2083 
2084 template <class MatrixType>
2086  std::ostringstream os;
2087 
2088  // Output is a valid YAML dictionary in flow style. If you don't
2089  // like everything on a single line, you should call describe()
2090  // instead.
2091  os << "\"Ifpack2::Relaxation\": {";
2092 
2093  os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
2094  << "Computed: " << (isComputed() ? "true" : "false") << ", ";
2095 
2096  // It's useful to print this instance's relaxation method (Jacobi,
2097  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2098  // than that, call describe() instead.
2099  os << "Type: ";
2100  if (PrecType_ == Ifpack2::Details::JACOBI) {
2101  os << "Jacobi";
2102  } else if (PrecType_ == Ifpack2::Details::GS) {
2103  os << "Gauss-Seidel";
2104  } else if (PrecType_ == Ifpack2::Details::SGS) {
2105  os << "Symmetric Gauss-Seidel";
2106  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2107  os << "MT Gauss-Seidel";
2108  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2109  os << "MT Symmetric Gauss-Seidel";
2110  } else if (PrecType_ == Ifpack2::Details::GS2) {
2111  os << "Two-stage Gauss-Seidel";
2112  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2113  os << "Two-stage Symmetric Gauss-Seidel";
2114  } else {
2115  os << "INVALID";
2116  }
2117  if (hasBlockCrsMatrix_)
2118  os << ", BlockCrs";
2119 
2120  os << ", "
2121  << "sweeps: " << NumSweeps_ << ", "
2122  << "damping factor: " << DampingFactor_ << ", ";
2123 
2124  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2125  os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2126  os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2127  os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2128  os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2129  switch (mtColoringAlgorithm_) {
2130  case KokkosGraph::COLORING_DEFAULT:
2131  os << "DEFAULT";
2132  break;
2133  case KokkosGraph::COLORING_SERIAL:
2134  os << "SERIAL";
2135  break;
2136  case KokkosGraph::COLORING_VB:
2137  os << "VB";
2138  break;
2139  case KokkosGraph::COLORING_VBBIT:
2140  os << "VBBIT";
2141  break;
2142  case KokkosGraph::COLORING_VBCS:
2143  os << "VBCS";
2144  break;
2145  case KokkosGraph::COLORING_VBD:
2146  os << "VBD";
2147  break;
2148  case KokkosGraph::COLORING_VBDBIT:
2149  os << "VBDBIT";
2150  break;
2151  case KokkosGraph::COLORING_EB:
2152  os << "EB";
2153  break;
2154  default:
2155  os << "*Invalid*";
2156  }
2157  os << ", ";
2158  }
2159 
2160  if (PrecType_ == Ifpack2::Details::GS2 ||
2161  PrecType_ == Ifpack2::Details::SGS2) {
2162  os << "outer sweeps: " << NumOuterSweeps_ << ", "
2163  << "inner sweeps: " << NumInnerSweeps_ << ", "
2164  << "inner damping factor: " << InnerDampingFactor_ << ", ";
2165  }
2166 
2167  if (DoL1Method_) {
2168  os << "use l1: " << DoL1Method_ << ", "
2169  << "l1 eta: " << L1Eta_ << ", ";
2170  }
2171 
2172  if (A_.is_null()) {
2173  os << "Matrix: null";
2174  } else {
2175  os << "Global matrix dimensions: ["
2176  << A_->getGlobalNumRows() << ", " << A_->getGlobalNumCols() << "]"
2177  << ", Global nnz: " << A_->getGlobalNumEntries();
2178  }
2179 
2180  os << "}";
2181  return os.str();
2182 }
2183 
2184 template <class MatrixType>
2187  const Teuchos::EVerbosityLevel verbLevel) const {
2188  using std::endl;
2189  using Teuchos::OSTab;
2191  using Teuchos::VERB_DEFAULT;
2192  using Teuchos::VERB_EXTREME;
2193  using Teuchos::VERB_HIGH;
2194  using Teuchos::VERB_LOW;
2195  using Teuchos::VERB_MEDIUM;
2196  using Teuchos::VERB_NONE;
2197 
2198  const Teuchos::EVerbosityLevel vl =
2199  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2200 
2201  const int myRank = this->getComm()->getRank();
2202 
2203  // none: print nothing
2204  // low: print O(1) info from Proc 0
2205  // medium:
2206  // high:
2207  // extreme:
2208 
2209  if (vl != VERB_NONE && myRank == 0) {
2210  // Describable interface asks each implementation to start with a tab.
2211  OSTab tab1(out);
2212  // Output is valid YAML; hence the quotes, to protect the colons.
2213  out << "\"Ifpack2::Relaxation\":" << endl;
2214  OSTab tab2(out);
2215  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name() << "\""
2216  << endl;
2217  if (this->getObjectLabel() != "") {
2218  out << "Label: " << this->getObjectLabel() << endl;
2219  }
2220  out << "Initialized: " << (isInitialized() ? "true" : "false") << endl
2221  << "Computed: " << (isComputed() ? "true" : "false") << endl
2222  << "Parameters: " << endl;
2223  {
2224  OSTab tab3(out);
2225  out << "\"relaxation: type\": ";
2226  if (PrecType_ == Ifpack2::Details::JACOBI) {
2227  out << "Jacobi";
2228  } else if (PrecType_ == Ifpack2::Details::GS) {
2229  out << "Gauss-Seidel";
2230  } else if (PrecType_ == Ifpack2::Details::SGS) {
2231  out << "Symmetric Gauss-Seidel";
2232  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2233  out << "MT Gauss-Seidel";
2234  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2235  out << "MT Symmetric Gauss-Seidel";
2236  } else if (PrecType_ == Ifpack2::Details::GS2) {
2237  out << "Two-stage Gauss-Seidel";
2238  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2239  out << "Two-stage Symmetric Gauss-Seidel";
2240  } else {
2241  out << "INVALID";
2242  }
2243  // We quote these parameter names because they contain colons.
2244  // YAML uses the colon to distinguish key from value.
2245  out << endl
2246  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2247  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2248  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2249  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2250  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2251  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2252  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2253  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2254  out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2255  out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2256  out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2257  out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2258  switch (mtColoringAlgorithm_) {
2259  case KokkosGraph::COLORING_DEFAULT:
2260  out << "DEFAULT";
2261  break;
2262  case KokkosGraph::COLORING_SERIAL:
2263  out << "SERIAL";
2264  break;
2265  case KokkosGraph::COLORING_VB:
2266  out << "VB";
2267  break;
2268  case KokkosGraph::COLORING_VBBIT:
2269  out << "VBBIT";
2270  break;
2271  case KokkosGraph::COLORING_VBCS:
2272  out << "VBCS";
2273  break;
2274  case KokkosGraph::COLORING_VBD:
2275  out << "VBD";
2276  break;
2277  case KokkosGraph::COLORING_VBDBIT:
2278  out << "VBDBIT";
2279  break;
2280  case KokkosGraph::COLORING_EB:
2281  out << "EB";
2282  break;
2283  default:
2284  out << "*Invalid*";
2285  }
2286  out << endl;
2287  }
2288  if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2289  out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2290  out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2291  out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2292  }
2293  }
2294  out << "Computed quantities:" << endl;
2295  {
2296  OSTab tab3(out);
2297  out << "Global number of rows: " << A_->getGlobalNumRows() << endl
2298  << "Global number of columns: " << A_->getGlobalNumCols() << endl;
2299  }
2300  if (checkDiagEntries_ && isComputed()) {
2301  out << "Properties of input diagonal entries:" << endl;
2302  {
2303  OSTab tab3(out);
2304  out << "Magnitude of minimum-magnitude diagonal entry: "
2305  << globalMinMagDiagEntryMag_ << endl
2306  << "Magnitude of maximum-magnitude diagonal entry: "
2307  << globalMaxMagDiagEntryMag_ << endl
2308  << "Number of diagonal entries with small magnitude: "
2309  << globalNumSmallDiagEntries_ << endl
2310  << "Number of zero diagonal entries: "
2311  << globalNumZeroDiagEntries_ << endl
2312  << "Number of diagonal entries with negative real part: "
2313  << globalNumNegDiagEntries_ << endl
2314  << "Abs 2-norm diff between computed and actual inverse "
2315  << "diagonal: " << globalDiagNormDiff_ << endl;
2316  }
2317  }
2318  if (isComputed()) {
2319  out << "Saved diagonal offsets: "
2320  << (savedDiagOffsets_ ? "true" : "false") << endl;
2321  }
2322  out << "Call counts and total times (in seconds): " << endl;
2323  {
2324  OSTab tab3(out);
2325  out << "initialize: " << endl;
2326  {
2327  OSTab tab4(out);
2328  out << "Call count: " << NumInitialize_ << endl;
2329  out << "Total time: " << InitializeTime_ << endl;
2330  }
2331  out << "compute: " << endl;
2332  {
2333  OSTab tab4(out);
2334  out << "Call count: " << NumCompute_ << endl;
2335  out << "Total time: " << ComputeTime_ << endl;
2336  }
2337  out << "apply: " << endl;
2338  {
2339  OSTab tab4(out);
2340  out << "Call count: " << NumApply_ << endl;
2341  out << "Total time: " << ApplyTime_ << endl;
2342  }
2343  }
2344  }
2345 }
2346 
2347 } // namespace Ifpack2
2348 
2349 #define IFPACK2_RELAXATION_INSTANT(S, LO, GO, N) \
2350  template class Ifpack2::Relaxation<Tpetra::RowMatrix<S, LO, GO, N>>;
2351 
2352 #endif // IFPACK2_RELAXATION_DEF_HPP
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:484
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:519
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:524
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2186
bool is_null(const boost::shared_ptr< T > &p)
basic_OSTab< char > OSTab
static magnitudeType eps()
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:182
T & get(const std::string &name, T def_value)
void compute()
Compute the preconditioner (&quot;numeric setup&quot;);.
Definition: Ifpack2_Relaxation_def.hpp:960
static RCP< Time > getNewCounter(const std::string &name)
static RCP< Time > lookupCounter(const std::string &name)
virtual void printDoc(std::string const &docString, std::ostream &out) const =0
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:529
virtual const std::string getXMLTypeName() const =0
Diagonal preconditioner.
Definition: Ifpack2_Diagonal_decl.hpp:38
static std::ostream & printLines(std::ostream &os, const std::string &linePrefix, const std::string &lines)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:442
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:162
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:499
T * getRawPtr() const
bool remove(std::string const &name, bool throwIfNotExists=true)
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:509
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:230
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:461
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:434
File for utility functions.
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2085
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:489
void validateParametersAndSetDefaults(ParameterList const &validParamList, int const depth=1000)
virtual ValidStringsList validStringValues() const =0
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:474
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:227
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:504
any & getAny(bool activeQry=true)
Compute scaled damped residual for Chebyshev.
Definition: Ifpack2_Details_InverseDiagonalKernel_decl.hpp:45
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:653
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:453
TypeTo as(const TypeFrom &t)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:224
bool isType(const std::string &name) const
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:221
std::string typeName() const
const std::type_info & type() const
static double wallTime()
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:494
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:206
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:514
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:18
T * get() const
void initialize()
Initialize the preconditioner (&quot;symbolic setup&quot;).
Definition: Ifpack2_Relaxation_def.hpp:674
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:241
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:541
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:191
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:236
virtual void validate(ParameterEntry const &entry, std::string const &paramName, std::string const &sublistName) const =0
bool is_null() const