Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
12 
13 #include <sstream> // ostringstream
14 #include <stdexcept> // runtime_error
15 
16 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17 #include "Tpetra_CrsMatrix.hpp"
18 #include "Tpetra_Core.hpp"
19 #include "Teuchos_StandardParameterEntryValidators.hpp"
20 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21 #include "KokkosSparse_trsv.hpp"
22 
23 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
24 #include "shylu_hts.hpp"
25 #endif
26 
27 namespace Ifpack2 {
28 
29 namespace Details {
30 
31 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
32 
33 inline void cusparse_error_throw(cusparseStatus_t cusparseStatus, const char* name,
34  const char* file, const int line) {
35  std::ostringstream out;
36 #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
37  out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus);
38 #else
39  out << name << " error( ";
40  switch (cusparseStatus) {
41  case CUSPARSE_STATUS_NOT_INITIALIZED:
42  out << "CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
43  "created correctly.";
44  break;
45  case CUSPARSE_STATUS_ALLOC_FAILED:
46  out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
47  "much memory";
48  break;
49  case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break;
50  case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break;
51  case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break;
52  case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break;
53  case CUSPARSE_STATUS_INTERNAL_ERROR: out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; break;
54  case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break;
55  case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break;
56  default: out << "unrecognized error code): this is bad!"; break;
57  }
58 #endif // CUSPARSE_VERSION
59  if (file) {
60  out << " " << file << ":" << line;
61  }
62  throw std::runtime_error(out.str());
63 }
64 
65 inline void cusparse_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr,
66  const int line = 0) {
67  if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
68  cusparse_error_throw(cusparseStatus, name, file, line);
69  }
70 }
71 
72 #define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
73  Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
74 
75 #endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
76 
77 struct TrisolverType {
78  enum Enum {
79  Internal,
80  HTS,
81  KSPTRSV
82  };
83 
84  static void loadPLTypeOption(Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
85  type_strs.resize(3);
86  type_strs[0] = "Internal";
87  type_strs[1] = "HTS";
88  type_strs[2] = "KSPTRSV";
89  type_enums.resize(3);
90  type_enums[0] = Internal;
91  type_enums[1] = HTS;
92  type_enums[2] = KSPTRSV;
93  }
94 };
95 } // namespace Details
96 
97 template <class MatrixType>
98 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
99  public:
100  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
101 
102  void reset() {
103 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
104  Timpl_ = Teuchos::null;
105  levelset_block_size_ = 1;
106 #endif
107  }
108 
109  void setParameters(const Teuchos::ParameterList& pl) {
110  (void)pl;
111 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112  const char* block_size_s = "trisolver: block size";
113  if (pl.isParameter(block_size_s)) {
114  TEUCHOS_TEST_FOR_EXCEPT_MSG(!pl.isType<int>(block_size_s),
115  "The parameter \"" << block_size_s << "\" must be of type int.");
116  levelset_block_size_ = pl.get<int>(block_size_s);
117  }
118  if (levelset_block_size_ < 1)
119  levelset_block_size_ = 1;
120 #endif
121  }
122 
123  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
124  // call to compute() will trigger the symbolic+numeric phase, and subsequent
125  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
126  // initialize(), essentially nothing happens.
127  void initialize(const crs_matrix_type& /* unused */) {
128 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
129  reset();
130  transpose_ = conjugate_ = false;
131 #endif
132  }
133 
134  void compute(const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
135  (void)T_in;
136  (void)out;
137 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
138  using Teuchos::ArrayRCP;
139 
140  auto rowptr = T_in.getLocalRowPtrsHost();
141  auto colidx = T_in.getLocalIndicesHost();
142  auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
143  Kokkos::fence();
144 
145  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
146  HTST::make_CrsMatrix(rowptr.size() - 1,
147  rowptr.data(), colidx.data(),
148  // For std/Kokkos::complex.
149  reinterpret_cast<const scalar_type*>(val.data()),
150  transpose_, conjugate_),
151  HtsCrsMatrixDeleter());
152 
153  if (Teuchos::nonnull(Timpl_)) {
154  // Reuse the nonzero pattern.
155  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
156  } else {
157  // Build from scratch.
158  if (T_in.getCrsGraph().is_null()) {
159  if (Teuchos::nonnull(out))
160  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
161  return;
162  }
163  if (!T_in.getCrsGraph()->isSorted()) {
164  if (Teuchos::nonnull(out))
165  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
166  return;
167  }
168  if (!T_in.isStorageOptimized()) {
169  if (Teuchos::nonnull(out))
170  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
171  return;
172  }
173 
174  typename HTST::PreprocessArgs args;
175  args.T = T_hts.get();
176  args.max_nrhs = 1;
177 #ifdef _OPENMP
178  args.nthreads = omp_get_max_threads();
179 #else
180  args.nthreads = 1;
181 #endif
182  args.save_for_reprocess = true;
183  typename HTST::Options opts;
184  opts.levelset_block_size = levelset_block_size_;
185  args.options = &opts;
186 
187  try {
188  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
189  } catch (const std::exception& e) {
190  if (Teuchos::nonnull(out))
191  *out << "HTS preprocess threw: " << e.what() << "\n";
192  }
193  }
194 #endif
195  }
196 
197  // HTS may not be able to handle a matrix, so query whether compute()
198  // succeeded.
199  bool isComputed() {
200 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
201  return Teuchos::nonnull(Timpl_);
202 #else
203  return false;
204 #endif
205  }
206 
207  // Y := beta * Y + alpha * (M * X)
208  void localApply(const MV& X, MV& Y,
209  const Teuchos::ETransp /* mode */,
210  const scalar_type& alpha, const scalar_type& beta) const {
211  (void)X;
212  (void)Y;
213  (void)alpha;
214  (void)beta;
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216  const auto& X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
217  const auto& Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
218 
219  // Only does something if #rhs > current capacity.
220  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
221  // Switch alpha and beta because of HTS's opposite convention.
222  HTST::solve_omp(Timpl_.get(),
223  // For std/Kokkos::complex.
224  reinterpret_cast<const scalar_type*>(X_view.data()),
225  X_view.extent(1),
226  // For std/Kokkos::complex.
227  reinterpret_cast<scalar_type*>(Y_view.data()),
228  beta, alpha);
229 #endif
230  }
231 
232  private:
233 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
234  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
235  typedef typename HTST::Impl TImpl;
236  typedef typename HTST::CrsMatrix HtsCrsMatrix;
237 
238  struct TImplDeleter {
239  void free(TImpl* impl) {
240  HTST::delete_Impl(impl);
241  }
242  };
243 
244  struct HtsCrsMatrixDeleter {
245  void free(HtsCrsMatrix* T) {
246  HTST::delete_CrsMatrix(T);
247  }
248  };
249 
250  Teuchos::RCP<TImpl> Timpl_;
251  bool transpose_, conjugate_;
252  int levelset_block_size_;
253 #endif
254 };
255 
256 template <class MatrixType>
259  : A_(A) {
260  initializeState();
261 
262  if (!A.is_null()) {
264  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
265  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
266  "Ifpack2::LocalSparseTriangularSolver constructor: "
267  "The input matrix A is not a Tpetra::CrsMatrix.");
268  A_crs_ = A_crs;
269  }
270 }
271 
272 template <class MatrixType>
276  : A_(A)
277  , out_(out) {
278  initializeState();
279  if (!out_.is_null()) {
280  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
281  << std::endl;
282  }
283 
284  if (!A.is_null()) {
286  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
287  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
288  "Ifpack2::LocalSparseTriangularSolver constructor: "
289  "The input matrix A is not a Tpetra::CrsMatrix.");
290  A_crs_ = A_crs;
291  }
292 }
293 
294 template <class MatrixType>
297  initializeState();
298 }
299 
300 template <class MatrixType>
303  : out_(out) {
304  initializeState();
305  if (!out_.is_null()) {
306  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
307  << std::endl;
308  }
309 }
310 
311 template <class MatrixType>
313  isInitialized_ = false;
314  isComputed_ = false;
315  reverseStorage_ = false;
316  isInternallyChanged_ = false;
317  numInitialize_ = 0;
318  numCompute_ = 0;
319  numApply_ = 0;
320  initializeTime_ = 0.0;
321  computeTime_ = 0.0;
322  applyTime_ = 0.0;
323  isKokkosKernelsSptrsv_ = false;
324  isKokkosKernelsStream_ = false;
325  num_streams_ = 0;
326  uplo_ = "N";
327  diag_ = "N";
328 }
329 
330 template <class MatrixType>
333  if (!isKokkosKernelsStream_) {
334  if (Teuchos::nonnull(kh_)) {
335  kh_->destroy_sptrsv_handle();
336  }
337  } else {
338  for (int i = 0; i < num_streams_; i++) {
339  if (Teuchos::nonnull(kh_v_[i])) {
340  kh_v_[i]->destroy_sptrsv_handle();
341  }
342  }
343  }
344 }
345 
346 template <class MatrixType>
349  using Teuchos::Array;
351  using Teuchos::RCP;
352 
353  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
354  do {
355  static const char typeName[] = "trisolver: type";
356 
357  if (!pl.isType<std::string>(typeName)) break;
358 
359  // Map std::string <-> TrisolverType::Enum.
360  Array<std::string> trisolverTypeStrs;
361  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
362  Details::TrisolverType::loadPLTypeOption(trisolverTypeStrs, trisolverTypeEnums);
364  s2i(trisolverTypeStrs(), trisolverTypeEnums(), typeName, false);
365 
366  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
367  } while (0);
368 
369  if (trisolverType == Details::TrisolverType::HTS) {
370  htsImpl_ = Teuchos::rcp(new HtsImpl());
371  htsImpl_->setParameters(pl);
372  }
373 
374  if (trisolverType == Details::TrisolverType::KSPTRSV) {
375  this->isKokkosKernelsSptrsv_ = true;
376  } else {
377  this->isKokkosKernelsSptrsv_ = false;
378  }
379 
380  if (pl.isParameter("trisolver: reverse U"))
381  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
382 
383  TEUCHOS_TEST_FOR_EXCEPTION(reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
384  std::logic_error,
385  "Ifpack2::LocalSparseTriangularSolver::setParameters: "
386  "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
387  "options. See GitHub issue #2647.");
388 }
389 
390 template <class MatrixType>
393  using Tpetra::Details::determineLocalTriangularStructure;
394 
395  using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
396  using LO = local_ordinal_type;
397 
398  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
399  if (!out_.is_null()) {
400  *out_ << ">>> DEBUG " << prefix << std::endl;
401  }
402 
403  if (!isKokkosKernelsStream_) {
404  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
405  "setMatrix() with a nonnull input matrix before you may call "
406  "initialize() or compute().");
407  if (A_crs_.is_null()) {
408  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
409  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.get() == nullptr, std::invalid_argument,
410  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
411  A_crs_ = A_crs;
412  }
413  auto G = A_crs_->getGraph();
414  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
415  "but A_crs_'s RowGraph G is null. "
416  "Please report this bug to the Ifpack2 developers.");
417  // At this point, the graph MUST be fillComplete. The "initialize"
418  // (symbolic) part of setup only depends on the graph structure, so
419  // the matrix itself need not be fill complete.
420  TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
421  "If you call this method, "
422  "the matrix's graph must be fill complete. It is not.");
423 
424  // mfh 30 Apr 2018: See GitHub Issue #2658.
425  constexpr bool ignoreMapsForTriStructure = true;
426  auto lclTriStructure = [&] {
427  auto lclMatrix = A_crs_->getLocalMatrixDevice();
428  auto lclRowMap = A_crs_->getRowMap()->getLocalMap();
429  auto lclColMap = A_crs_->getColMap()->getLocalMap();
430  auto lclTriStruct =
431  determineLocalTriangularStructure(lclMatrix.graph,
432  lclRowMap,
433  lclColMap,
434  ignoreMapsForTriStructure);
435  const LO lclNumRows = lclRowMap.getLocalNumElements();
436  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
437  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" : (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
438  return lclTriStruct;
439  }();
440 
441  if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
442  htsImpl_.is_null()) {
443  // Reverse the storage for an upper triangular matrix
444  auto Alocal = A_crs_->getLocalMatrixDevice();
445  auto ptr = Alocal.graph.row_map;
446  auto ind = Alocal.graph.entries;
447  auto val = Alocal.values;
448 
449  auto numRows = Alocal.numRows();
450  auto numCols = Alocal.numCols();
451  auto numNnz = Alocal.nnz();
452 
453  typename decltype(ptr)::non_const_type newptr("ptr", ptr.extent(0));
454  typename decltype(ind)::non_const_type newind("ind", ind.extent(0));
455  decltype(val) newval("val", val.extent(0));
456 
457  // FIXME: The code below assumes UVM
458  typename crs_matrix_type::execution_space().fence();
459  newptr(0) = 0;
460  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
461  auto A_r = Alocal.row(numRows - 1 - row);
462 
463  auto numEnt = A_r.length;
464  for (local_ordinal_type k = 0; k < numEnt; ++k) {
465  newind(rowStart + k) = numCols - 1 - A_r.colidx(numEnt - 1 - k);
466  newval(rowStart + k) = A_r.value(numEnt - 1 - k);
467  }
468  rowStart += numEnt;
469  newptr(row + 1) = rowStart;
470  }
471  typename crs_matrix_type::execution_space().fence();
472 
473  // Reverse maps
474  Teuchos::RCP<map_type> newRowMap, newColMap;
475  {
476  // Reverse row map
477  auto rowMap = A_->getRowMap();
478  auto numElems = rowMap->getLocalNumElements();
479  auto rowElems = rowMap->getLocalElementList();
480 
481  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
482  for (size_t i = 0; i < numElems; i++)
483  newRowElems[i] = rowElems[numElems - 1 - i];
484 
485  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
486  }
487  {
488  // Reverse column map
489  auto colMap = A_->getColMap();
490  auto numElems = colMap->getLocalNumElements();
491  auto colElems = colMap->getLocalElementList();
492 
493  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
494  for (size_t i = 0; i < numElems; i++)
495  newColElems[i] = colElems[numElems - 1 - i];
496 
497  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
498  }
499 
500  // Construct new matrix
501  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
502 
503  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
504 
505  isInternallyChanged_ = true;
506 
507  // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
508  // didn't want to break any invariants, especially considering
509  // that this branch is likely poorly tested.
510  auto newLclTriStructure =
511  determineLocalTriangularStructure(newLocalMatrix.graph,
512  newRowMap->getLocalMap(),
513  newColMap->getLocalMap(),
514  ignoreMapsForTriStructure);
515  const LO newLclNumRows = newRowMap->getLocalNumElements();
516  this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
517  this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" : (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
518  }
519  } else {
520  for (int i = 0; i < num_streams_; i++) {
521  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
522  "setMatrix() with a nonnull input matrix before you may call "
523  "initialize() or compute().");
524  auto G = A_crs_v_[i]->getGraph();
525  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_crs_ are nonnull, "
526  "but A_crs_'s RowGraph G is null. "
527  "Please report this bug to the Ifpack2 developers.");
528  // At this point, the graph MUST be fillComplete. The "initialize"
529  // (symbolic) part of setup only depends on the graph structure, so
530  // the matrix itself need not be fill complete.
531  TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
532  "If you call this method, "
533  "the matrix's graph must be fill complete. It is not.");
534 
535  // mfh 30 Apr 2018: See GitHub Issue #2658.
536  constexpr bool ignoreMapsForTriStructure = true;
537  std::string prev_uplo_ = this->uplo_;
538  std::string prev_diag_ = this->diag_;
539  auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice();
540  auto lclRowMap = A_crs_v_[i]->getRowMap()->getLocalMap();
541  auto lclColMap = A_crs_v_[i]->getColMap()->getLocalMap();
542  auto lclTriStruct =
543  determineLocalTriangularStructure(lclMatrix.graph,
544  lclRowMap,
545  lclColMap,
546  ignoreMapsForTriStructure);
547  const LO lclNumRows = lclRowMap.getLocalNumElements();
548  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
549  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" : (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
550  if (i > 0) {
551  TEUCHOS_TEST_FOR_EXCEPTION((this->diag_ != prev_diag_) || (this->uplo_ != prev_uplo_),
552  std::logic_error, prefix << "A_crs_'s structures in streams "
553  "are different. Please report this bug to the Ifpack2 developers.");
554  }
555  }
556  }
557 
558  if (Teuchos::nonnull(htsImpl_)) {
559  htsImpl_->initialize(*A_crs_);
560  isInternallyChanged_ = true;
561  }
562 
563  const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
564  kh_v_nonnull_ = false;
565  if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U") {
566  if (!isKokkosKernelsStream_) {
567  kh_ = Teuchos::rcp(new k_handle());
568  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
569 
570  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_, true);
571  auto Alocal = A_crs->getLocalMatrixDevice();
572  auto ptr = Alocal.graph.row_map;
573  auto ind = Alocal.graph.entries;
574  auto val = Alocal.values;
575 
576  auto numRows = Alocal.numRows();
577  kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
578  KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
579  } else {
580  kh_v_ = std::vector<Teuchos::RCP<k_handle>>(num_streams_);
581  for (int i = 0; i < num_streams_; i++) {
582  kh_v_[i] = Teuchos::rcp(new k_handle());
583  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
584  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
585  auto ptr_i = Alocal_i.graph.row_map;
586  auto ind_i = Alocal_i.graph.entries;
587  auto val_i = Alocal_i.values;
588 
589  auto numRows_i = Alocal_i.numRows();
590 
591  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
592  kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
593  KokkosSparse::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
594  }
595  kh_v_nonnull_ = true;
596  }
597  }
598 
599  isInitialized_ = true;
600  ++numInitialize_;
601 }
602 
603 template <class MatrixType>
604 KokkosSparse::Experimental::SPTRSVAlgorithm
606 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
607  // CuSparse only supports int type ordinals
608  // and scalar types of float, double, float complex and double complex
609  if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
610  std::is_same<int, local_ordinal_type>::value &&
611  (std::is_same<scalar_type, float>::value ||
612  std::is_same<scalar_type, double>::value ||
613  std::is_same<impl_scalar_type, Kokkos::complex<float>>::value ||
614  std::is_same<impl_scalar_type, Kokkos::complex<double>>::value)) {
615  return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
616  }
617 #endif
618  return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
619 }
620 
621 template <class MatrixType>
624  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
625  if (!out_.is_null()) {
626  *out_ << ">>> DEBUG " << prefix << std::endl;
627  }
628 
629  if (!isKokkosKernelsStream_) {
630  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
631  "setMatrix() with a nonnull input matrix before you may call "
632  "initialize() or compute().");
633  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_ is nonnull, but "
634  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
635  // At this point, the matrix MUST be fillComplete.
636  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
637  "If you call this "
638  "method, the matrix must be fill complete. It is not.");
639  } else {
640  for (int i = 0; i < num_streams_; i++) {
641  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
642  "setMatrices() with a nonnull input matrix before you may call "
643  "initialize() or compute().");
644  // At this point, the matrix MUST be fillComplete.
645  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
646  "If you call this "
647  "method, the matrix must be fill complete. It is not.");
648  }
649  }
650 
651  if (!isInitialized_) {
652  initialize();
653  }
654  TEUCHOS_TEST_FOR_EXCEPTION(!isInitialized_, std::logic_error, prefix << "initialize() should have "
655  "been called by this point, but isInitialized_ is false. "
656  "Please report this bug to the Ifpack2 developers.");
657 
658 // NOTE (Nov-09-2022):
659 // For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute
660 // even when matrix values are changed with the same sparsity pattern.
661 // For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the
662 // values that is substantially faster.
663 // This would all be much better handled via a KokkosSparse::sptrsv_numeric(...)
664 // that could hide the Cuda implementation details.
665 #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
666  if constexpr (std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda>) {
667  if (this->isKokkosKernelsSptrsv_) {
668  if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) {
669  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_, true);
670  auto Alocal = A_crs->getLocalMatrixDevice();
671  auto val = Alocal.values;
672 #if (CUSPARSE_VERSION >= 12100)
673  auto* sptrsv_handle = kh_->get_sptrsv_handle();
674  auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
675  cusparseSpSV_updateMatrix(cusparse_handle->handle,
676  cusparse_handle->spsvDescr,
677  val.data(),
678  CUSPARSE_SPSV_UPDATE_GENERAL);
679 #else
680  auto ptr = Alocal.graph.row_map;
681  auto ind = Alocal.graph.entries;
682  KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
683 #endif
684  } else if (kh_v_nonnull_) {
685  for (int i = 0; i < num_streams_; i++) {
686  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
687  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
688  auto val_i = Alocal_i.values;
689 #if (CUSPARSE_VERSION >= 12100)
690  auto* sptrsv_handle = kh_v_[i]->get_sptrsv_handle();
691  auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
692  IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(
693  cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream()));
694  cusparseSpSV_updateMatrix(cusparse_handle->handle,
695  cusparse_handle->spsvDescr,
696  val_i.data(),
697  CUSPARSE_SPSV_UPDATE_GENERAL);
698 #else
699  auto ptr_i = Alocal_i.graph.row_map;
700  auto ind_i = Alocal_i.graph.entries;
701  KokkosSparse::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
702 #endif
703  }
704  }
705  }
706  }
707 #endif
708 
709  if (!isComputed_) { // Only compute if not computed before
710  if (Teuchos::nonnull(htsImpl_))
711  htsImpl_->compute(*A_crs_, out_);
712 
713  isComputed_ = true;
714  ++numCompute_;
715  }
716 }
717 
718 template <class MatrixType>
720  apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type,
722  Tpetra::MultiVector<scalar_type, local_ordinal_type,
724  Teuchos::ETransp mode,
725  scalar_type alpha,
726  scalar_type beta) const {
727  using Teuchos::RCP;
728  using Teuchos::rcp;
729  using Teuchos::rcpFromRef;
730  typedef scalar_type ST;
731  typedef Teuchos::ScalarTraits<ST> STS;
732  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
733 
734  if (!out_.is_null()) {
735  *out_ << ">>> DEBUG " << prefix;
736  if (!isKokkosKernelsStream_) {
737  if (A_crs_.is_null()) {
738  *out_ << "A_crs_ is null!" << std::endl;
739  } else {
741  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
742  const std::string uplo = this->uplo_;
743  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
744  const std::string diag = this->diag_;
745  *out_ << "uplo=\"" << uplo
746  << "\", trans=\"" << trans
747  << "\", diag=\"" << diag << "\"" << std::endl;
748  }
749  } else {
750  for (int i = 0; i < num_streams_; i++) {
751  if (A_crs_v_[i].is_null()) {
752  *out_ << "A_crs_v_[" << i << "]"
753  << " is null!" << std::endl;
754  } else {
755  const std::string uplo = this->uplo_;
756  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
757  const std::string diag = this->diag_;
758  *out_ << "A_crs_v_[" << i << "]: "
759  << "uplo=\"" << uplo
760  << "\", trans=\"" << trans
761  << "\", diag=\"" << diag << "\"" << std::endl;
762  }
763  }
764  }
765  }
766 
767  TEUCHOS_TEST_FOR_EXCEPTION(!isComputed(), std::runtime_error, prefix << "If compute() has not yet "
768  "been called, or if you have changed the matrix via setMatrix(), you must "
769  "call compute() before you may call this method.");
770  // If isComputed() is true, it's impossible for the matrix to be
771  // null, or for it not to be a Tpetra::CrsMatrix.
772  if (!isKokkosKernelsStream_) {
773  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::logic_error, prefix << "A_ is null. "
774  "Please report this bug to the Ifpack2 developers.");
775  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_crs_ is null. "
776  "Please report this bug to the Ifpack2 developers.");
777  // However, it _is_ possible that the user called resumeFill() on
778  // the matrix, after calling compute(). This is NOT allowed.
779  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
780  "If you call this "
781  "method, the matrix must be fill complete. It is not. This means that "
782  " you must have called resumeFill() on the matrix before calling apply(). "
783  "This is NOT allowed. Note that this class may use the matrix's data in "
784  "place without copying it. Thus, you cannot change the matrix and expect "
785  "the solver to stay the same. If you have changed the matrix, first call "
786  "fillComplete() on it, then call compute() on this object, before you call"
787  " apply(). You do NOT need to call setMatrix, as long as the matrix "
788  "itself (that is, its address in memory) is the same.");
789  } else {
790  for (int i = 0; i < num_streams_; i++) {
791  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::logic_error, prefix << "A_crs_ is null. "
792  "Please report this bug to the Ifpack2 developers.");
793  // However, it _is_ possible that the user called resumeFill() on
794  // the matrix, after calling compute(). This is NOT allowed.
795  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
796  "If you call this "
797  "method, the matrix must be fill complete. It is not. This means that "
798  " you must have called resumeFill() on the matrix before calling apply(). "
799  "This is NOT allowed. Note that this class may use the matrix's data in "
800  "place without copying it. Thus, you cannot change the matrix and expect "
801  "the solver to stay the same. If you have changed the matrix, first call "
802  "fillComplete() on it, then call compute() on this object, before you call"
803  " apply(). You do NOT need to call setMatrix, as long as the matrix "
804  "itself (that is, its address in memory) is the same.");
805  }
806  }
807 
808  RCP<const MV> X_cur;
809  RCP<MV> Y_cur;
810 
811  if (!isKokkosKernelsStream_) {
812  auto G = A_crs_->getGraph();
813  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
814  "but A_crs_'s RowGraph G is null. "
815  "Please report this bug to the Ifpack2 developers.");
816  auto importer = G->getImporter();
817  auto exporter = G->getExporter();
818 
819  if (!importer.is_null()) {
820  if (X_colMap_.is_null() || X_colMap_->getNumVectors() != X.getNumVectors()) {
821  X_colMap_ = rcp(new MV(importer->getTargetMap(), X.getNumVectors()));
822  } else {
823  X_colMap_->putScalar(STS::zero());
824  }
825  // See discussion of Github Issue #672 for why the Import needs to
826  // use the ZERO CombineMode. The case where the Export is
827  // nontrivial is likely never exercised.
828  X_colMap_->doImport(X, *importer, Tpetra::ZERO);
829  }
830  X_cur = importer.is_null() ? rcpFromRef(X) : Teuchos::rcp_const_cast<const MV>(X_colMap_);
831 
832  if (!exporter.is_null()) {
833  if (Y_rowMap_.is_null() || Y_rowMap_->getNumVectors() != Y.getNumVectors()) {
834  Y_rowMap_ = rcp(new MV(exporter->getSourceMap(), Y.getNumVectors()));
835  } else {
836  Y_rowMap_->putScalar(STS::zero());
837  }
838  Y_rowMap_->doExport(Y, *importer, Tpetra::ADD);
839  }
840  Y_cur = exporter.is_null() ? rcpFromRef(Y) : Y_rowMap_;
841  } else {
842  // Currently assume X and Y are local vectors (same sizes as A_crs_).
843  // Should do a better job here!!!
844  X_cur = rcpFromRef(X);
845  Y_cur = rcpFromRef(Y);
846  }
847 
848  localApply(*X_cur, *Y_cur, mode, alpha, beta);
849 
850  if (!isKokkosKernelsStream_) {
851  auto G = A_crs_->getGraph();
852  auto exporter = G->getExporter();
853  if (!exporter.is_null()) {
854  Y.putScalar(STS::zero());
855  Y.doExport(*Y_cur, *exporter, Tpetra::ADD);
856  }
857  }
858  ++numApply_;
859 }
860 
861 template <class MatrixType>
863  localTriangularSolve(const MV& Y,
864  MV& X,
865  const Teuchos::ETransp mode) const {
866  using Teuchos::CONJ_TRANS;
867  using Teuchos::NO_TRANS;
868  using Teuchos::TRANS;
869  const char tfecfFuncName[] = "localTriangularSolve: ";
870 
871  if (!isKokkosKernelsStream_) {
872  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_->isFillComplete(), std::runtime_error,
873  "The matrix is not fill complete.");
874  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
875  "The matrix is neither upper triangular or lower triangular. "
876  "You may only call this method if the matrix is triangular. "
877  "Remember that this is a local (per MPI process) property, and that "
878  "Tpetra only knows how to do a local (per process) triangular solve.");
879  } else {
880  for (int i = 0; i < num_streams_; i++) {
881  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
882  "The matrix is not fill complete.");
883  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
884  "The matrix is neither upper triangular or lower triangular. "
885  "You may only call this method if the matrix is triangular. "
886  "Remember that this is a local (per MPI process) property, and that "
887  "Tpetra only knows how to do a local (per process) triangular solve.");
888  }
889  }
890  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!X.isConstantStride() || !Y.isConstantStride(), std::invalid_argument,
891  "X and Y must be constant stride.");
893  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(STS::isComplex && mode == TRANS, std::logic_error,
894  "This method does "
895  "not currently support non-conjugated transposed solve (mode == "
896  "Teuchos::TRANS) for complex scalar types.");
897 
898  const std::string uplo = this->uplo_;
899  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
900  const size_t numVecs = std::min(X.getNumVectors(), Y.getNumVectors());
901 
902  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N") {
903  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(this->A_);
904  auto A_lclk = A_crs->getLocalMatrixDevice();
905  auto ptr = A_lclk.graph.row_map;
906  auto ind = A_lclk.graph.entries;
907  auto val = A_lclk.values;
908 
909  for (size_t j = 0; j < numVecs; ++j) {
910  auto X_j = X.getVectorNonConst(j);
911  auto Y_j = Y.getVector(j);
912  auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
913  auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
914  auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
915  auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
916  KokkosSparse::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
917  // TODO is this fence needed...
918  typename k_handle::HandleExecSpace().fence();
919  }
920  } // End using regular interface of Kokkos Kernels Sptrsv
921  else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N") {
922  std::vector<lno_row_view_t> ptr_v(num_streams_);
923  std::vector<lno_nonzero_view_t> ind_v(num_streams_);
924  std::vector<scalar_nonzero_view_t> val_v(num_streams_);
925  std::vector<k_handle*> KernelHandle_rawptr_v_(num_streams_);
926  for (size_t j = 0; j < numVecs; ++j) {
927  auto X_j = X.getVectorNonConst(j);
928  auto Y_j = Y.getVector(j);
929  auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
930  auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
931  auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
932  auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
933  std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
934  std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
935  local_ordinal_type stream_begin = 0;
936  local_ordinal_type stream_end;
937  for (int i = 0; i < num_streams_; i++) {
938  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i]);
939  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
940  ptr_v[i] = Alocal_i.graph.row_map;
941  ind_v[i] = Alocal_i.graph.entries;
942  val_v[i] = Alocal_i.values;
943  stream_end = stream_begin + Alocal_i.numRows();
944  x_v[i] = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
945  y_v[i] = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
946  ;
947  KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
948  stream_begin = stream_end;
949  }
950  Kokkos::fence();
951  KokkosSparse::Experimental::sptrsv_solve_streams(exec_space_instances_, KernelHandle_rawptr_v_,
952  ptr_v, ind_v, val_v, y_v, x_v);
953  Kokkos::fence();
954  }
955  } // End using stream interface of Kokkos Kernels Sptrsv
956  else {
957  const std::string diag = this->diag_;
958  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
959  // sequential, host-only code. See
960  // https://github.com/kokkos/kokkos-kernels/issues/48.
961 
962  auto A_lcl = this->A_crs_->getLocalMatrixHost();
963 
964  if (X.isConstantStride() && Y.isConstantStride()) {
965  auto X_lcl = X.getLocalViewHost(Tpetra::Access::ReadWrite);
966  auto Y_lcl = Y.getLocalViewHost(Tpetra::Access::ReadOnly);
967  KokkosSparse::trsv(uplo.c_str(), trans.c_str(), diag.c_str(),
968  A_lcl, Y_lcl, X_lcl);
969  } else {
970  for (size_t j = 0; j < numVecs; ++j) {
971  auto X_j = X.getVectorNonConst(j);
972  auto Y_j = Y.getVector(j);
973  auto X_lcl = X_j->getLocalViewHost(Tpetra::Access::ReadWrite);
974  auto Y_lcl = Y_j->getLocalViewHost(Tpetra::Access::ReadOnly);
975  KokkosSparse::trsv(uplo.c_str(), trans.c_str(),
976  diag.c_str(), A_lcl, Y_lcl, X_lcl);
977  }
978  }
979  }
980 }
981 
982 template <class MatrixType>
983 void LocalSparseTriangularSolver<MatrixType>::
984  localApply(const MV& X,
985  MV& Y,
986  const Teuchos::ETransp mode,
987  const scalar_type& alpha,
988  const scalar_type& beta) const {
989  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull(htsImpl_) &&
990  htsImpl_->isComputed()) {
991  htsImpl_->localApply(X, Y, mode, alpha, beta);
992  return;
993  }
994 
995  using Teuchos::RCP;
996  typedef scalar_type ST;
997  typedef Teuchos::ScalarTraits<ST> STS;
998 
999  if (beta == STS::zero()) {
1000  if (alpha == STS::zero()) {
1001  Y.putScalar(STS::zero()); // Y := 0 * Y (ignore contents of Y)
1002  } else { // alpha != 0
1003  this->localTriangularSolve(X, Y, mode);
1004  if (alpha != STS::one()) {
1005  Y.scale(alpha);
1006  }
1007  }
1008  } else { // beta != 0
1009  if (alpha == STS::zero()) {
1010  Y.scale(beta); // Y := beta * Y
1011  } else { // alpha != 0
1012  MV Y_tmp(Y, Teuchos::Copy);
1013  this->localTriangularSolve(X, Y_tmp, mode); // Y_tmp := M * X
1014  Y.update(alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1015  }
1016  }
1017 }
1018 
1019 template <class MatrixType>
1022  return numInitialize_;
1023 }
1024 
1025 template <class MatrixType>
1028  return numCompute_;
1029 }
1030 
1031 template <class MatrixType>
1033  getNumApply() const {
1034  return numApply_;
1035 }
1036 
1037 template <class MatrixType>
1038 double
1041  return initializeTime_;
1042 }
1043 
1044 template <class MatrixType>
1045 double
1048  return computeTime_;
1049 }
1050 
1051 template <class MatrixType>
1052 double
1054  getApplyTime() const {
1055  return applyTime_;
1056 }
1057 
1058 template <class MatrixType>
1059 std::string
1061  description() const {
1062  std::ostringstream os;
1063 
1064  // Output is a valid YAML dictionary in flow style. If you don't
1065  // like everything on a single line, you should call describe()
1066  // instead.
1067  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1068  if (this->getObjectLabel() != "") {
1069  os << "Label: \"" << this->getObjectLabel() << "\", ";
1070  }
1071  os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
1072  << "Computed: " << (isComputed() ? "true" : "false") << ", ";
1073 
1074  if (isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1075  if (isKokkosKernelsStream_) os << "KK-SolveStream, ";
1076 
1077  if (A_.is_null()) {
1078  os << "Matrix: null";
1079  } else {
1080  os << "Matrix dimensions: ["
1081  << A_->getGlobalNumRows() << ", "
1082  << A_->getGlobalNumCols() << "]"
1083  << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1084  }
1085 
1086  if (Teuchos::nonnull(htsImpl_))
1087  os << ", HTS computed: " << (htsImpl_->isComputed() ? "true" : "false");
1088  os << "}";
1089  return os.str();
1090 }
1091 
1092 template <class MatrixType>
1095  const Teuchos::EVerbosityLevel verbLevel) const {
1096  using std::endl;
1097  // Default verbosity level is VERB_LOW, which prints only on Process
1098  // 0 of the matrix's communicator.
1099  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1100 
1101  if (vl != Teuchos::VERB_NONE) {
1102  // Print only on Process 0 in the matrix's communicator. If the
1103  // matrix is null, though, we have to get the communicator from
1104  // somewhere, so we ask Tpetra for its default communicator. If
1105  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1106  auto comm = A_.is_null() ? Tpetra::getDefaultComm() : A_->getComm();
1107 
1108  // Users aren't supposed to do anything with the matrix on
1109  // processes where its communicator is null.
1110  if (!comm.is_null() && comm->getRank() == 0) {
1111  // By convention, describe() should always begin with a tab.
1112  Teuchos::OSTab tab0(out);
1113  // Output is in YAML format. We have to escape the class name,
1114  // because it has a colon.
1115  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1116  Teuchos::OSTab tab1(out);
1117  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name() << endl
1118  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name() << endl
1119  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name() << endl
1120  << "Node: " << Teuchos::TypeNameTraits<node_type>::name() << endl;
1121  }
1122  }
1123 }
1124 
1125 template <class MatrixType>
1128  getDomainMap() const {
1129  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1130  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1131  "The matrix is null. Please call setMatrix() with a nonnull input "
1132  "before calling this method.");
1133  return A_->getDomainMap();
1134 }
1135 
1136 template <class MatrixType>
1139  getRangeMap() const {
1140  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1141  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1142  "The matrix is null. Please call setMatrix() with a nonnull input "
1143  "before calling this method.");
1144  return A_->getRangeMap();
1145 }
1146 
1147 template <class MatrixType>
1150  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1151 
1152  // If the pointer didn't change, do nothing. This is reasonable
1153  // because users are supposed to call this method with the same
1154  // object over all participating processes, and pointer identity
1155  // implies object identity.
1156  if (A.getRawPtr() != A_.getRawPtr() || isInternallyChanged_) {
1157  // Check in serial or one-process mode if the matrix is square.
1158  TEUCHOS_TEST_FOR_EXCEPTION(!A.is_null() && A->getComm()->getSize() == 1 &&
1159  A->getLocalNumRows() != A->getLocalNumCols(),
1160  std::runtime_error, prefix << "If A's communicator only contains one "
1161  "process, then A must be square. Instead, you provided a matrix A with "
1162  << A->getLocalNumRows() << " rows and " << A->getLocalNumCols() << " columns.");
1163 
1164  // It's legal for A to be null; in that case, you may not call
1165  // initialize() until calling setMatrix() with a nonnull input.
1166  // Regardless, setting the matrix invalidates the preconditioner.
1167  isInitialized_ = false;
1168  isComputed_ = false;
1169 
1170  if (A.is_null()) {
1171  A_crs_ = Teuchos::null;
1172  A_ = Teuchos::null;
1173  } else { // A is not null
1175  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
1176  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1177  A_crs_ = A_crs;
1178  A_ = A;
1179  }
1180 
1181  if (Teuchos::nonnull(htsImpl_))
1182  htsImpl_->reset();
1183  } // pointers are not the same
1184 }
1185 
1186 template <class MatrixType>
1188  setStreamInfo(const bool& isKokkosKernelsStream, const int& num_streams,
1189  const std::vector<HandleExecSpace>& exec_space_instances) {
1190  isKokkosKernelsStream_ = isKokkosKernelsStream;
1191  num_streams_ = num_streams;
1192  exec_space_instances_ = exec_space_instances;
1193  A_crs_v_ = std::vector<Teuchos::RCP<crs_matrix_type>>(num_streams_);
1194 }
1195 
1196 template <class MatrixType>
1198  setMatrices(const std::vector<Teuchos::RCP<crs_matrix_type>>& A_crs_v) {
1199  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1200 
1201  for (int i = 0; i < num_streams_; i++) {
1202  // If the pointer didn't change, do nothing. This is reasonable
1203  // because users are supposed to call this method with the same
1204  // object over all participating processes, and pointer identity
1205  // implies object identity.
1206  if (A_crs_v[i].getRawPtr() != A_crs_v_[i].getRawPtr() || isInternallyChanged_) {
1207  // Check in serial or one-process mode if the matrix is square.
1208  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v[i].is_null() && A_crs_v[i]->getComm()->getSize() == 1 &&
1209  A_crs_v[i]->getLocalNumRows() != A_crs_v[i]->getLocalNumCols(),
1210  std::runtime_error, prefix << "If A's communicator only contains one "
1211  "process, then A must be square. Instead, you provided a matrix A with "
1212  << A_crs_v[i]->getLocalNumRows() << " rows and " << A_crs_v[i]->getLocalNumCols() << " columns.");
1213 
1214  // It's legal for A to be null; in that case, you may not call
1215  // initialize() until calling setMatrix() with a nonnull input.
1216  // Regardless, setting the matrix invalidates the preconditioner.
1217  isInitialized_ = false;
1218  isComputed_ = false;
1219 
1220  if (A_crs_v[i].is_null()) {
1221  A_crs_v_[i] = Teuchos::null;
1222  } else { // A is not null
1224  Teuchos::rcp_dynamic_cast<crs_matrix_type>(A_crs_v[i]);
1225  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1226  A_crs_v_[i] = A_crs;
1227  }
1228  } // pointers are not the same
1229  }
1230 }
1231 
1232 } // namespace Ifpack2
1233 
1234 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S, LO, GO, N) \
1235  template class Ifpack2::LocalSparseTriangularSolver<Tpetra::RowMatrix<S, LO, GO, N>>;
1236 
1237 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:720
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1027
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1061
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
&quot;Numeric&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:623
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:191
T * get() const
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1054
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1047
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1040
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
bool isParameter(const std::string &name) const
T * getRawPtr() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner&#39;s matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1198
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1128
&quot;Preconditioner&quot; that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void initialize()
&quot;Symbolic&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:392
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1021
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver&#39;s stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1188
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1149
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string &paramName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1033
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1139
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:348
MatrixType::scalar_type scalar_type
Scalar type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:56
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1094
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:332
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:296
static std::string name()
std::string typeName(const T &t)
bool is_null() const