Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Experimental_RBILUK_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
11 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
12 
13 #include "Tpetra_BlockMultiVector.hpp"
14 #include "Tpetra_BlockView.hpp"
15 #include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
16 #include "Ifpack2_OverlappingRowMatrix.hpp"
17 #include "Ifpack2_Details_getCrsMatrix.hpp"
18 #include "Ifpack2_LocalFilter.hpp"
19 #include "Ifpack2_Utilities.hpp"
20 #include "Ifpack2_RILUK.hpp"
21 #include "KokkosSparse_sptrsv.hpp"
22 
23 //#define IFPACK2_RBILUK_INITIAL
24 //#define IFPACK2_RBILUK_INITIAL_NOKK
25 
26 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
27 #include "KokkosBatched_Gemm_Decl.hpp"
28 #include "KokkosBatched_Gemm_Serial_Impl.hpp"
29 #include "KokkosBatched_Util.hpp"
30 #endif
31 
32 namespace Ifpack2 {
33 
34 namespace Experimental {
35 
36 namespace {
37 template <class MatrixType>
38 struct LocalRowHandler {
39  using LocalOrdinal = typename MatrixType::local_ordinal_type;
40  using row_matrix_type = Tpetra::RowMatrix<
41  typename MatrixType::scalar_type,
42  LocalOrdinal,
43  typename MatrixType::global_ordinal_type,
44  typename MatrixType::node_type>;
45  using inds_type = typename row_matrix_type::local_inds_host_view_type;
46  using vals_type = typename row_matrix_type::values_host_view_type;
47 
48  LocalRowHandler(Teuchos::RCP<const row_matrix_type> A)
49  : A_(std::move(A)) {
50  if (!A_->supportsRowViews()) {
51  const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
52  const auto blockSize = A_->getBlockSize();
53  ind_nc_ = inds_type_nc("Ifpack2::RBILUK::LocalRowHandler::indices", maxNumRowEntr);
54  val_nc_ = vals_type_nc("Ifpack2::RBILUK::LocalRowHandler::values", maxNumRowEntr * blockSize * blockSize);
55  }
56  }
57 
58  void getLocalRow(LocalOrdinal local_row, inds_type& InI, vals_type& InV, LocalOrdinal& NumIn) {
59  if (A_->supportsRowViews()) {
60  A_->getLocalRowView(local_row, InI, InV);
61  NumIn = (LocalOrdinal)InI.size();
62  } else {
63  size_t cnt;
64  A_->getLocalRowCopy(local_row, ind_nc_, val_nc_, cnt);
65  InI = ind_nc_;
66  InV = val_nc_;
67  NumIn = (LocalOrdinal)cnt;
68  }
69  }
70 
71  private:
72  using inds_type_nc = typename row_matrix_type::nonconst_local_inds_host_view_type;
73  using vals_type_nc = typename row_matrix_type::nonconst_values_host_view_type;
74 
76  inds_type_nc ind_nc_;
77  vals_type_nc val_nc_;
78 };
79 
80 } // namespace
81 
82 template <class MatrixType>
84  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in)) {}
85 
86 template <class MatrixType>
88  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in)) {}
89 
90 template <class MatrixType>
92 
93 template <class MatrixType>
95  // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
96 
97  // It's legal for A to be null; in that case, you may not call
98  // initialize() until calling setMatrix() with a nonnull input.
99  // Regardless, setting the matrix invalidates any previous
100  // factorization.
101  if (A.getRawPtr() != this->A_.getRawPtr()) {
102  this->isAllocated_ = false;
103  this->isInitialized_ = false;
104  this->isComputed_ = false;
105  this->Graph_ = Teuchos::null;
106  L_block_ = Teuchos::null;
107  U_block_ = Teuchos::null;
108  D_block_ = Teuchos::null;
109  }
110 }
111 
112 template <class MatrixType>
113 const typename RBILUK<MatrixType>::block_crs_matrix_type&
116  L_block_.is_null(), std::runtime_error,
117  "Ifpack2::RILUK::getL: The L factor "
118  "is null. Please call initialize() (and preferably also compute()) "
119  "before calling this method. If the input matrix has not yet been set, "
120  "you must first call setMatrix() with a nonnull input matrix before you "
121  "may call initialize() or compute().");
122  return *L_block_;
123 }
124 
125 template <class MatrixType>
126 const typename RBILUK<MatrixType>::block_crs_matrix_type&
129  D_block_.is_null(), std::runtime_error,
130  "Ifpack2::RILUK::getD: The D factor "
131  "(of diagonal entries) is null. Please call initialize() (and "
132  "preferably also compute()) before calling this method. If the input "
133  "matrix has not yet been set, you must first call setMatrix() with a "
134  "nonnull input matrix before you may call initialize() or compute().");
135  return *D_block_;
136 }
137 
138 template <class MatrixType>
139 const typename RBILUK<MatrixType>::block_crs_matrix_type&
142  U_block_.is_null(), std::runtime_error,
143  "Ifpack2::RILUK::getU: The U factor "
144  "is null. Please call initialize() (and preferably also compute()) "
145  "before calling this method. If the input matrix has not yet been set, "
146  "you must first call setMatrix() with a nonnull input matrix before you "
147  "may call initialize() or compute().");
148  return *U_block_;
149 }
150 
151 template <class MatrixType>
153  using Teuchos::null;
154  using Teuchos::rcp;
155 
156  if (!this->isAllocated_) {
157  // Deallocate any existing storage. This avoids storing 2x
158  // memory, since RCP op= does not deallocate until after the
159  // assignment.
160  this->L_ = null;
161  this->U_ = null;
162  this->D_ = null;
163  L_block_ = null;
164  U_block_ = null;
165  D_block_ = null;
166 
167  // Allocate Matrix using ILUK graphs
168  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph(), blockSize_));
169  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph(), blockSize_));
170  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
171  blockSize_));
172  L_block_->setAllToScalar(STM::zero()); // Zero out L and U matrices
173  U_block_->setAllToScalar(STM::zero());
174  D_block_->setAllToScalar(STM::zero());
175 
176  // Allocate temp space for apply
177  if (this->isKokkosKernelsSpiluk_) {
178  const auto numRows = L_block_->getLocalNumRows();
179  tmp_ = decltype(tmp_)("RBILUK::tmp_", numRows * blockSize_);
180  }
181  }
182  this->isAllocated_ = true;
183 }
184 
185 namespace {
186 
187 template <class MatrixType>
189 getBlockCrsGraph(const Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>& A) {
190  using local_ordinal_type = typename MatrixType::local_ordinal_type;
191  using Teuchos::RCP;
192  using Teuchos::rcp;
193  using Teuchos::rcp_const_cast;
194  using Teuchos::rcp_dynamic_cast;
195  using Teuchos::rcpFromRef;
196  using row_matrix_type = typename RBILUK<MatrixType>::row_matrix_type;
197  using crs_graph_type = typename RBILUK<MatrixType>::crs_graph_type;
198  using block_crs_matrix_type = typename RBILUK<MatrixType>::block_crs_matrix_type;
199 
200  auto A_local = RBILUK<MatrixType>::makeLocalFilter(A);
201 
202  {
203  RCP<const LocalFilter<row_matrix_type> > filteredA =
204  rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_local);
205  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
206  RCP<const block_crs_matrix_type> A_block = Teuchos::null;
207  if (!filteredA.is_null()) {
208  overlappedA = rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> >(filteredA->getUnderlyingMatrix());
209  }
210 
211  if (!overlappedA.is_null()) {
212  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
213  } else if (!filteredA.is_null()) {
214  // If there is no overlap, filteredA could be the block CRS matrix
215  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
216  } else {
217  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(A_local);
218  }
219 
220  if (!A_block.is_null()) {
221  return rcpFromRef(A_block->getCrsGraph());
222  }
223  }
224 
225  // Could not extract block crs, make graph manually
226  {
227  local_ordinal_type numRows = A_local->getLocalNumRows();
228  Teuchos::Array<size_t> entriesPerRow(numRows);
229  for (local_ordinal_type i = 0; i < numRows; i++) {
230  entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
231  }
232  RCP<crs_graph_type> A_local_crs_nc =
233  rcp(new crs_graph_type(A_local->getRowMap(),
234  A_local->getColMap(),
235  entriesPerRow()));
236 
237  {
238  using LocalRowHandler_t = LocalRowHandler<MatrixType>;
239  LocalRowHandler_t localRowHandler(A_local);
240  typename LocalRowHandler_t::inds_type indices;
241  typename LocalRowHandler_t::vals_type values;
242  for (local_ordinal_type i = 0; i < numRows; i++) {
243  local_ordinal_type numEntries = 0;
244  localRowHandler.getLocalRow(i, indices, values, numEntries);
245  A_local_crs_nc->insertLocalIndices(i, numEntries, indices.data());
246  }
247  }
248 
249  A_local_crs_nc->fillComplete(A_local->getDomainMap(), A_local->getRangeMap());
250  return rcp_const_cast<const crs_graph_type>(A_local_crs_nc);
251  }
252 }
253 
254 } // namespace
255 
256 template <class MatrixType>
258  using Teuchos::RCP;
259  using Teuchos::rcp;
260  using Teuchos::rcp_dynamic_cast;
261  const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
262 
263  TEUCHOS_TEST_FOR_EXCEPTION(this->A_.is_null(), std::runtime_error, prefix << "The matrix (A_, the "
264  "RowMatrix) is null. Please call setMatrix() with a nonnull input "
265  "before calling this method.");
266  TEUCHOS_TEST_FOR_EXCEPTION(!this->A_->isFillComplete(), std::runtime_error, prefix << "The matrix "
267  "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
268  "initialize() or compute() with this matrix until the matrix is fill "
269  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
270  "underlying graph is fill complete.");
271 
272  blockSize_ = this->A_->getBlockSize();
273  this->A_local_ = this->makeLocalFilter(this->A_);
274 
275  Teuchos::Time timer("RBILUK::initialize");
276  double startTime = timer.wallTime();
277  { // Start timing
278  Teuchos::TimeMonitor timeMon(timer);
279 
280  // Calling initialize() means that the user asserts that the graph
281  // of the sparse matrix may have changed. We must not just reuse
282  // the previous graph in that case.
283  //
284  // Regarding setting isAllocated_ to false: Eventually, we may want
285  // some kind of clever memory reuse strategy, but it's always
286  // correct just to blow everything away and start over.
287  this->isInitialized_ = false;
288  this->isAllocated_ = false;
289  this->isComputed_ = false;
290  this->Graph_ = Teuchos::null;
291 
292  RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_);
293  this->Graph_ = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(matrixCrsGraph,
294  this->LevelOfFill_, 0));
295 
296  if (this->isKokkosKernelsSpiluk_) {
297  this->KernelHandle_ = Teuchos::rcp(new kk_handle_type());
298  const auto numRows = this->A_local_->getLocalNumRows();
299  KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
300  numRows,
301  2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
302  2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
303  blockSize_);
304  this->Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic
305 
306  this->L_Sptrsv_KernelHandle_ = Teuchos::rcp(new kk_handle_type());
307  this->U_Sptrsv_KernelHandle_ = Teuchos::rcp(new kk_handle_type());
308 
309  KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
310 
311  this->L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
312  this->U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
313  } else {
314  this->Graph_->initialize();
315  }
316 
317  allocate_L_and_U_blocks();
318 
319 #ifdef IFPACK2_RBILUK_INITIAL
320  initAllValues();
321 #endif
322  } // Stop timing
323 
324  this->isInitialized_ = true;
325  this->numInitialize_ += 1;
326  this->initializeTime_ += (timer.wallTime() - startTime);
327 }
328 
329 template <class MatrixType>
331  initAllValues() {
332  using Teuchos::RCP;
333  typedef Tpetra::Map<LO, GO, node_type> map_type;
334 
335  LO NumIn = 0, NumL = 0, NumU = 0;
336  bool DiagFound = false;
337  size_t NumNonzeroDiags = 0;
338  size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
339  LO blockMatSize = blockSize_ * blockSize_;
340 
341  // First check that the local row map ordering is the same as the local portion of the column map.
342  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
343  // implicitly assume that this is the case.
344  Teuchos::ArrayView<const GO> rowGIDs = this->A_->getRowMap()->getLocalElementList();
345  Teuchos::ArrayView<const GO> colGIDs = this->A_->getColMap()->getLocalElementList();
346  bool gidsAreConsistentlyOrdered = true;
347  GO indexOfInconsistentGID = 0;
348  for (GO i = 0; i < rowGIDs.size(); ++i) {
349  if (rowGIDs[i] != colGIDs[i]) {
350  gidsAreConsistentlyOrdered = false;
351  indexOfInconsistentGID = i;
352  break;
353  }
354  }
355  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered == false, std::runtime_error,
356  "The ordering of the local GIDs in the row and column maps is not the same"
357  << std::endl
358  << "at index " << indexOfInconsistentGID
359  << ". Consistency is required, as all calculations are done with"
360  << std::endl
361  << "local indexing.");
362 
363  // Allocate temporary space for extracting the strictly
364  // lower and upper parts of the matrix A.
365  Teuchos::Array<LO> LI(MaxNumEntries);
366  Teuchos::Array<LO> UI(MaxNumEntries);
367  Teuchos::Array<scalar_type> LV(MaxNumEntries * blockMatSize);
368  Teuchos::Array<scalar_type> UV(MaxNumEntries * blockMatSize);
369 
370  Teuchos::Array<scalar_type> diagValues(blockMatSize);
371 
372  L_block_->setAllToScalar(STM::zero()); // Zero out L and U matrices
373  U_block_->setAllToScalar(STM::zero());
374  D_block_->setAllToScalar(STM::zero()); // Set diagonal values to zero
375 
376  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
377  // host, so sync to host first. The const_cast is unfortunate but
378  // is our only option to make this correct.
379 
380  /*
381  const_cast<block_crs_matrix_type&> (A).sync_host ();
382  L_block_->sync_host ();
383  U_block_->sync_host ();
384  D_block_->sync_host ();
385  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
386  L_block_->modify_host ();
387  U_block_->modify_host ();
388  D_block_->modify_host ();
389  */
390 
391  RCP<const map_type> rowMap = L_block_->getRowMap();
392 
393  // First we copy the user's matrix into L and U, regardless of fill level.
394  // It is important to note that L and U are populated using local indices.
395  // This means that if the row map GIDs are not monotonically increasing
396  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
397  // matrix is not the one that you would get if you based L (U) on GIDs.
398  // This is ok, as the *order* of the GIDs in the rowmap is a better
399  // expression of the user's intent than the GIDs themselves.
400 
401  // TODO BMK: Revisit this fence when BlockCrsMatrix is refactored.
402  Kokkos::fence();
403  using LocalRowHandler_t = LocalRowHandler<MatrixType>;
404  LocalRowHandler_t localRowHandler(this->A_);
405  typename LocalRowHandler_t::inds_type InI;
406  typename LocalRowHandler_t::vals_type InV;
407  for (size_t myRow = 0; myRow < this->A_->getLocalNumRows(); ++myRow) {
408  LO local_row = myRow;
409 
410  localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
411 
412  // Split into L and U (we don't assume that indices are ordered).
413  NumL = 0;
414  NumU = 0;
415  DiagFound = false;
416 
417  for (LO j = 0; j < NumIn; ++j) {
418  const LO k = InI[j];
419  const LO blockOffset = blockMatSize * j;
420 
421  if (k == local_row) {
422  DiagFound = true;
423  // Store perturbed diagonal in Tpetra::Vector D_
424  for (LO jj = 0; jj < blockMatSize; ++jj)
425  diagValues[jj] = this->Rthresh_ * InV[blockOffset + jj] + IFPACK2_SGN(InV[blockOffset + jj]) * this->Athresh_;
426  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
427  } else if (k < 0) { // Out of range
429  true, std::runtime_error,
430  "Ifpack2::RILUK::initAllValues: current "
431  "GID k = "
432  << k << " < 0. I'm not sure why this is an error; it is "
433  "probably an artifact of the undocumented assumptions of the "
434  "original implementation (likely copied and pasted from Ifpack). "
435  "Nevertheless, the code I found here insisted on this being an error "
436  "state, so I will throw an exception here.");
437  } else if (k < local_row) {
438  LI[NumL] = k;
439  const LO LBlockOffset = NumL * blockMatSize;
440  for (LO jj = 0; jj < blockMatSize; ++jj)
441  LV[LBlockOffset + jj] = InV[blockOffset + jj];
442  NumL++;
443  } else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
444  UI[NumU] = k;
445  const LO UBlockOffset = NumU * blockMatSize;
446  for (LO jj = 0; jj < blockMatSize; ++jj)
447  UV[UBlockOffset + jj] = InV[blockOffset + jj];
448  NumU++;
449  }
450  }
451 
452  // Check in things for this row of L and U
453 
454  if (DiagFound) {
455  ++NumNonzeroDiags;
456  } else {
457  for (LO jj = 0; jj < blockSize_; ++jj)
458  diagValues[jj * (blockSize_ + 1)] = this->Athresh_;
459  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
460  }
461 
462  if (NumL) {
463  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
464  }
465 
466  if (NumU) {
467  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
468  }
469  }
470 
471  // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
472  // ever gets a device implementation.
473  /*
474  {
475  typedef typename block_crs_matrix_type::device_type device_type;
476  const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
477  L_block_->template sync<device_type> ();
478  U_block_->template sync<device_type> ();
479  D_block_->template sync<device_type> ();
480  }
481  */
482  this->isInitialized_ = true;
483 }
484 
485 namespace { // (anonymous)
486 
487 // For a given Kokkos::View type, possibly unmanaged, get the
488 // corresponding managed Kokkos::View type. This is handy for
489 // translating from little_block_type or little_host_vec_type (both
490 // possibly unmanaged) to their managed versions.
491 template <class LittleBlockType>
492 struct GetManagedView {
493  static_assert(Kokkos::is_view<LittleBlockType>::value,
494  "LittleBlockType must be a Kokkos::View.");
495  typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
496  typename LittleBlockType::array_layout,
497  typename LittleBlockType::device_type>
498  managed_non_const_type;
499  static_assert(static_cast<int>(managed_non_const_type::rank) ==
500  static_cast<int>(LittleBlockType::rank),
501  "managed_non_const_type::rank != LittleBlock::rank. "
502  "Please report this bug to the Ifpack2 developers.");
503 };
504 
505 } // namespace
506 
507 template <class MatrixType>
509  using Teuchos::Array;
510  using Teuchos::RCP;
511  using Teuchos::rcp;
512 
513  typedef impl_scalar_type IST;
514  const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
515 
516  // initialize() checks this too, but it's easier for users if the
517  // error shows them the name of the method that they actually
518  // called, rather than the name of some internally called method.
519  TEUCHOS_TEST_FOR_EXCEPTION(this->A_.is_null(), std::runtime_error, prefix << "The matrix (A_, "
520  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
521  "input before calling this method.");
522  TEUCHOS_TEST_FOR_EXCEPTION(!this->A_->isFillComplete(), std::runtime_error, prefix << "The matrix "
523  "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
524  "initialize() or compute() with this matrix until the matrix is fill "
525  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
526  "underlying graph is fill complete.");
527 
528  if (!this->isInitialized()) {
529  initialize(); // Don't count this in the compute() time
530  }
531 
532  // // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
533  // // host, so sync to host first. The const_cast is unfortunate but
534  // // is our only option to make this correct.
535  // if (! A_block_.is_null ()) {
536  // Teuchos::RCP<block_crs_matrix_type> A_nc =
537  // Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
538  // // A_nc->sync_host ();
539  // }
540  /*
541  L_block_->sync_host ();
542  U_block_->sync_host ();
543  D_block_->sync_host ();
544  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
545  L_block_->modify_host ();
546  U_block_->modify_host ();
547  D_block_->modify_host ();
548  */
549 
550  Teuchos::Time timer("RBILUK::compute");
551  double startTime = timer.wallTime();
552  { // Start timing
553  Teuchos::TimeMonitor timeMon(timer);
554  this->isComputed_ = false;
555 
556  // MinMachNum should be officially defined, for now pick something a little
557  // bigger than IEEE underflow value
558 
559  // const scalar_type MinDiagonalValue = STS::rmin ();
560  // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
561  if (!this->isKokkosKernelsSpiluk_) {
562  initAllValues();
563  size_t NumIn;
564  LO NumL, NumU, NumURead;
565 
566  // Get Maximum Row length
567  const size_t MaxNumEntries =
568  L_block_->getLocalMaxNumRowEntries() + U_block_->getLocalMaxNumRowEntries() + 1;
569 
570  const LO blockMatSize = blockSize_ * blockSize_;
571 
572  // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
573  // expressing these strides explicitly, in order to finish #177
574  // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
575  const LO rowStride = blockSize_;
576 
577  Teuchos::Array<int> ipiv_teuchos(blockSize_);
578  Kokkos::View<int*, Kokkos::HostSpace,
579  Kokkos::MemoryUnmanaged>
580  ipiv(ipiv_teuchos.getRawPtr(), blockSize_);
581  Teuchos::Array<IST> work_teuchos(blockSize_);
582  Kokkos::View<IST*, Kokkos::HostSpace,
583  Kokkos::MemoryUnmanaged>
584  work(work_teuchos.getRawPtr(), blockSize_);
585 
586  size_t num_cols = U_block_->getColMap()->getLocalNumElements();
587  Teuchos::Array<int> colflag(num_cols);
588 
589  typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock("diagModBlock", blockSize_, blockSize_);
590  typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp("matTmp", blockSize_, blockSize_);
591  typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier("multiplier", blockSize_, blockSize_);
592 
593  // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
594 
595  // Now start the factorization.
596 
597  // Need some integer workspace and pointers
598  LO NumUU;
599  for (size_t j = 0; j < num_cols; ++j) {
600  colflag[j] = -1;
601  }
602  Teuchos::Array<LO> InI(MaxNumEntries, 0);
603  Teuchos::Array<scalar_type> InV(MaxNumEntries * blockMatSize, STM::zero());
604 
605  const LO numLocalRows = L_block_->getLocalNumRows();
606  for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
607  // Fill InV, InI with current row of L, D and U combined
608 
609  NumIn = MaxNumEntries;
610  local_inds_host_view_type colValsL;
611  values_host_view_type valsL;
612 
613  L_block_->getLocalRowView(local_row, colValsL, valsL);
614  NumL = (LO)colValsL.size();
615  for (LO j = 0; j < NumL; ++j) {
616  const LO matOffset = blockMatSize * j;
617  little_block_host_type lmat((typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
618  little_block_host_type lmatV((typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
619  // lmatV.assign(lmat);
620  Tpetra::COPY(lmat, lmatV);
621  InI[j] = colValsL[j];
622  }
623 
624  little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
625  little_block_host_type dmatV((typename little_block_host_type::value_type*)&InV[NumL * blockMatSize], blockSize_, rowStride);
626  // dmatV.assign(dmat);
627  Tpetra::COPY(dmat, dmatV);
628  InI[NumL] = local_row;
629 
630  local_inds_host_view_type colValsU;
631  values_host_view_type valsU;
632  U_block_->getLocalRowView(local_row, colValsU, valsU);
633  NumURead = (LO)colValsU.size();
634  NumU = 0;
635  for (LO j = 0; j < NumURead; ++j) {
636  if (!(colValsU[j] < numLocalRows)) continue;
637  InI[NumL + 1 + j] = colValsU[j];
638  const LO matOffset = blockMatSize * (NumL + 1 + j);
639  little_block_host_type umat((typename little_block_host_type::value_type*)&valsU[blockMatSize * j], blockSize_, rowStride);
640  little_block_host_type umatV((typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
641  // umatV.assign(umat);
642  Tpetra::COPY(umat, umatV);
643  NumU += 1;
644  }
645  NumIn = NumL + NumU + 1;
646 
647  // Set column flags
648  for (size_t j = 0; j < NumIn; ++j) {
649  colflag[InI[j]] = j;
650  }
651 
652 #ifndef IFPACK2_RBILUK_INITIAL
653  for (LO i = 0; i < blockSize_; ++i)
654  for (LO j = 0; j < blockSize_; ++j) {
655  {
656  diagModBlock(i, j) = 0;
657  }
658  }
659 #else
660  scalar_type diagmod = STM::zero(); // Off-diagonal accumulator
661  Kokkos::deep_copy(diagModBlock, diagmod);
662 #endif
663 
664  for (LO jj = 0; jj < NumL; ++jj) {
665  LO j = InI[jj];
666  little_block_host_type currentVal((typename little_block_host_type::value_type*)&InV[jj * blockMatSize], blockSize_, rowStride); // current_mults++;
667  // multiplier.assign(currentVal);
668  Tpetra::COPY(currentVal, multiplier);
669 
670  const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j, j);
671  // alpha = 1, beta = 0
672 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
673  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
674  KokkosBatched::Trans::NoTranspose,
675  KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), currentVal, dmatInverse, STS::zero(), matTmp);
676 #else
677  Tpetra::GEMM("N", "N", STS::one(), currentVal, dmatInverse,
678  STS::zero(), matTmp);
679 #endif
680  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
681  // currentVal.assign(matTmp);
682  Tpetra::COPY(matTmp, currentVal);
683  local_inds_host_view_type UUI;
684  values_host_view_type UUV;
685 
686  U_block_->getLocalRowView(j, UUI, UUV);
687  NumUU = (LO)UUI.size();
688 
689  if (this->RelaxValue_ == STM::zero()) {
690  for (LO k = 0; k < NumUU; ++k) {
691  if (!(UUI[k] < numLocalRows)) continue;
692  const int kk = colflag[UUI[k]];
693  if (kk > -1) {
694  little_block_host_type kkval((typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
695  little_block_host_type uumat((typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
696 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
697  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
698  KokkosBatched::Trans::NoTranspose,
699  KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
700 #else
701  Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
702  STM::one(), kkval);
703 #endif
704  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
705  }
706  }
707  } else {
708  for (LO k = 0; k < NumUU; ++k) {
709  if (!(UUI[k] < numLocalRows)) continue;
710  const int kk = colflag[UUI[k]];
711  little_block_host_type uumat((typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
712  if (kk > -1) {
713  little_block_host_type kkval((typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
714 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
715  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
716  KokkosBatched::Trans::NoTranspose,
717  KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
718 #else
719  Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
720  STM::one(), kkval);
721 #endif
722  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
723  } else {
724 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
725  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
726  KokkosBatched::Trans::NoTranspose,
727  KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), diagModBlock);
728 #else
729  Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
730  STM::one(), diagModBlock);
731 #endif
732  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
733  }
734  }
735  }
736  }
737  if (NumL) {
738  // Replace current row of L
739  L_block_->replaceLocalValues(local_row, InI.getRawPtr(), InV.getRawPtr(), NumL);
740  }
741 
742  // dmat.assign(dmatV);
743  Tpetra::COPY(dmatV, dmat);
744 
745  if (this->RelaxValue_ != STM::zero()) {
746  // dmat.update(this->RelaxValue_, diagModBlock);
747  Tpetra::AXPY(this->RelaxValue_, diagModBlock, dmat);
748  }
749 
750  // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
751  // if (STS::real (DV[i]) < STM::zero ()) {
752  // DV[i] = -MinDiagonalValue;
753  // }
754  // else {
755  // DV[i] = MinDiagonalValue;
756  // }
757  // }
758  // else
759  {
760  int lapackInfo = 0;
761  for (int k = 0; k < blockSize_; ++k) {
762  ipiv[k] = 0;
763  }
764 
765  Tpetra::GETF2(dmat, ipiv, lapackInfo);
766  // lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
768  lapackInfo != 0, std::runtime_error,
769  "Ifpack2::Experimental::RBILUK::compute: "
770  "lapackInfo = "
771  << lapackInfo << " which indicates an error in the factorization GETRF.");
772 
773  Tpetra::GETRI(dmat, ipiv, work, lapackInfo);
774  // lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
776  lapackInfo != 0, std::runtime_error,
777  "Ifpack2::Experimental::RBILUK::compute: "
778  "lapackInfo = "
779  << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
780  }
781 
782  for (LO j = 0; j < NumU; ++j) {
783  little_block_host_type currentVal((typename little_block_host_type::value_type*)&InV[(NumL + 1 + j) * blockMatSize], blockSize_, rowStride); // current_mults++;
784  // scale U by the diagonal inverse
785 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
786  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
787  KokkosBatched::Trans::NoTranspose,
788  KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), dmat, currentVal, STS::zero(), matTmp);
789 #else
790  Tpetra::GEMM("N", "N", STS::one(), dmat, currentVal,
791  STS::zero(), matTmp);
792 #endif
793  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
794  // currentVal.assign(matTmp);
795  Tpetra::COPY(matTmp, currentVal);
796  }
797 
798  if (NumU) {
799  // Replace current row of L and U
800  U_block_->replaceLocalValues(local_row, &InI[NumL + 1], &InV[blockMatSize * (NumL + 1)], NumU);
801  }
802 
803 #ifndef IFPACK2_RBILUK_INITIAL
804  // Reset column flags
805  for (size_t j = 0; j < NumIn; ++j) {
806  colflag[InI[j]] = -1;
807  }
808 #else
809  // Reset column flags
810  for (size_t j = 0; j < num_cols; ++j) {
811  colflag[j] = -1;
812  }
813 #endif
814  }
815  } // !this->isKokkosKernelsSpiluk_
816  else {
817  RCP<const block_crs_matrix_type> A_local_bcrs = Details::getBcrsMatrix(this->A_local_);
818  RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
819  if (A_local_bcrs.is_null()) {
820  if (A_local_crs.is_null()) {
821  local_ordinal_type numRows = this->A_local_->getLocalNumRows();
822  Array<size_t> entriesPerRow(numRows);
823  for (local_ordinal_type i = 0; i < numRows; i++) {
824  entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
825  }
826  RCP<crs_matrix_type> A_local_crs_nc =
827  rcp(new crs_matrix_type(this->A_local_->getRowMap(),
828  this->A_local_->getColMap(),
829  entriesPerRow()));
830  // copy entries into A_local_crs
831  nonconst_local_inds_host_view_type indices("indices", this->A_local_->getLocalMaxNumRowEntries());
832  nonconst_values_host_view_type values("values", this->A_local_->getLocalMaxNumRowEntries());
833  for (local_ordinal_type i = 0; i < numRows; i++) {
834  size_t numEntries = 0;
835  this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
836  A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
837  }
838  A_local_crs_nc->fillComplete(this->A_local_->getDomainMap(), this->A_local_->getRangeMap());
839  A_local_crs = Teuchos::rcp_const_cast<const crs_matrix_type>(A_local_crs_nc);
840  }
841  // Create bcrs from crs
842  // We can skip fillLogicalBlocks if we know the input is already filled
843  if (blockSize_ > 1) {
844  auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
845  A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
846  } else {
847  A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
848  }
849  }
850 
852  this->isKokkosKernelsStream_, std::runtime_error,
853  "Ifpack2::RBILUK::compute: "
854  "streams are not yet supported.");
855 
856  auto lclMtx = A_local_bcrs->getLocalMatrixDevice();
857  auto A_local_rowmap = lclMtx.graph.row_map;
858  auto A_local_entries = lclMtx.graph.entries;
859  auto A_local_values = lclMtx.values;
860 
861  // L_block_->resumeFill ();
862  // U_block_->resumeFill ();
863 
864  if (L_block_->isLocallyIndexed()) {
865  L_block_->setAllToScalar(STS::zero()); // Zero out L and U matrices
866  U_block_->setAllToScalar(STS::zero());
867  }
868 
869  using row_map_type = typename local_matrix_device_type::row_map_type;
870 
871  auto lclL = L_block_->getLocalMatrixDevice();
872  row_map_type L_rowmap = lclL.graph.row_map;
873  auto L_entries = lclL.graph.entries;
874  auto L_values = lclL.values;
875 
876  auto lclU = U_block_->getLocalMatrixDevice();
877  row_map_type U_rowmap = lclU.graph.row_map;
878  auto U_entries = lclU.graph.entries;
879  auto U_values = lclU.values;
880 
881  KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), this->LevelOfFill_,
882  A_local_rowmap, A_local_entries, A_local_values,
883  L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
884 
885  // Now call symbolic for sptrsvs
886  KokkosSparse::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
887  KokkosSparse::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
888  }
889  } // Stop timing
890 
891  // Sync everything back to device, for efficient solves.
892  /*
893  {
894  typedef typename block_crs_matrix_type::device_type device_type;
895  if (! A_block_.is_null ()) {
896  Teuchos::RCP<block_crs_matrix_type> A_nc =
897  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
898  A_nc->template sync<device_type> ();
899  }
900  L_block_->template sync<device_type> ();
901  U_block_->template sync<device_type> ();
902  D_block_->template sync<device_type> ();
903  }
904  */
905 
906  this->isComputed_ = true;
907  this->numCompute_ += 1;
908  this->computeTime_ += (timer.wallTime() - startTime);
909 }
910 
911 template <class MatrixType>
913  apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
914  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
915  Teuchos::ETransp mode,
916  scalar_type alpha,
917  scalar_type beta) const {
918  using Teuchos::RCP;
919  typedef Tpetra::BlockMultiVector<scalar_type,
921  BMV;
922 
924  this->A_.is_null(), std::runtime_error,
925  "Ifpack2::Experimental::RBILUK::apply: The matrix is "
926  "null. Please call setMatrix() with a nonnull input, then initialize() "
927  "and compute(), before calling this method.");
929  !this->isComputed(), std::runtime_error,
930  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
931  "you must call compute() before calling this method.");
932  TEUCHOS_TEST_FOR_EXCEPTION(
933  X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
934  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
935  "X.getNumVectors() = "
936  << X.getNumVectors()
937  << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
938  TEUCHOS_TEST_FOR_EXCEPTION(
939  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
940  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
941  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
942  "fixed. There is a FIXME in this file about this very issue.");
943 
944  const LO blockMatSize = blockSize_ * blockSize_;
945 
946  const LO rowStride = blockSize_;
947 
948  BMV yBlock(Y, *(this->Graph_->getA_Graph()->getDomainMap()), blockSize_);
949  const BMV xBlock(X, *(this->A_->getColMap()), blockSize_);
950 
951  Teuchos::Array<scalar_type> lclarray(blockSize_);
952  little_host_vec_type lclvec((typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
953  const scalar_type one = STM::one();
954  const scalar_type zero = STM::zero();
955 
956  Teuchos::Time timer("RBILUK::apply");
957  double startTime = timer.wallTime();
958  { // Start timing
959  Teuchos::TimeMonitor timeMon(timer);
960  if (!this->isKokkosKernelsSpiluk_) {
961  if (alpha == one && beta == zero) {
962  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
963  // Start by solving L C = X for C. C must have the same Map
964  // as D. We have to use a temp multivector, since our
965  // implementation of triangular solves does not allow its
966  // input and output to alias one another.
967  //
968  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
969  const LO numVectors = xBlock.getNumVectors();
970  BMV cBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
971  BMV rBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
972  for (LO imv = 0; imv < numVectors; ++imv) {
973  for (size_t i = 0; i < D_block_->getLocalNumRows(); ++i) {
974  LO local_row = i;
975  const_host_little_vec_type xval =
976  xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
977  little_host_vec_type cval =
978  cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
979  // cval.assign(xval);
980  Tpetra::COPY(xval, cval);
981 
982  local_inds_host_view_type colValsL;
983  values_host_view_type valsL;
984  L_block_->getLocalRowView(local_row, colValsL, valsL);
985  LO NumL = (LO)colValsL.size();
986 
987  for (LO j = 0; j < NumL; ++j) {
988  LO col = colValsL[j];
989  const_host_little_vec_type prevVal =
990  cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
991 
992  const LO matOffset = blockMatSize * j;
993  little_block_host_type lij((typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
994 
995  // cval.matvecUpdate(-one, lij, prevVal);
996  Tpetra::GEMV(-one, lij, prevVal, cval);
997  }
998  }
999  }
1000 
1001  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
1002  D_block_->applyBlock(cBlock, rBlock);
1003 
1004  // Solve U Y = R.
1005  for (LO imv = 0; imv < numVectors; ++imv) {
1006  const LO numRows = D_block_->getLocalNumRows();
1007  for (LO i = 0; i < numRows; ++i) {
1008  LO local_row = (numRows - 1) - i;
1009  const_host_little_vec_type rval =
1010  rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1011  little_host_vec_type yval =
1012  yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1013  // yval.assign(rval);
1014  Tpetra::COPY(rval, yval);
1015 
1016  local_inds_host_view_type colValsU;
1017  values_host_view_type valsU;
1018  U_block_->getLocalRowView(local_row, colValsU, valsU);
1019  LO NumU = (LO)colValsU.size();
1020 
1021  for (LO j = 0; j < NumU; ++j) {
1022  LO col = colValsU[NumU - 1 - j];
1023  const_host_little_vec_type prevVal =
1024  yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1025 
1026  const LO matOffset = blockMatSize * (NumU - 1 - j);
1027  little_block_host_type uij((typename little_block_host_type::value_type*)&valsU[matOffset], blockSize_, rowStride);
1028 
1029  // yval.matvecUpdate(-one, uij, prevVal);
1030  Tpetra::GEMV(-one, uij, prevVal, yval);
1031  }
1032  }
1033  }
1034  } else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1035  TEUCHOS_TEST_FOR_EXCEPTION(
1036  true, std::runtime_error,
1037  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1038  }
1039  } else { // alpha != 1 or beta != 0
1040  if (alpha == zero) {
1041  if (beta == zero) {
1042  Y.putScalar(zero);
1043  } else {
1044  Y.scale(beta);
1045  }
1046  } else { // alpha != zero
1047  MV Y_tmp(Y.getMap(), Y.getNumVectors());
1048  apply(X, Y_tmp, mode);
1049  Y.update(alpha, Y_tmp, beta);
1050  }
1051  }
1052  } else {
1053  // Kokkos kernels impl.
1054  auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
1055  auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
1056 
1057  auto lclL = L_block_->getLocalMatrixDevice();
1058  auto L_rowmap = lclL.graph.row_map;
1059  auto L_entries = lclL.graph.entries;
1060  auto L_values = lclL.values;
1061 
1062  auto lclU = U_block_->getLocalMatrixDevice();
1063  auto U_rowmap = lclU.graph.row_map;
1064  auto U_entries = lclU.graph.entries;
1065  auto U_values = lclU.values;
1066 
1067  if (mode == Teuchos::NO_TRANS) {
1068  {
1069  const LO numVecs = X.getNumVectors();
1070  for (LO vec = 0; vec < numVecs; ++vec) {
1071  auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
1072  auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1073  KokkosSparse::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
1074  }
1075  }
1076 
1077  {
1078  const LO numVecs = X.getNumVectors();
1079  for (LO vec = 0; vec < numVecs; ++vec) {
1080  auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1081  KokkosSparse::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
1082  }
1083  }
1084 
1085  KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
1086  } else {
1087  TEUCHOS_TEST_FOR_EXCEPTION(
1088  true, std::runtime_error,
1089  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1090  }
1091 
1092  // Y.getWrappedDualView().sync();
1093  }
1094  } // Stop timing
1095 
1096  this->numApply_ += 1;
1097  this->applyTime_ += (timer.wallTime() - startTime);
1098 }
1099 
1100 template <class MatrixType>
1101 std::string RBILUK<MatrixType>::description() const {
1102  std::ostringstream os;
1103 
1104  // Output is a valid YAML dictionary in flow style. If you don't
1105  // like everything on a single line, you should call describe()
1106  // instead.
1107  os << "\"Ifpack2::Experimental::RBILUK\": {";
1108  os << "Initialized: " << (this->isInitialized() ? "true" : "false") << ", "
1109  << "Computed: " << (this->isComputed() ? "true" : "false") << ", ";
1110 
1111  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
1112 
1113  if (this->A_.is_null()) {
1114  os << "Matrix: null";
1115  } else {
1116  os << "Global matrix dimensions: ["
1117  << this->A_->getGlobalNumRows() << ", " << this->A_->getGlobalNumCols() << "]"
1118  << ", Global nnz: " << this->A_->getGlobalNumEntries();
1119  }
1120 
1121  os << "}";
1122  return os.str();
1123 }
1124 
1125 } // namespace Experimental
1126 
1127 } // namespace Ifpack2
1128 
1129 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
1130 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
1131 // handled internally via dynamic cast.
1132 
1133 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S, LO, GO, N) \
1134  template class Ifpack2::Experimental::RBILUK<Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1135  template class Ifpack2::Experimental::RBILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
1136 
1137 #endif
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:257
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:111
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:91
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:140
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:101
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:913
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type size() const
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:118
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:213
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:115
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:132
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:107
T * getRawPtr() const
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:127
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:508
File for utility functions.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:67
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:94
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:114
size_type size() const
static double wallTime()
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:125
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1101
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:95
bool is_null() const