10 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
11 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
13 #include "Tpetra_BlockMultiVector.hpp"
14 #include "Tpetra_BlockView.hpp"
15 #include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
16 #include "Ifpack2_OverlappingRowMatrix.hpp"
17 #include "Ifpack2_Details_getCrsMatrix.hpp"
18 #include "Ifpack2_LocalFilter.hpp"
20 #include "Ifpack2_RILUK.hpp"
21 #include "KokkosSparse_sptrsv.hpp"
26 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
27 #include "KokkosBatched_Gemm_Decl.hpp"
28 #include "KokkosBatched_Gemm_Serial_Impl.hpp"
29 #include "KokkosBatched_Util.hpp"
34 namespace Experimental {
37 template <
class MatrixType>
38 struct LocalRowHandler {
39 using LocalOrdinal =
typename MatrixType::local_ordinal_type;
40 using row_matrix_type = Tpetra::RowMatrix<
41 typename MatrixType::scalar_type,
43 typename MatrixType::global_ordinal_type,
44 typename MatrixType::node_type>;
45 using inds_type =
typename row_matrix_type::local_inds_host_view_type;
46 using vals_type =
typename row_matrix_type::values_host_view_type;
50 if (!A_->supportsRowViews()) {
51 const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
52 const auto blockSize = A_->getBlockSize();
53 ind_nc_ = inds_type_nc(
"Ifpack2::RBILUK::LocalRowHandler::indices", maxNumRowEntr);
54 val_nc_ = vals_type_nc(
"Ifpack2::RBILUK::LocalRowHandler::values", maxNumRowEntr * blockSize * blockSize);
58 void getLocalRow(LocalOrdinal local_row, inds_type& InI, vals_type& InV, LocalOrdinal& NumIn) {
59 if (A_->supportsRowViews()) {
60 A_->getLocalRowView(local_row, InI, InV);
61 NumIn = (LocalOrdinal)InI.size();
64 A_->getLocalRowCopy(local_row, ind_nc_, val_nc_, cnt);
67 NumIn = (LocalOrdinal)cnt;
72 using inds_type_nc =
typename row_matrix_type::nonconst_local_inds_host_view_type;
73 using vals_type_nc =
typename row_matrix_type::nonconst_values_host_view_type;
82 template <
class MatrixType>
86 template <
class MatrixType>
90 template <
class MatrixType>
93 template <
class MatrixType>
102 this->isAllocated_ =
false;
103 this->isInitialized_ =
false;
104 this->isComputed_ =
false;
105 this->Graph_ = Teuchos::null;
106 L_block_ = Teuchos::null;
107 U_block_ = Teuchos::null;
108 D_block_ = Teuchos::null;
112 template <
class MatrixType>
113 const typename RBILUK<MatrixType>::block_crs_matrix_type&
116 L_block_.is_null(), std::runtime_error,
117 "Ifpack2::RILUK::getL: The L factor "
118 "is null. Please call initialize() (and preferably also compute()) "
119 "before calling this method. If the input matrix has not yet been set, "
120 "you must first call setMatrix() with a nonnull input matrix before you "
121 "may call initialize() or compute().");
125 template <
class MatrixType>
126 const typename RBILUK<MatrixType>::block_crs_matrix_type&
129 D_block_.is_null(), std::runtime_error,
130 "Ifpack2::RILUK::getD: The D factor "
131 "(of diagonal entries) is null. Please call initialize() (and "
132 "preferably also compute()) before calling this method. If the input "
133 "matrix has not yet been set, you must first call setMatrix() with a "
134 "nonnull input matrix before you may call initialize() or compute().");
138 template <
class MatrixType>
139 const typename RBILUK<MatrixType>::block_crs_matrix_type&
142 U_block_.is_null(), std::runtime_error,
143 "Ifpack2::RILUK::getU: The U factor "
144 "is null. Please call initialize() (and preferably also compute()) "
145 "before calling this method. If the input matrix has not yet been set, "
146 "you must first call setMatrix() with a nonnull input matrix before you "
147 "may call initialize() or compute().");
151 template <
class MatrixType>
156 if (!this->isAllocated_) {
168 L_block_ =
rcp(
new block_crs_matrix_type(*this->Graph_->getL_Graph(), blockSize_));
169 U_block_ =
rcp(
new block_crs_matrix_type(*this->Graph_->getU_Graph(), blockSize_));
170 D_block_ =
rcp(
new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
172 L_block_->setAllToScalar(STM::zero());
173 U_block_->setAllToScalar(STM::zero());
174 D_block_->setAllToScalar(STM::zero());
177 if (this->isKokkosKernelsSpiluk_) {
178 const auto numRows = L_block_->getLocalNumRows();
179 tmp_ = decltype(tmp_)(
"RBILUK::tmp_", numRows * blockSize_);
182 this->isAllocated_ =
true;
187 template <
class MatrixType>
189 getBlockCrsGraph(
const Teuchos::RCP<
const typename RBILUK<MatrixType>::row_matrix_type>& A) {
190 using local_ordinal_type =
typename MatrixType::local_ordinal_type;
193 using Teuchos::rcp_const_cast;
194 using Teuchos::rcp_dynamic_cast;
195 using Teuchos::rcpFromRef;
196 using row_matrix_type =
typename RBILUK<MatrixType>::row_matrix_type;
197 using crs_graph_type =
typename RBILUK<MatrixType>::crs_graph_type;
198 using block_crs_matrix_type =
typename RBILUK<MatrixType>::block_crs_matrix_type;
203 RCP<const LocalFilter<row_matrix_type> > filteredA =
204 rcp_dynamic_cast<
const LocalFilter<row_matrix_type> >(A_local);
205 RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
206 RCP<const block_crs_matrix_type> A_block = Teuchos::null;
207 if (!filteredA.is_null()) {
208 overlappedA = rcp_dynamic_cast<
const OverlappingRowMatrix<row_matrix_type> >(filteredA->getUnderlyingMatrix());
211 if (!overlappedA.is_null()) {
212 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
213 }
else if (!filteredA.is_null()) {
215 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
217 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(A_local);
220 if (!A_block.is_null()) {
221 return rcpFromRef(A_block->getCrsGraph());
227 local_ordinal_type numRows = A_local->getLocalNumRows();
229 for (local_ordinal_type i = 0; i < numRows; i++) {
230 entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
232 RCP<crs_graph_type> A_local_crs_nc =
233 rcp(
new crs_graph_type(A_local->getRowMap(),
234 A_local->getColMap(),
238 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
239 LocalRowHandler_t localRowHandler(A_local);
240 typename LocalRowHandler_t::inds_type indices;
241 typename LocalRowHandler_t::vals_type values;
242 for (local_ordinal_type i = 0; i < numRows; i++) {
243 local_ordinal_type numEntries = 0;
244 localRowHandler.getLocalRow(i, indices, values, numEntries);
245 A_local_crs_nc->insertLocalIndices(i, numEntries, indices.data());
249 A_local_crs_nc->fillComplete(A_local->getDomainMap(), A_local->getRangeMap());
250 return rcp_const_cast<
const crs_graph_type>(A_local_crs_nc);
256 template <
class MatrixType>
260 using Teuchos::rcp_dynamic_cast;
261 const char prefix[] =
"Ifpack2::Experimental::RBILUK::initialize: ";
264 "RowMatrix) is null. Please call setMatrix() with a nonnull input "
265 "before calling this method.");
267 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
268 "initialize() or compute() with this matrix until the matrix is fill "
269 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
270 "underlying graph is fill complete.");
272 blockSize_ = this->A_->getBlockSize();
273 this->A_local_ = this->makeLocalFilter(this->A_);
276 double startTime = timer.
wallTime();
287 this->isInitialized_ =
false;
288 this->isAllocated_ =
false;
289 this->isComputed_ =
false;
290 this->Graph_ = Teuchos::null;
292 RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_);
294 this->LevelOfFill_, 0));
296 if (this->isKokkosKernelsSpiluk_) {
297 this->KernelHandle_ =
Teuchos::rcp(
new kk_handle_type());
298 const auto numRows = this->A_local_->getLocalNumRows();
299 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
301 2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
302 2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
304 this->Graph_->initialize(KernelHandle_);
306 this->L_Sptrsv_KernelHandle_ =
Teuchos::rcp(
new kk_handle_type());
307 this->U_Sptrsv_KernelHandle_ =
Teuchos::rcp(
new kk_handle_type());
309 KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
311 this->L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows,
true , blockSize_);
312 this->U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows,
false , blockSize_);
314 this->Graph_->initialize();
317 allocate_L_and_U_blocks();
319 #ifdef IFPACK2_RBILUK_INITIAL
324 this->isInitialized_ =
true;
325 this->numInitialize_ += 1;
326 this->initializeTime_ += (timer.
wallTime() - startTime);
329 template <
class MatrixType>
333 typedef Tpetra::Map<LO, GO, node_type> map_type;
335 LO NumIn = 0, NumL = 0, NumU = 0;
336 bool DiagFound =
false;
337 size_t NumNonzeroDiags = 0;
338 size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
339 LO blockMatSize = blockSize_ * blockSize_;
346 bool gidsAreConsistentlyOrdered =
true;
347 GO indexOfInconsistentGID = 0;
348 for (GO i = 0; i < rowGIDs.
size(); ++i) {
349 if (rowGIDs[i] != colGIDs[i]) {
350 gidsAreConsistentlyOrdered =
false;
351 indexOfInconsistentGID = i;
356 "The ordering of the local GIDs in the row and column maps is not the same"
358 <<
"at index " << indexOfInconsistentGID
359 <<
". Consistency is required, as all calculations are done with"
361 <<
"local indexing.");
372 L_block_->setAllToScalar(STM::zero());
373 U_block_->setAllToScalar(STM::zero());
374 D_block_->setAllToScalar(STM::zero());
391 RCP<const map_type> rowMap = L_block_->getRowMap();
403 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
404 LocalRowHandler_t localRowHandler(this->A_);
405 typename LocalRowHandler_t::inds_type InI;
406 typename LocalRowHandler_t::vals_type InV;
407 for (
size_t myRow = 0; myRow < this->A_->getLocalNumRows(); ++myRow) {
408 LO local_row = myRow;
410 localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
417 for (LO j = 0; j < NumIn; ++j) {
419 const LO blockOffset = blockMatSize * j;
421 if (k == local_row) {
424 for (LO jj = 0; jj < blockMatSize; ++jj)
425 diagValues[jj] = this->Rthresh_ * InV[blockOffset + jj] + IFPACK2_SGN(InV[blockOffset + jj]) * this->Athresh_;
426 D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
429 true, std::runtime_error,
430 "Ifpack2::RILUK::initAllValues: current "
432 << k <<
" < 0. I'm not sure why this is an error; it is "
433 "probably an artifact of the undocumented assumptions of the "
434 "original implementation (likely copied and pasted from Ifpack). "
435 "Nevertheless, the code I found here insisted on this being an error "
436 "state, so I will throw an exception here.");
437 }
else if (k < local_row) {
439 const LO LBlockOffset = NumL * blockMatSize;
440 for (LO jj = 0; jj < blockMatSize; ++jj)
441 LV[LBlockOffset + jj] = InV[blockOffset + jj];
443 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
445 const LO UBlockOffset = NumU * blockMatSize;
446 for (LO jj = 0; jj < blockMatSize; ++jj)
447 UV[UBlockOffset + jj] = InV[blockOffset + jj];
457 for (LO jj = 0; jj < blockSize_; ++jj)
458 diagValues[jj * (blockSize_ + 1)] = this->Athresh_;
459 D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
463 L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
467 U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
482 this->isInitialized_ =
true;
491 template <
class LittleBlockType>
492 struct GetManagedView {
493 static_assert(Kokkos::is_view<LittleBlockType>::value,
494 "LittleBlockType must be a Kokkos::View.");
495 typedef Kokkos::View<
typename LittleBlockType::non_const_data_type,
496 typename LittleBlockType::array_layout,
497 typename LittleBlockType::device_type>
498 managed_non_const_type;
499 static_assert(static_cast<int>(managed_non_const_type::rank) ==
500 static_cast<int>(LittleBlockType::rank),
501 "managed_non_const_type::rank != LittleBlock::rank. "
502 "Please report this bug to the Ifpack2 developers.");
507 template <
class MatrixType>
513 typedef impl_scalar_type IST;
514 const char prefix[] =
"Ifpack2::Experimental::RBILUK::compute: ";
520 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
521 "input before calling this method.");
523 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
524 "initialize() or compute() with this matrix until the matrix is fill "
525 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
526 "underlying graph is fill complete.");
528 if (!this->isInitialized()) {
551 double startTime = timer.
wallTime();
554 this->isComputed_ =
false;
561 if (!this->isKokkosKernelsSpiluk_) {
564 LO NumL, NumU, NumURead;
567 const size_t MaxNumEntries =
568 L_block_->getLocalMaxNumRowEntries() + U_block_->getLocalMaxNumRowEntries() + 1;
570 const LO blockMatSize = blockSize_ * blockSize_;
575 const LO rowStride = blockSize_;
578 Kokkos::View<
int*, Kokkos::HostSpace,
579 Kokkos::MemoryUnmanaged>
580 ipiv(ipiv_teuchos.
getRawPtr(), blockSize_);
582 Kokkos::View<IST*, Kokkos::HostSpace,
583 Kokkos::MemoryUnmanaged>
584 work(work_teuchos.getRawPtr(), blockSize_);
586 size_t num_cols = U_block_->getColMap()->getLocalNumElements();
589 typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock(
"diagModBlock", blockSize_, blockSize_);
590 typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp(
"matTmp", blockSize_, blockSize_);
591 typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier(
"multiplier", blockSize_, blockSize_);
599 for (
size_t j = 0; j < num_cols; ++j) {
605 const LO numLocalRows = L_block_->getLocalNumRows();
606 for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
609 NumIn = MaxNumEntries;
610 local_inds_host_view_type colValsL;
611 values_host_view_type valsL;
613 L_block_->getLocalRowView(local_row, colValsL, valsL);
614 NumL = (LO)colValsL.size();
615 for (LO j = 0; j < NumL; ++j) {
616 const LO matOffset = blockMatSize * j;
617 little_block_host_type lmat((
typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
618 little_block_host_type lmatV((
typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
620 Tpetra::COPY(lmat, lmatV);
621 InI[j] = colValsL[j];
624 little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
625 little_block_host_type dmatV((
typename little_block_host_type::value_type*)&InV[NumL * blockMatSize], blockSize_, rowStride);
627 Tpetra::COPY(dmat, dmatV);
628 InI[NumL] = local_row;
630 local_inds_host_view_type colValsU;
631 values_host_view_type valsU;
632 U_block_->getLocalRowView(local_row, colValsU, valsU);
633 NumURead = (LO)colValsU.
size();
635 for (LO j = 0; j < NumURead; ++j) {
636 if (!(colValsU[j] < numLocalRows))
continue;
637 InI[NumL + 1 + j] = colValsU[j];
638 const LO matOffset = blockMatSize * (NumL + 1 + j);
639 little_block_host_type umat((
typename little_block_host_type::value_type*)&valsU[blockMatSize * j], blockSize_, rowStride);
640 little_block_host_type umatV((
typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
642 Tpetra::COPY(umat, umatV);
645 NumIn = NumL + NumU + 1;
648 for (
size_t j = 0; j < NumIn; ++j) {
652 #ifndef IFPACK2_RBILUK_INITIAL
653 for (LO i = 0; i < blockSize_; ++i)
654 for (LO j = 0; j < blockSize_; ++j) {
656 diagModBlock(i, j) = 0;
661 Kokkos::deep_copy(diagModBlock, diagmod);
664 for (LO jj = 0; jj < NumL; ++jj) {
666 little_block_host_type currentVal((
typename little_block_host_type::value_type*)&InV[jj * blockMatSize], blockSize_, rowStride);
668 Tpetra::COPY(currentVal, multiplier);
670 const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j, j);
672 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
673 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
674 KokkosBatched::Trans::NoTranspose,
675 KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), currentVal, dmatInverse, STS::zero(), matTmp);
677 Tpetra::GEMM(
"N",
"N", STS::one(), currentVal, dmatInverse,
678 STS::zero(), matTmp);
682 Tpetra::COPY(matTmp, currentVal);
683 local_inds_host_view_type UUI;
684 values_host_view_type UUV;
686 U_block_->getLocalRowView(j, UUI, UUV);
687 NumUU = (LO)UUI.size();
689 if (this->RelaxValue_ == STM::zero()) {
690 for (LO k = 0; k < NumUU; ++k) {
691 if (!(UUI[k] < numLocalRows))
continue;
692 const int kk = colflag[UUI[k]];
694 little_block_host_type kkval((
typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
695 little_block_host_type uumat((
typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
696 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
697 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
698 KokkosBatched::Trans::NoTranspose,
699 KokkosBatched::Algo::Gemm::Blocked>::invoke(
magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
701 Tpetra::GEMM(
"N",
"N",
magnitude_type(-STM::one()), multiplier, uumat,
708 for (LO k = 0; k < NumUU; ++k) {
709 if (!(UUI[k] < numLocalRows))
continue;
710 const int kk = colflag[UUI[k]];
711 little_block_host_type uumat((
typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
713 little_block_host_type kkval((
typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
714 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
715 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
716 KokkosBatched::Trans::NoTranspose,
717 KokkosBatched::Algo::Gemm::Blocked>::invoke(
magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
719 Tpetra::GEMM(
"N",
"N",
magnitude_type(-STM::one()), multiplier, uumat,
724 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
725 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
726 KokkosBatched::Trans::NoTranspose,
727 KokkosBatched::Algo::Gemm::Blocked>::invoke(
magnitude_type(-STM::one()), multiplier, uumat, STM::one(), diagModBlock);
729 Tpetra::GEMM(
"N",
"N",
magnitude_type(-STM::one()), multiplier, uumat,
730 STM::one(), diagModBlock);
743 Tpetra::COPY(dmatV, dmat);
745 if (this->RelaxValue_ != STM::zero()) {
747 Tpetra::AXPY(this->RelaxValue_, diagModBlock, dmat);
761 for (
int k = 0; k < blockSize_; ++k) {
765 Tpetra::GETF2(dmat, ipiv, lapackInfo);
768 lapackInfo != 0, std::runtime_error,
769 "Ifpack2::Experimental::RBILUK::compute: "
771 << lapackInfo <<
" which indicates an error in the factorization GETRF.");
773 Tpetra::GETRI(dmat, ipiv, work, lapackInfo);
776 lapackInfo != 0, std::runtime_error,
777 "Ifpack2::Experimental::RBILUK::compute: "
779 << lapackInfo <<
" which indicates an error in the matrix inverse GETRI.");
782 for (LO j = 0; j < NumU; ++j) {
783 little_block_host_type currentVal((
typename little_block_host_type::value_type*)&InV[(NumL + 1 + j) * blockMatSize], blockSize_, rowStride);
785 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
786 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
787 KokkosBatched::Trans::NoTranspose,
788 KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), dmat, currentVal, STS::zero(), matTmp);
790 Tpetra::GEMM(
"N",
"N", STS::one(), dmat, currentVal,
791 STS::zero(), matTmp);
795 Tpetra::COPY(matTmp, currentVal);
800 U_block_->replaceLocalValues(local_row, &InI[NumL + 1], &InV[blockMatSize * (NumL + 1)], NumU);
803 #ifndef IFPACK2_RBILUK_INITIAL
805 for (
size_t j = 0; j < NumIn; ++j) {
806 colflag[InI[j]] = -1;
810 for (
size_t j = 0; j < num_cols; ++j) {
817 RCP<const block_crs_matrix_type> A_local_bcrs = Details::getBcrsMatrix(this->A_local_);
818 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
819 if (A_local_bcrs.is_null()) {
820 if (A_local_crs.is_null()) {
822 Array<size_t> entriesPerRow(numRows);
824 entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
826 RCP<crs_matrix_type> A_local_crs_nc =
828 this->A_local_->getColMap(),
831 nonconst_local_inds_host_view_type indices(
"indices", this->A_local_->getLocalMaxNumRowEntries());
832 nonconst_values_host_view_type values(
"values", this->A_local_->getLocalMaxNumRowEntries());
834 size_t numEntries = 0;
835 this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
836 A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
838 A_local_crs_nc->fillComplete(this->A_local_->getDomainMap(), this->A_local_->getRangeMap());
839 A_local_crs = Teuchos::rcp_const_cast<
const crs_matrix_type>(A_local_crs_nc);
843 if (blockSize_ > 1) {
844 auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
845 A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
847 A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
852 this->isKokkosKernelsStream_, std::runtime_error,
853 "Ifpack2::RBILUK::compute: "
854 "streams are not yet supported.");
856 auto lclMtx = A_local_bcrs->getLocalMatrixDevice();
857 auto A_local_rowmap = lclMtx.graph.row_map;
858 auto A_local_entries = lclMtx.graph.entries;
859 auto A_local_values = lclMtx.values;
864 if (L_block_->isLocallyIndexed()) {
865 L_block_->setAllToScalar(STS::zero());
866 U_block_->setAllToScalar(STS::zero());
869 using row_map_type =
typename local_matrix_device_type::row_map_type;
871 auto lclL = L_block_->getLocalMatrixDevice();
872 row_map_type L_rowmap = lclL.graph.row_map;
873 auto L_entries = lclL.graph.entries;
874 auto L_values = lclL.values;
876 auto lclU = U_block_->getLocalMatrixDevice();
877 row_map_type U_rowmap = lclU.graph.row_map;
878 auto U_entries = lclU.graph.entries;
879 auto U_values = lclU.values;
881 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), this->LevelOfFill_,
882 A_local_rowmap, A_local_entries, A_local_values,
883 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
886 KokkosSparse::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
887 KokkosSparse::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
906 this->isComputed_ =
true;
907 this->numCompute_ += 1;
908 this->computeTime_ += (timer.
wallTime() - startTime);
911 template <
class MatrixType>
913 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
914 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
924 this->A_.
is_null(), std::runtime_error,
925 "Ifpack2::Experimental::RBILUK::apply: The matrix is "
926 "null. Please call setMatrix() with a nonnull input, then initialize() "
927 "and compute(), before calling this method.");
929 !this->isComputed(), std::runtime_error,
930 "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
931 "you must call compute() before calling this method.");
932 TEUCHOS_TEST_FOR_EXCEPTION(
933 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
934 "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
935 "X.getNumVectors() = "
937 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
938 TEUCHOS_TEST_FOR_EXCEPTION(
940 "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
941 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
942 "fixed. There is a FIXME in this file about this very issue.");
944 const LO blockMatSize = blockSize_ * blockSize_;
946 const LO rowStride = blockSize_;
948 BMV yBlock(Y, *(this->Graph_->getA_Graph()->getDomainMap()), blockSize_);
949 const BMV xBlock(X, *(this->A_->getColMap()), blockSize_);
952 little_host_vec_type lclvec((
typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
953 const scalar_type one = STM::one();
954 const scalar_type zero = STM::zero();
957 double startTime = timer.
wallTime();
960 if (!this->isKokkosKernelsSpiluk_) {
961 if (alpha == one && beta == zero) {
969 const LO numVectors = xBlock.getNumVectors();
970 BMV cBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
971 BMV rBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
972 for (LO imv = 0; imv < numVectors; ++imv) {
973 for (
size_t i = 0; i < D_block_->getLocalNumRows(); ++i) {
975 const_host_little_vec_type xval =
976 xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
977 little_host_vec_type cval =
978 cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
980 Tpetra::COPY(xval, cval);
982 local_inds_host_view_type colValsL;
983 values_host_view_type valsL;
984 L_block_->getLocalRowView(local_row, colValsL, valsL);
985 LO NumL = (LO)colValsL.size();
987 for (LO j = 0; j < NumL; ++j) {
988 LO col = colValsL[j];
989 const_host_little_vec_type prevVal =
990 cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
992 const LO matOffset = blockMatSize * j;
993 little_block_host_type lij((
typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
996 Tpetra::GEMV(-one, lij, prevVal, cval);
1002 D_block_->applyBlock(cBlock, rBlock);
1005 for (LO imv = 0; imv < numVectors; ++imv) {
1006 const LO numRows = D_block_->getLocalNumRows();
1007 for (LO i = 0; i < numRows; ++i) {
1008 LO local_row = (numRows - 1) - i;
1009 const_host_little_vec_type rval =
1010 rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1011 little_host_vec_type yval =
1012 yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1014 Tpetra::COPY(rval, yval);
1016 local_inds_host_view_type colValsU;
1017 values_host_view_type valsU;
1018 U_block_->getLocalRowView(local_row, colValsU, valsU);
1019 LO NumU = (LO)colValsU.size();
1021 for (LO j = 0; j < NumU; ++j) {
1022 LO col = colValsU[NumU - 1 - j];
1023 const_host_little_vec_type prevVal =
1024 yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1026 const LO matOffset = blockMatSize * (NumU - 1 - j);
1027 little_block_host_type uij((
typename little_block_host_type::value_type*)&valsU[matOffset], blockSize_, rowStride);
1030 Tpetra::GEMV(-one, uij, prevVal, yval);
1035 TEUCHOS_TEST_FOR_EXCEPTION(
1036 true, std::runtime_error,
1037 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1040 if (alpha == zero) {
1047 MV Y_tmp(Y.getMap(), Y.getNumVectors());
1048 apply(X, Y_tmp, mode);
1049 Y.update(alpha, Y_tmp, beta);
1054 auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
1055 auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
1057 auto lclL = L_block_->getLocalMatrixDevice();
1058 auto L_rowmap = lclL.graph.row_map;
1059 auto L_entries = lclL.graph.entries;
1060 auto L_values = lclL.values;
1062 auto lclU = U_block_->getLocalMatrixDevice();
1063 auto U_rowmap = lclU.graph.row_map;
1064 auto U_entries = lclU.graph.entries;
1065 auto U_values = lclU.values;
1069 const LO numVecs = X.getNumVectors();
1070 for (LO vec = 0; vec < numVecs; ++vec) {
1071 auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
1072 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1073 KokkosSparse::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
1078 const LO numVecs = X.getNumVectors();
1079 for (LO vec = 0; vec < numVecs; ++vec) {
1080 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1081 KokkosSparse::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
1085 KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
1087 TEUCHOS_TEST_FOR_EXCEPTION(
1088 true, std::runtime_error,
1089 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1096 this->numApply_ += 1;
1097 this->applyTime_ += (timer.
wallTime() - startTime);
1100 template <
class MatrixType>
1102 std::ostringstream os;
1107 os <<
"\"Ifpack2::Experimental::RBILUK\": {";
1108 os <<
"Initialized: " << (this->isInitialized() ?
"true" :
"false") <<
", "
1109 <<
"Computed: " << (this->isComputed() ?
"true" :
"false") <<
", ";
1111 os <<
"Level-of-fill: " << this->getLevelOfFill() <<
", ";
1114 os <<
"Matrix: null";
1116 os <<
"Global matrix dimensions: ["
1117 << this->A_->getGlobalNumRows() <<
", " << this->A_->getGlobalNumCols() <<
"]"
1118 <<
", Global nnz: " << this->A_->getGlobalNumEntries();
1133 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S, LO, GO, N) \
1134 template class Ifpack2::Experimental::RBILUK<Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1135 template class Ifpack2::Experimental::RBILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:257
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:111
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:91
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:140
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:101
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:913
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:118
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:213
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:115
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:132
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:107
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:127
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:508
File for utility functions.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:67
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:94
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:114
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:125
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1101
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:95