10 #ifndef IFPACK2_CRSRILUK_DEF_HPP
11 #define IFPACK2_CRSRILUK_DEF_HPP
14 #include "Ifpack2_LocalFilter.hpp"
15 #include "Tpetra_CrsMatrix.hpp"
16 #include "Teuchos_StandardParameterEntryValidators.hpp"
17 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
18 #include "Ifpack2_Details_getParamTryingTypes.hpp"
19 #include "Ifpack2_Details_getCrsMatrix.hpp"
20 #include "Kokkos_Sort.hpp"
21 #include "KokkosSparse_Utils.hpp"
22 #include "KokkosKernels_Sorting.hpp"
23 #include "KokkosSparse_IOUtils.hpp"
36 type_strs[0] =
"Serial";
37 type_strs[1] =
"KSPILUK";
39 type_enums[0] = Serial;
40 type_enums[1] = KSPILUK;
45 template <
class MatrixType>
51 , isInitialized_(false)
56 , initializeTime_(0.0)
62 , isKokkosKernelsSpiluk_(false)
63 , isKokkosKernelsStream_(false)
65 , hasStreamReordered_(false) {
69 template <
class MatrixType>
75 , isInitialized_(false)
80 , initializeTime_(0.0)
86 , isKokkosKernelsSpiluk_(false)
87 , isKokkosKernelsStream_(false)
89 , hasStreamReordered_(false) {
93 template <
class MatrixType>
95 if (!isKokkosKernelsStream_) {
97 KernelHandle_->destroy_spiluk_handle();
100 for (
int i = 0; i < num_streams_; i++) {
102 KernelHandle_v_[i]->destroy_spiluk_handle();
108 template <
class MatrixType>
111 L_solver_->setObjectLabel(
"lower");
113 U_solver_->setObjectLabel(
"upper");
116 template <
class MatrixType>
123 isAllocated_ =
false;
124 isInitialized_ =
false;
126 A_local_ = Teuchos::null;
127 Graph_ = Teuchos::null;
134 if (!L_solver_.is_null()) {
135 L_solver_->setMatrix(Teuchos::null);
137 if (!U_solver_.is_null()) {
138 U_solver_->setMatrix(Teuchos::null);
148 template <
class MatrixType>
152 L_.is_null(), std::runtime_error,
153 "Ifpack2::RILUK::getL: The L factor "
154 "is null. Please call initialize() (and preferably also compute()) "
155 "before calling this method. If the input matrix has not yet been set, "
156 "you must first call setMatrix() with a nonnull input matrix before you "
157 "may call initialize() or compute().");
161 template <
class MatrixType>
162 const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
168 D_.is_null(), std::runtime_error,
169 "Ifpack2::RILUK::getD: The D factor "
170 "(of diagonal entries) is null. Please call initialize() (and "
171 "preferably also compute()) before calling this method. If the input "
172 "matrix has not yet been set, you must first call setMatrix() with a "
173 "nonnull input matrix before you may call initialize() or compute().");
177 template <
class MatrixType>
181 U_.is_null(), std::runtime_error,
182 "Ifpack2::RILUK::getU: The U factor "
183 "is null. Please call initialize() (and preferably also compute()) "
184 "before calling this method. If the input matrix has not yet been set, "
185 "you must first call setMatrix() with a nonnull input matrix before you "
186 "may call initialize() or compute().");
190 template <
class MatrixType>
193 A_.
is_null(), std::runtime_error,
194 "Ifpack2::RILUK::getNodeSmootherComplexity: "
195 "The input matrix A is null. Please call setMatrix() with a nonnull "
196 "input matrix, then call compute(), before calling this method.");
198 if (!L_.is_null() && !U_.is_null())
199 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
204 template <
class MatrixType>
210 A_.
is_null(), std::runtime_error,
211 "Ifpack2::RILUK::getDomainMap: "
212 "The matrix is null. Please call setMatrix() with a nonnull input "
213 "before calling this method.");
217 Graph_.is_null(), std::runtime_error,
218 "Ifpack2::RILUK::getDomainMap: "
219 "The computed graph is null. Please call initialize() before calling "
221 return Graph_->getL_Graph()->getDomainMap();
224 template <
class MatrixType>
230 A_.
is_null(), std::runtime_error,
231 "Ifpack2::RILUK::getRangeMap: "
232 "The matrix is null. Please call setMatrix() with a nonnull input "
233 "before calling this method.");
237 Graph_.is_null(), std::runtime_error,
238 "Ifpack2::RILUK::getRangeMap: "
239 "The computed graph is null. Please call initialize() before calling "
241 return Graph_->getL_Graph()->getRangeMap();
244 template <
class MatrixType>
250 if (!isKokkosKernelsStream_) {
259 L_ =
rcp(
new crs_matrix_type(Graph_->getL_Graph()));
260 U_ =
rcp(
new crs_matrix_type(Graph_->getU_Graph()));
261 L_->setAllToScalar(STS::zero());
262 U_->setAllToScalar(STS::zero());
267 D_ =
rcp(
new vec_type(Graph_->getL_Graph()->getRowMap()));
269 L_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_);
270 U_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_);
271 for (
int i = 0; i < num_streams_; i++) {
275 L_v_[i] =
rcp(
new crs_matrix_type(Graph_v_[i]->getL_Graph()));
276 U_v_[i] =
rcp(
new crs_matrix_type(Graph_v_[i]->getU_Graph()));
277 L_v_[i]->setAllToScalar(STS::zero());
278 U_v_[i]->setAllToScalar(STS::zero());
280 L_v_[i]->fillComplete();
281 U_v_[i]->fillComplete();
288 template <
class MatrixType>
291 using Details::getParamTryingTypes;
295 const char prefix[] =
"Ifpack2::RILUK: ";
302 double overalloc = 2.;
314 const std::string paramName(
"fact: iluk level-of-fill");
315 getParamTryingTypes<int, int, global_ordinal_type, double, float>(fillLevel, params, paramName, prefix);
320 const std::string paramName(
"fact: absolute threshold");
321 getParamTryingTypes<magnitude_type, magnitude_type, double>(absThresh, params, paramName, prefix);
324 const std::string paramName(
"fact: relative threshold");
325 getParamTryingTypes<magnitude_type, magnitude_type, double>(relThresh, params, paramName, prefix);
328 const std::string paramName(
"fact: relax value");
329 getParamTryingTypes<magnitude_type, magnitude_type, double>(relaxValue, params, paramName, prefix);
332 const std::string paramName(
"fact: iluk overalloc");
333 getParamTryingTypes<double, double>(overalloc, params, paramName, prefix);
337 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
339 static const char typeName[] =
"fact: type";
341 if (!params.
isType<std::string>(typeName))
break;
344 Array<std::string> ilukimplTypeStrs;
345 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
346 Details::IlukImplType::loadPLTypeOption(ilukimplTypeStrs, ilukimplTypeEnums);
348 s2i(ilukimplTypeStrs(), ilukimplTypeEnums(), typeName,
false);
353 if (ilukimplType == Details::IlukImplType::KSPILUK) {
354 this->isKokkosKernelsSpiluk_ =
true;
356 this->isKokkosKernelsSpiluk_ =
false;
360 const std::string paramName(
"fact: kspiluk number-of-streams");
361 getParamTryingTypes<int, int, global_ordinal_type>(nstreams, params, paramName, prefix);
365 L_solver_->setParameters(params);
366 U_solver_->setParameters(params);
372 LevelOfFill_ = fillLevel;
373 Overalloc_ = overalloc;
374 #ifdef KOKKOS_ENABLE_OPENMP
375 if constexpr (std::is_same_v<execution_space, Kokkos::OpenMP>) {
379 num_streams_ = nstreams;
381 if (num_streams_ >= 1) {
382 this->isKokkosKernelsStream_ =
true;
384 if (params.
isParameter(
"fact: kspiluk reordering in streams"))
385 hasStreamReordered_ = params.
get<
bool>(
"fact: kspiluk reordering in streams");
387 this->isKokkosKernelsStream_ =
false;
395 if (absThresh != -STM::one()) {
396 Athresh_ = absThresh;
398 if (relThresh != -STM::one()) {
399 Rthresh_ = relThresh;
401 if (relaxValue != -STM::one()) {
402 RelaxValue_ = relaxValue;
406 template <
class MatrixType>
412 template <
class MatrixType>
418 template <
class MatrixType>
423 using Teuchos::rcp_dynamic_cast;
424 using Teuchos::rcp_implicit_cast;
429 if (A->getRowMap()->getComm()->getSize() == 1 ||
430 A->getRowMap()->isSameAs(*(A->getColMap()))) {
437 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
439 if (!A_lf_r.is_null()) {
449 template <
class MatrixType>
455 using Teuchos::rcp_const_cast;
456 using Teuchos::rcp_dynamic_cast;
457 using Teuchos::rcp_implicit_cast;
462 typedef Tpetra::Map<local_ordinal_type,
466 const char prefix[] =
"Ifpack2::RILUK::initialize: ";
469 "call setMatrix() with a nonnull input before calling this method.");
471 "fill complete. You may not invoke initialize() or compute() with this "
472 "matrix until the matrix is fill complete. If your matrix is a "
473 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
474 "range Maps, if appropriate) before calling this method.");
477 double startTime = timer.wallTime();
488 isInitialized_ =
false;
489 isAllocated_ =
false;
491 Graph_ = Teuchos::null;
493 reordered_x_ =
nullptr;
494 reordered_y_ =
nullptr;
496 if (isKokkosKernelsStream_) {
497 Graph_v_ = std::vector<Teuchos::RCP<iluk_graph_type> >(num_streams_);
498 A_local_diagblks = std::vector<local_matrix_device_type>(num_streams_);
499 for (
int i = 0; i < num_streams_; i++) {
500 Graph_v_[i] = Teuchos::null;
504 A_local_ = makeLocalFilter(A_);
507 A_local_.is_null(), std::logic_error,
508 "Ifpack2::RILUK::initialize: "
509 "makeLocalFilter returned null; it failed to compute A_local. "
510 "Please report this bug to the Ifpack2 developers.");
519 A_local_crs_ = Details::getCrsMatrix(A_local_);
520 if (A_local_crs_.is_null()) {
521 local_ordinal_type numRows = A_local_->getLocalNumRows();
522 Array<size_t> entriesPerRow(numRows);
523 for (local_ordinal_type i = 0; i < numRows; i++) {
524 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
528 A_local_->getColMap(),
531 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
532 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
533 for (local_ordinal_type i = 0; i < numRows; i++) {
534 size_t numEntries = 0;
535 A_local_->getLocalRowCopy(i, indices, values, numEntries);
536 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
538 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
541 if (!isKokkosKernelsStream_) {
543 LevelOfFill_, 0, Overalloc_));
545 std::vector<int> weights(num_streams_);
546 std::fill(weights.begin(), weights.end(), 1);
547 exec_space_instances_ = Kokkos::Experimental::partition_space(
execution_space(), weights);
549 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
550 if (!hasStreamReordered_) {
551 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks);
553 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks,
true);
554 reverse_perm_v_.resize(perm_v_.size());
555 for (
size_t istream = 0; istream < perm_v_.size(); ++istream) {
556 using perm_type =
typename lno_nonzero_view_t::non_const_type;
557 const auto perm = perm_v_[istream];
558 const auto perm_length = perm.extent(0);
559 perm_type reverse_perm(
560 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
562 Kokkos::parallel_for(
563 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
564 KOKKOS_LAMBDA(
const local_ordinal_type ii) {
565 reverse_perm(perm(ii)) = ii;
567 reverse_perm_v_[istream] = reverse_perm;
571 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
572 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
573 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
575 for (
int i = 0; i < num_streams_; i++) {
576 A_local_diagblks_rowmap_v_[i] = A_local_diagblks[i].graph.row_map;
577 A_local_diagblks_entries_v_[i] = A_local_diagblks[i].graph.entries;
578 A_local_diagblks_values_v_[i] = A_local_diagblks[i].values;
581 A_local_diagblks[i].numRows(),
582 A_local_crs_->getRowMap()->getComm()));
584 A_local_diagblks[i].numCols(),
585 A_local_crs_->getColMap()->getComm()));
587 A_local_diagblks_ColMap,
588 A_local_diagblks[i]));
590 LevelOfFill_, 0, Overalloc_));
595 if (this->isKokkosKernelsSpiluk_) {
596 if (!isKokkosKernelsStream_) {
597 this->KernelHandle_ =
Teuchos::rcp(
new kk_handle_type());
598 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
599 A_local_->getLocalNumRows(),
600 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
601 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
602 Graph_->initialize(KernelHandle_);
604 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
605 for (
int i = 0; i < num_streams_; i++) {
606 KernelHandle_v_[i] =
Teuchos::rcp(
new kk_handle_type());
607 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
608 A_local_diagblks[i].numRows(),
609 2 * A_local_diagblks[i].nnz() * (LevelOfFill_ + 1),
610 2 * A_local_diagblks[i].nnz() * (LevelOfFill_ + 1));
611 Graph_v_[i]->initialize(KernelHandle_v_[i]);
615 Graph_->initialize();
619 checkOrderingConsistency(*A_local_);
620 if (!isKokkosKernelsStream_) {
621 L_solver_->setMatrix(L_);
623 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
624 L_solver_->setMatrices(L_v_);
626 L_solver_->initialize();
628 if (!isKokkosKernelsStream_) {
629 U_solver_->setMatrix(U_);
631 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
632 U_solver_->setMatrices(U_v_);
634 U_solver_->initialize();
641 isInitialized_ =
true;
643 initializeTime_ += (timer.wallTime() - startTime);
646 template <
class MatrixType>
654 bool gidsAreConsistentlyOrdered =
true;
655 global_ordinal_type indexOfInconsistentGID = 0;
656 for (global_ordinal_type i = 0; i < rowGIDs.
size(); ++i) {
657 if (rowGIDs[i] != colGIDs[i]) {
658 gidsAreConsistentlyOrdered =
false;
659 indexOfInconsistentGID = i;
664 "The ordering of the local GIDs in the row and column maps is not the same"
666 <<
"at index " << indexOfInconsistentGID
667 <<
". Consistency is required, as all calculations are done with"
669 <<
"local indexing.");
672 template <
class MatrixType>
673 void RILUK<MatrixType>::
674 initAllValues(
const row_matrix_type& A) {
680 using Teuchos::reduceAll;
681 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
683 size_t NumIn = 0, NumL = 0, NumU = 0;
684 bool DiagFound =
false;
685 size_t NumNonzeroDiags = 0;
686 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
690 nonconst_local_inds_host_view_type InI(
"InI", MaxNumEntries);
693 nonconst_values_host_view_type InV(
"InV", MaxNumEntries);
698 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
703 L_->setAllToScalar(STS::zero());
704 U_->setAllToScalar(STS::zero());
707 D_->putScalar(STS::zero());
708 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
710 RCP<const map_type> rowMap = L_->getRowMap();
721 for (
size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
722 local_ordinal_type local_row = myRow;
726 A.getLocalRowCopy(local_row, InI, InV, NumIn);
734 for (
size_t j = 0; j < NumIn; ++j) {
735 const local_ordinal_type k = InI[j];
737 if (k == local_row) {
740 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
743 true, std::runtime_error,
744 "Ifpack2::RILUK::initAllValues: current "
746 << k <<
" < 0. I'm not sure why this is an error; it is "
747 "probably an artifact of the undocumented assumptions of the "
748 "original implementation (likely copied and pasted from Ifpack). "
749 "Nevertheless, the code I found here insisted on this being an error "
750 "state, so I will throw an exception here.");
751 }
else if (k < local_row) {
755 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
767 DV(local_row) = Athresh_;
772 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
776 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
782 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
786 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
794 isInitialized_ =
true;
797 template <
class MatrixType>
798 void RILUK<MatrixType>::compute_serial() {
801 initAllValues(*A_local_);
806 const scalar_type MinDiagonalValue = STS::rmin();
807 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
809 size_t NumIn, NumL, NumU;
812 const size_t MaxNumEntries =
813 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
817 size_t num_cols = U_->getColMap()->getLocalNumElements();
820 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
824 using IST =
typename row_matrix_type::impl_scalar_type;
825 for (
size_t i = 0; i < L_->getLocalNumRows(); ++i) {
826 local_ordinal_type local_row = i;
829 local_inds_host_view_type UUI;
830 values_host_view_type UUV;
834 NumIn = MaxNumEntries;
835 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
836 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()), MaxNumEntries);
838 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
841 InI[NumL] = local_row;
843 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
844 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
846 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
847 NumIn = NumL + NumU + 1;
850 for (
size_t j = 0; j < NumIn; ++j) {
854 scalar_type diagmod = STS::zero();
856 for (
size_t jj = 0; jj < NumL; ++jj) {
857 local_ordinal_type j = InI[jj];
858 IST multiplier = InV[jj];
860 InV[jj] *=
static_cast<scalar_type
>(DV(j));
862 U_->getLocalRowView(j, UUI, UUV);
865 if (RelaxValue_ == STM::zero()) {
866 for (
size_t k = 0; k < NumUU; ++k) {
867 const int kk = colflag[UUI[k]];
872 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
877 for (
size_t k = 0; k < NumUU; ++k) {
881 const int kk = colflag[UUI[k]];
883 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
885 diagmod -=
static_cast<scalar_type
>(multiplier * UUV[k]);
893 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
898 if (RelaxValue_ != STM::zero()) {
899 DV(i) += RelaxValue_ * diagmod;
902 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
903 if (STS::real(DV(i)) < STM::zero()) {
904 DV(i) = -MinDiagonalValue;
906 DV(i) = MinDiagonalValue;
909 DV(i) =
static_cast<impl_scalar_type
>(STS::one()) / DV(i);
912 for (
size_t j = 0; j < NumU; ++j) {
913 InV[NumL + 1 + j] *=
static_cast<scalar_type
>(DV(i));
918 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
922 for (
size_t j = 0; j < NumIn; ++j) {
923 colflag[InI[j]] = -1;
934 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
935 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
938 L_solver_->setMatrix(L_);
939 L_solver_->compute();
940 U_solver_->setMatrix(U_);
941 U_solver_->compute();
944 template <
class MatrixType>
945 void RILUK<MatrixType>::compute_kkspiluk() {
949 L_->setAllToScalar(STS::zero());
950 U_->setAllToScalar(STS::zero());
952 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
953 auto lclL = L_->getLocalMatrixDevice();
954 row_map_type L_rowmap = lclL.graph.row_map;
955 auto L_entries = lclL.graph.entries;
956 auto L_values = lclL.values;
958 auto lclU = U_->getLocalMatrixDevice();
959 row_map_type U_rowmap = lclU.graph.row_map;
960 auto U_entries = lclU.graph.entries;
961 auto U_values = lclU.values;
963 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
964 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
965 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
966 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
968 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
969 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
971 L_solver_->compute();
972 U_solver_->compute();
975 template <
class MatrixType>
976 void RILUK<MatrixType>::compute_kkspiluk_stream() {
977 for (
int i = 0; i < num_streams_; i++) {
978 L_v_[i]->resumeFill();
979 U_v_[i]->resumeFill();
981 L_v_[i]->setAllToScalar(STS::zero());
982 U_v_[i]->setAllToScalar(STS::zero());
984 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
985 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
986 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
987 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
988 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
989 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
990 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
991 for (
int i = 0; i < num_streams_; i++) {
992 auto lclL = L_v_[i]->getLocalMatrixDevice();
993 L_rowmap_v[i] = lclL.graph.row_map;
994 L_entries_v[i] = lclL.graph.entries;
995 L_values_v[i] = lclL.values;
997 auto lclU = U_v_[i]->getLocalMatrixDevice();
998 U_rowmap_v[i] = lclU.graph.row_map;
999 U_entries_v[i] = lclU.graph.entries;
1000 U_values_v[i] = lclU.values;
1001 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1005 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1008 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1009 const auto A_nrows = lclMtx.numRows();
1010 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1011 ? (A_nrows / num_streams_)
1012 : (A_nrows / num_streams_ + 1);
1013 for (
int i = 0; i < num_streams_; i++) {
1014 const auto start_row_offset = i * rows_per_block;
1015 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1016 auto colindices = A_local_diagblks_entries_v_[i];
1017 auto values = A_local_diagblks_values_v_[i];
1018 const bool reordered = hasStreamReordered_;
1019 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1020 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1021 Kokkos::parallel_for(
1022 pol, KOKKOS_LAMBDA(
const typename TeamPolicy::member_type& team) {
1023 const auto irow = team.league_rank();
1024 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1025 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1026 const auto begin_row = rowptrs(irow);
1027 const auto num_entries = rowptrs(irow + 1) - begin_row;
1028 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1029 const auto colidx = colindices(begin_row + j);
1030 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1032 const int offset = KokkosSparse::findRelOffset(
1033 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1034 values(begin_row + j) = A_local_crs_row.value(offset);
1040 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1041 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1042 L_rowmap_v, L_entries_v, L_values_v,
1043 U_rowmap_v, U_entries_v, U_values_v);
1044 for (
int i = 0; i < num_streams_; i++) {
1045 L_v_[i]->fillComplete();
1046 U_v_[i]->fillComplete();
1049 L_solver_->compute();
1050 U_solver_->compute();
1053 template <
class MatrixType>
1059 using Teuchos::rcp_const_cast;
1060 using Teuchos::rcp_dynamic_cast;
1061 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1067 "call setMatrix() with a nonnull input before calling this method.");
1069 "fill complete. You may not invoke initialize() or compute() with this "
1070 "matrix until the matrix is fill complete. If your matrix is a "
1071 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1072 "range Maps, if appropriate) before calling this method.");
1074 if (!isInitialized()) {
1082 double startTime = timer.
wallTime();
1084 isComputed_ =
false;
1086 if (!this->isKokkosKernelsSpiluk_) {
1090 if (!A_local_crs_nc_.is_null()) {
1091 A_local_crs_nc_->resumeFill();
1093 Array<size_t> entriesPerRow(numRows);
1095 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
1098 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
1099 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
1101 size_t numEntries = 0;
1102 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1103 A_local_crs_nc_->replaceLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
1105 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1108 if (!isKokkosKernelsStream_) {
1111 compute_kkspiluk_stream();
1117 computeTime_ += (timer.
wallTime() - startTime);
1121 template <
typename MV,
typename Map>
1122 void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr,
const Map& map,
const size_t numVectors,
bool initialize) {
1123 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1124 mv_ptr.reset(
new MV(map, numVectors, initialize));
1129 template <
class MatrixType>
1131 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1132 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1137 using Teuchos::rcpFromRef;
1140 A_.
is_null(), std::runtime_error,
1141 "Ifpack2::RILUK::apply: The matrix is "
1142 "null. Please call setMatrix() with a nonnull input, then initialize() "
1143 "and compute(), before calling this method.");
1145 !isComputed(), std::runtime_error,
1146 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1147 "you must call compute() before calling this method.");
1148 TEUCHOS_TEST_FOR_EXCEPTION(
1149 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1150 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1151 "X.getNumVectors() = "
1152 << X.getNumVectors()
1153 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
1154 TEUCHOS_TEST_FOR_EXCEPTION(
1156 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1157 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1158 "fixed. There is a FIXME in this file about this very issue.");
1159 #ifdef HAVE_IFPACK2_DEBUG
1161 if (!isKokkosKernelsStream_) {
1163 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1168 for (
size_t j = 0; j < X.getNumVectors(); ++j) {
1169 if (STM::isnaninf(norms[j])) {
1174 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1176 #endif // HAVE_IFPACK2_DEBUG
1182 double startTime = timer.
wallTime();
1185 if (alpha == one && beta == zero) {
1186 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && hasStreamReordered_) {
1187 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1188 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1190 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1191 auto X_j = X.getVector(j);
1192 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1193 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1194 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1197 for (
int i = 0; i < num_streams_; i++) {
1198 auto perm_i = perm_v_[i];
1199 stream_end = stream_begin + perm_i.extent(0);
1200 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1201 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1202 Kokkos::parallel_for(
1203 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1204 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1206 stream_begin = stream_end;
1212 L_solver_->apply(*reordered_x_, Y, mode);
1214 U_solver_->apply(Y, *reordered_y_, mode);
1217 U_solver_->apply(*reordered_x_, Y, mode);
1219 L_solver_->apply(Y, *reordered_y_, mode);
1222 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1223 auto Y_j = Y.getVectorNonConst(j);
1224 auto ReorderedY_j = reordered_y_->getVector(j);
1225 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1226 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1229 for (
int i = 0; i < num_streams_; i++) {
1230 auto perm_i = perm_v_[i];
1231 stream_end = stream_begin + perm_i.extent(0);
1232 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1233 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1234 Kokkos::parallel_for(
1235 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1236 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1238 stream_begin = stream_end;
1244 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1248 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1251 L_solver_->apply(X, *Y_tmp_, mode);
1253 if (!this->isKokkosKernelsSpiluk_) {
1256 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1259 U_solver_->apply(*Y_tmp_, Y, mode);
1262 L_solver_->apply(X, Y, mode);
1264 if (!this->isKokkosKernelsSpiluk_) {
1267 Y.elementWiseMultiply(one, *D_, Y, zero);
1270 U_solver_->apply(Y, Y, mode);
1273 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1277 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1280 U_solver_->apply(X, *Y_tmp_, mode);
1282 if (!this->isKokkosKernelsSpiluk_) {
1288 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1291 L_solver_->apply(*Y_tmp_, Y, mode);
1294 U_solver_->apply(X, Y, mode);
1296 if (!this->isKokkosKernelsSpiluk_) {
1302 Y.elementWiseMultiply(one, *D_, Y, zero);
1305 L_solver_->apply(Y, Y, mode);
1310 if (alpha == zero) {
1320 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1321 apply(X, *Y_tmp_, mode);
1322 Y.update(alpha, *Y_tmp_, beta);
1327 #ifdef HAVE_IFPACK2_DEBUG
1332 for (
size_t j = 0; j < Y.getNumVectors(); ++j) {
1333 if (STM::isnaninf(norms[j])) {
1338 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1340 #endif // HAVE_IFPACK2_DEBUG
1343 applyTime_ += (timer.
wallTime() - startTime);
1379 template <
class MatrixType>
1381 std::ostringstream os;
1386 os <<
"\"Ifpack2::RILUK\": {";
1387 os <<
"Initialized: " << (isInitialized() ?
"true" :
"false") <<
", "
1388 <<
"Computed: " << (isComputed() ?
"true" :
"false") <<
", ";
1390 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1392 if (isKokkosKernelsSpiluk_) os <<
"KK-SPILUK, ";
1393 if (isKokkosKernelsStream_) os <<
"KK-Stream, ";
1396 os <<
"Matrix: null";
1398 os <<
"Global matrix dimensions: ["
1399 << A_->getGlobalNumRows() <<
", " << A_->getGlobalNumCols() <<
"]"
1400 <<
", Global nnz: " << A_->getGlobalNumEntries();
1403 if (!L_solver_.is_null()) os <<
", " << L_solver_->description();
1404 if (!U_solver_.is_null()) os <<
", " << U_solver_->description();
1412 #define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1413 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_RILUK_def.hpp:228
node_type::execution_space execution_space
The Kokkos execution space of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:241
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
Definition: Ifpack2_RILUK_def.hpp:420
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:229
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:166
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:450
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_RILUK_decl.hpp:248
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_RILUK_def.hpp:117
std::string description() const
A one-line description of this object.
Definition: Ifpack2_RILUK_def.hpp:1380
void setParameters(const Teuchos::ParameterList ¶ms)
Definition: Ifpack2_RILUK_def.hpp:290
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:213
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_RILUK_def.hpp:191
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition: Ifpack2_RILUK_def.hpp:414
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_RILUK_decl.hpp:250
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:179
bool isParameter(const std::string &name) const
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:232
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_RILUK_def.hpp:208
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:226
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_RILUK_def.hpp:94
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:1054
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_RILUK_def.hpp:1131
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_RILUK_decl.hpp:235
void resize(size_type new_size, const value_type &x=value_type())
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:67
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition: Ifpack2_RILUK_def.hpp:408
bool isType(const std::string &name) const
Declaration of MDF interface.
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:127
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:150
std::string typeName(const T &t)
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:223