10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
23 #include "MueLu_AmalgamationInfo.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
29 #include "MueLu_Utilities.hpp"
37 #include "MueLu_ScalarDroppingClassical.hpp"
38 #include "MueLu_ScalarDroppingDistanceLaplacian.hpp"
40 #include "MueLu_VectorDroppingClassical.hpp"
41 #include "MueLu_VectorDroppingDistanceLaplacian.hpp"
// Fragment of the routine that assembles and returns `validParamList`, the list of
// parameters this factory accepts.
// NOTE(review): the signature line and a number of body lines (including the matching
// #else/#endif directives) are absent from this excerpt, so the exact preprocessor branch
// each statement belongs to cannot be determined from here.
45 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Helper macro: copies the entry called `name` from MasterList into validParamList.
49 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
63 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
78 SET_VALID_ENTRY(
"filtered matrix: spread lumping diag dom growth factor");
// The helper macro is only needed while the entries are being registered.
82 #undef SET_VALID_ENTRY
// Boolean option "lightweight wrap", registered with default value true.
83 validParamList->
set<
bool>(
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
84 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// Restrict "aggregation: drop scheme" to the listed strings. This list contains both the
// two short scheme names ("point-wise", "cut-drop") and the longer algorithm names.
87 validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"point-wise",
"cut-drop",
"signed classical sa",
"classical",
"distance laplacian",
"signed classical",
"block diagonal",
"block diagonal classical",
"block diagonal distance laplacian",
"block diagonal signed classical",
"block diagonal colored signed classical",
"signed classical distance laplacian",
"signed classical sa distance laplacian"))));
// Restrict the strength-of-connection measure to the four listed names.
92 validParamList->
getEntry(
"aggregation: strength-of-connection: measure").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"smoothed aggregation",
"signed smoothed aggregation",
"signed ruge-stueben",
"unscaled"))));
// Factory slot for "UnAmalgamationInfo"; the registered default is Teuchos::null.
96 validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
101 return validParamList;
// Fragment of the routine that declares which level data this factory needs
// (signature line not visible in this excerpt).
// Visible requests: "A" and "UnAmalgamationInfo" unconditionally; "Coordinates", "Material"
// and "BlockNumber" further down, each after a condition that is only partly visible.
104 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
106 Input(currentLevel,
"A");
107 Input(currentLevel,
"UnAmalgamationInfo");
// Coordinates are flagged as needed when the strength-of-connection matrix is the
// distance Laplacian.
111 std::string socUsesMatrix = pL.
get<std::string>(
"aggregation: strength-of-connection: matrix");
112 bool needCoords = (socUsesMatrix ==
"distance laplacian");
113 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// With old-style parameters, a drop scheme whose name contains "distance laplacian"
// also sets the flag.
114 std::string droppingMethod = pL.
get<std::string>(
"aggregation: drop scheme");
115 needCoords |= (droppingMethod.find(
"distance laplacian") != std::string::npos);
// NOTE(review): the line guarding this request is not visible; presumably `if (needCoords)`.
118 Input(currentLevel,
"Coordinates");
119 std::string distLaplMetric = pL.
get<std::string>(
"aggregation: distance laplacian metric");
// "Material" is requested only for the "material" metric.
120 if (distLaplMetric ==
"material")
121 Input(currentLevel,
"Material");
124 bool useBlocking = pL.
get<
bool>(
"aggregation: use blocking");
125 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// With old-style parameters, a drop scheme whose name contains "block diagonal"
// also turns blocking on.
126 useBlocking |= (droppingMethod.find(
"block diagonal") != std::string::npos);
// NOTE(review): the line guarding this request is not visible; presumably `if (useBlocking)`.
129 Input(currentLevel,
"BlockNumber");
// Fragment of the top-level build routine (signature line not visible in this excerpt).
// Visible steps: fetch "A", derive a block size, delegate to BuildScalar or BuildVector,
// count boundary nodes on device, and print global statistics.
133 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
136 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
// Fixed block size divided by the storage block size.
138 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
// Both build paths return a (dropped-entry count, boundary-node view) tuple.
140 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
// NOTE(review): the condition choosing between the two calls is not visible here;
// presumably it depends on blkSize.
142 results = BuildScalar(currentLevel);
144 results = BuildVector(currentLevel);
148 auto boundaryNodes = std::get<1>(results);
// Count the entries of boundaryNodes that are set (reduction body only partly visible).
150 GO numLocalBoundaryNodes = 0;
152 Kokkos::parallel_reduce(
153 "MueLu:CoalesceDropF:Build:bnd",
range_type(0, boundaryNodes.extent(0)),
154 KOKKOS_LAMBDA(
const LO i,
GO& n) {
155 if (boundaryNodes(i))
158 numLocalBoundaryNodes);
161 auto comm = A->getRowMap()->getComm();
// Local statistics: [0] = boundary nodes, [1] = dropped entries.
// NOTE(review): the definition of numDropped and the call that fills globalStats are
// not visible in this excerpt.
163 std::vector<GlobalOrdinal> localStats = {numLocalBoundaryNodes, numDropped};
164 std::vector<GlobalOrdinal> globalStats(2);
167 GO numGlobalTotal = A->getGlobalNumEntries();
168 GO numGlobalBoundaryNodes = globalStats[0];
169 GO numGlobalDropped = globalStats[1];
171 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
// The percentage is only printed when the matrix has entries, which avoids dividing by zero.
172 if (numGlobalTotal != 0) {
173 GetOStream(
Statistics1) <<
"Number of dropped entries: "
174 << numGlobalDropped <<
"/" << numGlobalTotal
175 <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
// Launches one Kokkos::parallel_for, labelled "CoalesceDrop::BoundaryDetection", over
// the index range [0, boundaryNodes.extent(0)).
//   lclA          - local matrix; in the visible lines only its ordinal_type and
//                   execution_space typedefs are used
//   boundaryNodes - view whose first extent sets the iteration range
//   functors      - variadic pack of functors
// NOTE(review): the line that defines `boundaries` is not visible in this excerpt;
// presumably it is built from `functors...`.
181 template <
class local_matrix_type,
class boundary_nodes_view,
class... Functors>
182 void runBoundaryFunctors(local_matrix_type& lclA, boundary_nodes_view& boundaryNodes, Functors&... functors) {
183 using local_ordinal_type =
typename local_matrix_type::ordinal_type;
184 using execution_space =
typename local_matrix_type::execution_space;
185 using range_type = Kokkos::RangePolicy<local_ordinal_type, execution_space>;
186 auto range = range_type(0, boundaryNodes.extent(0));
188 Kokkos::parallel_for(
"CoalesceDrop::BoundaryDetection", range, boundaries);
// Fragment of the helper that maps an old-style "aggregation: drop scheme" value onto
// the newer settings: droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure,
// symmetrizeDroppedGraph and generateColoringGraph (signature line not visible here).
191 template <
class magnitudeType>
// Scheme names that need no translation.
// NOTE(review): "piece-wise" differs from the "point-wise" spelling accepted by the
// validator and assigned further down in this routine - confirm which is intended.
193 std::set<std::string> validDroppingMethods = {
"piece-wise",
"cut-drop"};
// Anything not in the set above is treated as an old-style algorithm name.
197 if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
198 std::string algo = droppingMethod;
199 std::string classicalAlgoStr = pL.
get<std::string>(
"aggregation: classical algo");
200 std::string distanceLaplacianAlgoStr = pL.
get<std::string>(
"aggregation: distance laplacian algo");
// Strip a leading "block diagonal" (14 characters); the later substr(1) removes one
// more character (its guarding condition is not visible in this excerpt).
203 if (algo.find(
"block diagonal") == 0) {
205 algo = algo.substr(14);
207 algo = algo.substr(1);
// Family 1: classical variants. The algorithm name selects the measure ...
211 if ((algo ==
"classical") || (algo ==
"signed classical sa") || (algo ==
"signed classical") || (algo ==
"colored signed classical")) {
214 if (algo ==
"classical") {
215 socUsesMeasure =
"smoothed aggregation";
216 }
else if (algo ==
"signed classical sa") {
217 socUsesMeasure =
"signed smoothed aggregation";
218 }
else if (algo ==
"signed classical") {
219 socUsesMeasure =
"signed ruge-stueben";
220 }
else if (algo ==
"colored signed classical") {
// The colored variant uses the same measure and additionally requests a coloring graph.
221 socUsesMeasure =
"signed ruge-stueben";
222 generateColoringGraph =
true;
// ... and "aggregation: classical algo" selects the dropping method.
225 if (classicalAlgoStr ==
"default")
226 droppingMethod =
"point-wise";
227 else if (classicalAlgoStr ==
"unscaled cut") {
// The unscaled cut overrides the measure chosen above.
228 socUsesMeasure =
"unscaled";
229 droppingMethod =
"cut-drop";
230 }
else if (classicalAlgoStr ==
"scaled cut") {
231 droppingMethod =
"cut-drop";
232 }
else if (classicalAlgoStr ==
"scaled cut symmetric") {
233 droppingMethod =
"cut-drop";
234 symmetrizeDroppedGraph =
true;
// Family 2: distance-Laplacian variants; same structure as above, with the
// strength-of-connection matrix switched to the distance Laplacian.
236 }
else if ((algo ==
"distance laplacian") || (algo ==
"signed classical sa distance laplacian") || (algo ==
"signed classical distance laplacian")) {
237 socUsesMatrix =
"distance laplacian";
239 if (algo ==
"distance laplacian") {
240 socUsesMeasure =
"smoothed aggregation";
241 }
else if (algo ==
"signed classical sa distance laplacian") {
242 socUsesMeasure =
"signed smoothed aggregation";
243 }
else if (algo ==
"signed classical distance laplacian") {
244 socUsesMeasure =
"signed ruge-stueben";
247 if (distanceLaplacianAlgoStr ==
"default")
248 droppingMethod =
"point-wise";
249 else if (distanceLaplacianAlgoStr ==
"unscaled cut") {
250 socUsesMeasure =
"unscaled";
251 droppingMethod =
"cut-drop";
252 }
else if (distanceLaplacianAlgoStr ==
"scaled cut") {
253 droppingMethod =
"cut-drop";
254 }
else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric") {
255 droppingMethod =
"cut-drop";
256 symmetrizeDroppedGraph =
true;
// Nothing is left after removing the "block diagonal" prefix (branch body not visible here).
258 }
else if (algo ==
"") {
// Fragment of the scalar build path (presumably BuildScalar; signature line not visible).
// Visible stages: read parameters, compute dropping decisions through the ScalarDropping*
// classes, assemble the filtered matrix and graph, optionally build a coloring graph,
// and publish "DofsPerNode", "Graph" and "A" on the level.
// Returns a tuple (numDropped, boundaryNodes).
266 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
273 using local_matrix_type =
typename MatrixType::local_matrix_type;
274 using local_graph_type =
typename GraphType::local_graph_type;
275 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
276 using entries_type =
typename local_graph_type::entries_type::non_const_type;
277 using values_type =
typename local_matrix_type::values_type::non_const_type;
278 using device_type =
typename Node::device_type;
279 using memory_space =
typename device_type::memory_space;
// One DecisionType per matrix entry (see the allocation of `results` below).
280 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
287 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
// ---- Parameters: boundary detection ----
294 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
295 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
// ---- Parameters: dropping ----
299 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
300 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
301 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
302 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
303 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
304 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
// Drop threshold: with "use ml scaling of drop tol" the tolerance is divided by
// 2^levelID; the second assignment (its `else` line is not visible) takes it unchanged.
305 magnitudeType threshold;
307 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
308 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
310 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
311 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
// ---- Parameters: filtered matrix ----
314 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
315 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
317 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
318 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
319 const std::string lumpingChoiceString = pL.get<std::string>(
"filtered matrix: lumping choice");
// NOTE(review): the statements executed for each lumping string are not visible here.
321 if (lumpingChoiceString ==
"diag lumping")
323 else if (lumpingChoiceString ==
"distributed lumping")
326 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
// ---- Parameters: coloring graph ----
329 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
330 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
331 const bool symmetrizeColoringGraph =
true;
333 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// Old-style scheme names are rewritten in place into the newer settings.
334 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
// Assemble a one-line summary of the effective settings.
338 std::stringstream ss;
339 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
340 if (socUsesMatrix ==
"distance laplacian")
341 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
342 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
343 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
// NOTE(review): the statement guarded by this check is not visible in this excerpt.
351 if (droppingMethod ==
"cut-drop")
// Device-side local matrix of A.
365 auto crsA = toCrsMatrix(A);
366 auto lclA = crsA->getLocalMatrixDevice();
// NOTE(review): the body of this branch is not visible in this excerpt.
386 if (rowSumTol <= 0.) {
// Row pointers of the filtered matrix: one entry per row plus one.
419 auto filtered_rowptr = rowptr_type(
"filtered_rowptr", lclA.numRows() + 1);
// One dropping decision per stored entry of the local matrix.
423 auto results = results_view_type(
"results", lclA.nnz());
// ---- Dropping: with a nonzero threshold, dispatch on the strength-of-connection
// matrix ("A" or "distance laplacian") and then on the measure; the measure becomes
// the last template argument of the dropping class. ----
427 if (threshold != zero) {
428 if (socUsesMatrix ==
"A") {
429 if (socUsesMeasure ==
"unscaled") {
430 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
431 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
432 }
else if (socUsesMeasure ==
"smoothed aggregation") {
433 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
434 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
435 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
436 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
437 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
438 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
439 ScalarDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold,
440 aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
442 }
else if (socUsesMatrix ==
"distance laplacian") {
443 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
444 if (socUsesMeasure ==
"unscaled") {
445 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
446 }
else if (socUsesMeasure ==
"smoothed aggregation") {
447 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
448 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
449 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
450 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
451 ScalarDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, results, filtered_rowptr, nnz_filtered, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, currentLevel, *
this);
// Initialise every decision to KEEP.
// NOTE(review): presumably this is the zero-threshold path; its `else` line is not visible.
455 Kokkos::deep_copy(results,
KEEP);
// The functor passed here is drop_boundaries in one call and no_op in the other.
// NOTE(review): the definitions of those functors and the branch structure between
// the two calls are not visible in this excerpt.
457 if (symmetrizeDroppedGraph) {
459 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, drop_boundaries);
462 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, no_op);
// Optional extra pass with the `symmetrize` functor.
466 if (symmetrizeDroppedGraph) {
468 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, symmetrize);
// Dropped entries = stored entries of A minus entries kept after filtering.
471 GO numDropped = lclA.nnz() - nnz_filtered;
// ---- Filtered matrix and graph: two variants are visible, one that copies A
// (MatrixFactory::BuildCopy) and one that allocates fresh colidx/values arrays.
// NOTE(review): the conditions selecting between them are not visible; presumably
// they depend on reuseGraph. ----
484 local_matrix_type lclFilteredA;
485 local_graph_type lclGraph;
487 filteredA = MatrixFactory::BuildCopy(A);
488 lclFilteredA = filteredA->getLocalMatrixDevice();
490 auto colidx = entries_type(
"entries", nnz_filtered);
491 lclGraph = local_graph_type(colidx, filtered_rowptr);
493 auto colidx = entries_type(
"entries", nnz_filtered);
494 auto values = values_type(
"values", nnz_filtered);
495 lclFilteredA = local_matrix_type(
"filteredA",
496 lclA.numRows(), lclA.numCols(),
498 values, filtered_rowptr, colidx);
// Fill kernels: the labels separate lumped/unlumped and graph-reuse/no-reuse variants;
// the functor definitions and the selecting conditions are not visible in this excerpt.
504 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
508 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
511 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
517 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
520 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
// Wrap the local filtered matrix using A's row, column, domain and range maps.
525 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
526 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
// Carry A's maximum-eigenvalue estimate over to the filtered matrix.
528 if (reuseEigenvalue) {
533 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
540 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
542 graph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"amalgamated graph of A"));
// ---- Optional coloring graph: filtered_rowptr is re-allocated, dropping functors are
// run again (drop_offrank when localizing, symmetrize when symmetrizing), then the
// graph is built and stored as "Coloring Graph". ----
547 if (generateColoringGraph) {
548 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
550 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
551 if (localizeColoringGraph) {
553 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, drop_offrank);
555 if (symmetrizeColoringGraph) {
557 ScalarDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, results, filtered_rowptr, nnz_filtered, useBlocking, currentLevel, *
this, symmetrize);
559 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
560 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
562 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
564 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
565 Set(currentLevel,
"Coloring Graph", colorGraph);
// ---- Publish results on the level and hand the statistics back to the caller ----
569 Set(currentLevel,
"DofsPerNode", dofsPerNode);
570 Set(currentLevel,
"Graph", graph);
571 Set(currentLevel,
"A", filteredA);
573 return std::make_tuple(numDropped, boundaryNodes);
// Fragment of the vector (block) build path (presumably BuildVector; signature line not
// visible). Visible stages: read amalgamation data and parameters, build a merged
// matrix, compute dropping decisions through the VectorDropping* classes, assemble the
// filtered matrix and graph, optionally build a coloring graph, and publish
// "DofsPerNode", "Graph" and "A" on the level.
// Returns a tuple (numDropped, boundaryNodes).
576 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
583 using local_matrix_type =
typename MatrixType::local_matrix_type;
584 using local_graph_type =
typename GraphType::local_graph_type;
585 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
586 using entries_type =
typename local_graph_type::entries_type::non_const_type;
587 using values_type =
typename local_matrix_type::values_type::non_const_type;
588 using device_type =
typename Node::device_type;
589 using memory_space =
typename device_type::memory_space;
// One DecisionType per matrix entry (see the allocation of `results` below).
590 using results_view_type = Kokkos::View<DecisionType*, memory_space>;
597 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
// Fixed block size divided by the storage block size; blkSize is reassigned further down.
617 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
619 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel,
"UnAmalgamationInfo");
// Take copies of the row and column translation arrays from the amalgamation info ...
631 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation());
632 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
// ... wrap their raw storage in unmanaged views ...
634 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
635 rowTranslationView(rowTranslationArray.
getRawPtr(), rowTranslationArray.
size());
636 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
637 colTranslationView(colTranslationArray.
getRawPtr(), colTranslationArray.
size());
// ... and deep-copy them into views allocated on Node::device_type.
640 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
641 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
642 id_translation_type rowTranslation(
"dofId2nodeId", rowTranslationArray.
size());
643 id_translation_type colTranslation(
"ov_dofId2nodeId", colTranslationArray.
size());
644 Kokkos::deep_copy(rowTranslation, rowTranslationView);
645 Kokkos::deep_copy(colTranslation, colTranslationView);
// Block size from A's fixed block size; when A carries a "stridedMaps" view, the
// block size, block id and block part size are taken from the strided map instead.
648 blkSize = A->GetFixedBlockSize();
651 if (A->IsView(
"stridedMaps") ==
true) {
655 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
656 blkId = strMap->getStridedBlockId();
658 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
// ---- Parameters: boundary detection ----
668 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
669 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
671 const bool useGreedyDirichlet = pL.get<
bool>(
"aggregation: greedy Dirichlet");
// ---- Parameters: dropping ----
675 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
676 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
677 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
678 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
679 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
680 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
// Drop threshold: with "use ml scaling of drop tol" the tolerance is divided by
// 2^levelID; the second assignment (its `else` line is not visible) takes it unchanged.
681 magnitudeType threshold;
683 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
684 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
686 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
687 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
// ---- Parameters: filtered matrix ----
690 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
691 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
693 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
694 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
695 const std::string lumpingChoiceString = pL.get<std::string>(
"filtered matrix: lumping choice");
// NOTE(review): the statements executed for each lumping string are not visible here.
697 if (lumpingChoiceString ==
"diag lumping")
699 else if (lumpingChoiceString ==
"distributed lumping")
702 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
// ---- Parameters: coloring graph ----
705 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
706 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
707 const bool symmetrizeColoringGraph =
true;
709 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
// Old-style scheme names are rewritten in place into the newer settings.
710 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold, lumpingChoice);
// Assemble a one-line summary of the effective settings.
713 std::stringstream ss;
714 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
715 if (socUsesMatrix ==
"distance laplacian")
716 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
717 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
718 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
// Cut-drop requires a tolerance of at most 1.0; larger values raise a RuntimeError.
726 if (droppingMethod ==
"cut-drop")
727 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0,
Exceptions::RuntimeError,
"For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold <<
", needs to be <= 1.0");
// Device-side local matrix of A.
740 auto crsA = toCrsMatrix(A);
741 auto lclA = crsA->getLocalMatrixDevice();
// NOTE(review): the body of this branch is not visible in this excerpt.
756 if (useGreedyDirichlet) {
// Row pointers: filtered_rowptr has one entry per matrix row plus one, graph_rowptr
// one entry per node plus one. Both views carry the label "rowptr".
786 auto filtered_rowptr = rowptr_type(
"rowptr", lclA.numRows() + 1);
787 auto graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
// Pair of entry counters, both starting at zero.
// NOTE(review): presumably (filtered-matrix nnz, graph nnz) - confirm against the
// dropping classes.
789 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
// One dropping decision per stored entry of the local matrix.
792 auto results = results_view_type(
"results", lclA.nnz());
// ---- Merged matrix: numNodes rows, nonUniqueMap->getLocalNumElements() columns.
// A parallel_scan ("MergeCount") produces nnz_merged, then a parallel_for
// ("MergeFill") fills the entries; the functor definitions are not visible here. ----
801 auto merged_rowptr = rowptr_type(
"rowptr", numNodes + 1);
805 Kokkos::parallel_scan(
"MergeCount", range, functor, nnz_merged);
807 local_graph_type lclMergedGraph;
808 auto colidx_merged = entries_type(
"entries", nnz_merged);
809 auto values_merged = values_type(
"values", nnz_merged);
811 local_matrix_type lclMergedA = local_matrix_type(
"mergedA",
812 numNodes, nonUniqueMap->getLocalNumElements(),
814 values_merged, merged_rowptr, colidx_merged);
817 Kokkos::parallel_for(
"MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
// ---- Dropping: with a nonzero threshold, dispatch on the strength-of-connection
// matrix ("A" or "distance laplacian") and then on the measure; the measure becomes
// the last template argument of the dropping class. ----
822 if (threshold != zero) {
823 if (socUsesMatrix ==
"A") {
824 if (socUsesMeasure ==
"unscaled") {
825 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
826 }
else if (socUsesMeasure ==
"smoothed aggregation") {
827 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
828 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
829 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
830 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
831 VectorDroppingClassical<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_A(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, currentLevel, *
this);
833 }
else if (socUsesMatrix ==
"distance laplacian") {
834 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
837 LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
// NOTE(review): this compares the *measure* with "distance laplacian", while the
// measure branches further down test for "unscaled", "smoothed aggregation",
// "signed ruge-stueben" and "signed smoothed aggregation" - confirm the intended
// variable. The definition of dlap_weights is not visible in this excerpt.
838 if (socUsesMeasure ==
"distance laplacian") {
839 LO dim = (
LO)coords->getNumVectors();
// Scan the weights until one differs from 1.0.
841 bool non_unity =
false;
842 for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.size(); i++) {
843 if (dlap_weights[i] != 1.0) {
// The weight count selects the metric: `dim` weights -> "weighted",
// interleaved_blocksize * dim weights -> "block weighted"; the message below
// describes the remaining case.
848 if ((
LO)dlap_weights.size() == dim) {
849 distanceLaplacianMetric =
"weighted";
850 }
else if ((
LO)dlap_weights.size() == interleaved_blocksize * dim)
851 distanceLaplacianMetric =
"block weighted";
854 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
857 GetOStream(
Statistics1) <<
"Using distance laplacian weights: " << dlap_weights << std::endl;
861 if (socUsesMeasure ==
"unscaled") {
862 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::UnscaledMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
863 }
else if (socUsesMeasure ==
"smoothed aggregation") {
864 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
865 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
866 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedRugeStuebenMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
867 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
868 VectorDroppingDistanceLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node, Misc::SignedSmoothedAggregationMeasure>::runDroppingFunctors_on_dlap(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, boundaryNodes, droppingMethod, threshold, aggregationMayCreateDirichlet, symmetrizeDroppedGraph, useBlocking, distanceLaplacianMetric, dlap_weights, interleaved_blocksize, currentLevel, *
this);
// Initialise every decision to KEEP, then run the base dropping pass with no_op.
// NOTE(review): presumably this is the zero-threshold path; its `else` line is not visible.
872 Kokkos::deep_copy(results,
KEEP);
875 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, no_op);
// Optional extra pass with the `symmetrize` functor.
878 if (symmetrizeDroppedGraph) {
880 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, symmetrize);
// Dropped entries = stored entries of A minus entries kept after filtering.
885 GO numTotal = lclA.nnz();
886 GO numDropped = numTotal - nnz_filtered;
// ---- Filtered matrix: one variant is built on lclA.graph, the other from freshly
// allocated colidx/values arrays.
// NOTE(review): the selecting condition is not visible; presumably it is reuseGraph. ----
899 local_matrix_type lclFilteredA;
901 lclFilteredA = local_matrix_type(
"filteredA", lclA.graph, lclA.numCols());
903 auto colidx = entries_type(
"entries", nnz_filtered);
904 auto values = values_type(
"values", nnz_filtered);
905 lclFilteredA = local_matrix_type(
"filteredA",
906 lclA.numRows(), lclA.numCols(),
908 values, filtered_rowptr, colidx);
// Node-level graph, sized by nnz_graph and indexed by graph_rowptr.
911 local_graph_type lclGraph;
913 auto colidx = entries_type(
"entries", nnz_graph);
914 lclGraph = local_graph_type(colidx, graph_rowptr);
// Fill kernels: the labels separate lumped/unlumped and graph-reuse/no-reuse variants;
// the functor definitions and the selecting conditions are not visible in this excerpt.
920 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
923 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
928 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
931 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
936 filteredA->SetFixedBlockSize(blkSize);
// Carry A's maximum-eigenvalue estimate over to the filtered matrix.
938 if (reuseEigenvalue) {
943 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
// The amalgamated graph uses uniqueMap and nonUniqueMap as its two maps.
948 graph =
rcp(
new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
// ---- Optional coloring graph: both row-pointer arrays are re-allocated, dropping
// functors are run again (drop_offrank when localizing, symmetrize when symmetrizing),
// then the graph is built and stored as "Coloring Graph". ----
953 if (generateColoringGraph) {
954 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
956 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
957 graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
958 if (localizeColoringGraph) {
960 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, drop_offrank);
962 if (symmetrizeColoringGraph) {
964 VectorDroppingBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::template runDroppingFunctors<>(*A, *mergedA, blkPartSize, rowTranslation, colTranslation, results, filtered_rowptr, graph_rowptr, nnz, useBlocking, currentLevel, *
this, symmetrize);
966 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
967 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
969 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
971 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
972 Set(currentLevel,
"Coloring Graph", colorGraph);
// ---- Publish results on the level and hand the statistics back to the caller ----
975 LO dofsPerNode = blkSize;
977 Set(currentLevel,
"DofsPerNode", dofsPerNode);
978 Set(currentLevel,
"Graph", graph);
979 Set(currentLevel,
"A", filteredA);
981 return std::make_tuple(numDropped, boundaryNodes);
985 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
void setValidator(RCP< const ParameterEntryValidator > const &validator)
T & get(const std::string &name, T def_value)
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold, MueLu::MatrixConstruction::lumpingType &lumpingChoice)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that drops boundary nodes for a blockSize == 1 problem.
void runBoundaryFunctors(local_matrix_type &lclA, boundary_nodes_view &boundaryNodes, Functors &...functors)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops off-rank entries.
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static void runDroppingFunctors_on_A(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
static void runDroppingFunctors_on_dlap(matrix_type &A, matrix_type &mergedA, LocalOrdinal blkPartSize, block_indices_view_type &rowTranslation, block_indices_view_type &colTranslation, results_view &results, rowptr_type &filtered_rowptr, rowptr_type &graph_rowptr, nnz_count_type &nnz, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Teuchos::Array< double > &dlap_weights, LocalOrdinal interleaved_blocksize, Level &level, const Factory &factory)
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
static void runDroppingFunctors_on_dlap(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, const std::string &distanceLaplacianMetric, Level &level, const Factory &factory)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
Functor that serially applies sub-functors to rows.
static void runDroppingFunctors_on_A(matrix_type &A, results_view &results, rowptr_type &filtered_rowptr, LocalOrdinal &nnz_filtered, boundary_nodes_type &boundaryNodes, const std::string &droppingMethod, const magnitudeType threshold, const bool aggregationMayCreateDirichlet, const bool symmetrizeDroppedGraph, const bool useBlocking, Level &level, const Factory &factory)
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception thrown to report errors in the internal logic of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)