10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
23 #include "MueLu_AmalgamationInfo.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
41 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
45 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
59 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
73 SET_VALID_ENTRY(
"filtered matrix: spread lumping diag dom growth factor");
77 #undef SET_VALID_ENTRY
78 validParamList->
set<
bool>(
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
79 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
82 validParamList->
getEntry(
"aggregation: drop scheme").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"point-wise",
"cut-drop",
"signed classical sa",
"classical",
"distance laplacian",
"signed classical",
"block diagonal",
"block diagonal classical",
"block diagonal distance laplacian",
"block diagonal signed classical",
"block diagonal colored signed classical",
"signed classical distance laplacian",
"signed classical sa distance laplacian"))));
87 validParamList->
getEntry(
"aggregation: strength-of-connection: measure").
setValidator(
rcp(
new Teuchos::StringValidator(Teuchos::tuple<std::string>(
"smoothed aggregation",
"signed smoothed aggregation",
"signed ruge-stueben",
"unscaled"))));
91 validParamList->
set<
RCP<const FactoryBase>>(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
96 return validParamList;
99 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
101 Input(currentLevel,
"A");
102 Input(currentLevel,
"UnAmalgamationInfo");
106 std::string socUsesMatrix = pL.
get<std::string>(
"aggregation: strength-of-connection: matrix");
107 bool needCoords = (socUsesMatrix ==
"distance laplacian");
108 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
109 std::string droppingMethod = pL.
get<std::string>(
"aggregation: drop scheme");
110 needCoords |= (droppingMethod.find(
"distance laplacian") != std::string::npos);
113 Input(currentLevel,
"Coordinates");
114 std::string distLaplMetric = pL.
get<std::string>(
"aggregation: distance laplacian metric");
115 if (distLaplMetric ==
"material")
116 Input(currentLevel,
"Material");
119 bool useBlocking = pL.
get<
bool>(
"aggregation: use blocking");
120 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
121 useBlocking |= (droppingMethod.find(
"block diagonal") != std::string::npos);
124 Input(currentLevel,
"BlockNumber");
128 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
131 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
133 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
135 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
137 results = BuildScalar(currentLevel);
139 results = BuildVector(currentLevel);
143 auto boundaryNodes = std::get<1>(results);
145 GO numLocalBoundaryNodes = 0;
146 GO numGlobalBoundaryNodes = 0;
148 Kokkos::parallel_reduce(
149 "MueLu:CoalesceDropF:Build:bnd",
range_type(0, boundaryNodes.extent(0)),
150 KOKKOS_LAMBDA(
const LO i,
GO& n) {
151 if (boundaryNodes(i))
154 numLocalBoundaryNodes);
156 auto comm = A->getRowMap()->getComm();
157 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
159 GO numGlobalTotal = A->getGlobalNumEntries();
163 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
164 if (numGlobalTotal != 0) {
165 GetOStream(
Statistics1) <<
"Number of dropped entries: "
166 << numGlobalDropped <<
"/" << numGlobalTotal
167 <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
172 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
175 auto material = Get<RCP<MultiVector>>(currentLevel,
"Material");
178 if (material->getNumVectors() == 1) {
179 GetOStream(
Runtime0) <<
"material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
184 material->meanValue(means());
185 std::stringstream ss;
186 ss <<
"material tensor mean =" << std::endl;
188 for (
size_t i = 0; i < spatialDim; ++i) {
190 for (
size_t j = 0; j < spatialDim; ++j) {
191 ss << means[k] <<
" ";
204 template <
class magnitudeType>
205 void translateOldAlgoParam(
const Teuchos::ParameterList& pL, std::string& droppingMethod,
bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure,
bool& symmetrizeDroppedGraph,
bool& generateColoringGraph, magnitudeType& threshold) {
206 std::set<std::string> validDroppingMethods = {
"piece-wise",
"cut-drop"};
207 if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
208 std::string algo = droppingMethod;
209 std::string classicalAlgoStr = pL.
get<std::string>(
"aggregation: classical algo");
210 std::string distanceLaplacianAlgoStr = pL.
get<std::string>(
"aggregation: distance laplacian algo");
213 if (algo.find(
"block diagonal") == 0) {
215 algo = algo.substr(14);
217 algo = algo.substr(1);
221 if ((algo ==
"classical") || (algo ==
"signed classical sa") || (algo ==
"signed classical") || (algo ==
"colored signed classical")) {
224 if (algo ==
"classical") {
225 socUsesMeasure =
"smoothed aggregation";
226 }
else if (algo ==
"signed classical sa") {
227 socUsesMeasure =
"signed smoothed aggregation";
228 }
else if (algo ==
"signed classical") {
229 socUsesMeasure =
"signed ruge-stueben";
230 }
else if (algo ==
"colored signed classical") {
231 socUsesMeasure =
"signed ruge-stueben";
232 generateColoringGraph =
true;
235 if (classicalAlgoStr ==
"default")
236 droppingMethod =
"point-wise";
237 else if (classicalAlgoStr ==
"unscaled cut") {
238 socUsesMeasure =
"unscaled";
239 droppingMethod =
"cut-drop";
240 }
else if (classicalAlgoStr ==
"scaled cut") {
241 droppingMethod =
"cut-drop";
242 }
else if (classicalAlgoStr ==
"scaled cut symmetric") {
243 droppingMethod =
"cut-drop";
244 symmetrizeDroppedGraph =
true;
246 }
else if ((algo ==
"distance laplacian") || (algo ==
"signed classical sa distance laplacian") || (algo ==
"signed classical distance laplacian")) {
247 socUsesMatrix =
"distance laplacian";
249 if (algo ==
"distance laplacian") {
250 socUsesMeasure =
"smoothed aggregation";
251 }
else if (algo ==
"signed classical sa distance laplacian") {
252 socUsesMeasure =
"signed smoothed aggregation";
253 }
else if (algo ==
"signed classical distance laplacian") {
254 socUsesMeasure =
"signed ruge-stueben";
257 if (distanceLaplacianAlgoStr ==
"default")
258 droppingMethod =
"point-wise";
259 else if (distanceLaplacianAlgoStr ==
"unscaled cut") {
260 socUsesMeasure =
"unscaled";
261 droppingMethod =
"cut-drop";
262 }
else if (distanceLaplacianAlgoStr ==
"scaled cut") {
263 droppingMethod =
"cut-drop";
264 }
else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric") {
265 droppingMethod =
"cut-drop";
266 symmetrizeDroppedGraph =
true;
268 }
else if (algo ==
"") {
276 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
283 using local_matrix_type =
typename MatrixType::local_matrix_type;
284 using local_graph_type =
typename GraphType::local_graph_type;
285 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
286 using entries_type =
typename local_graph_type::entries_type::non_const_type;
287 using values_type =
typename local_matrix_type::values_type::non_const_type;
288 using device_type =
typename Node::device_type;
289 using memory_space =
typename device_type::memory_space;
296 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
303 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
304 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
308 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
309 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
310 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
311 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
312 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
313 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
314 magnitudeType threshold;
316 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
317 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
319 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
320 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
323 const bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
324 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
325 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
327 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
328 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
330 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
333 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
334 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
335 const bool symmetrizeColoringGraph =
true;
337 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
338 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
342 std::stringstream ss;
343 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
344 if (socUsesMatrix ==
"distance laplacian")
345 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
346 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
347 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
354 if (droppingMethod ==
"cut-drop")
368 auto crsA = toCrsMatrix(A);
369 auto lclA = crsA->getLocalMatrixDevice();
387 #define MueLu_runBoundaryFunctors(...) \
389 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
390 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
395 if (rowSumTol <= 0.) {
402 #undef MueLu_runBoundaryFunctors
431 #if !defined(HAVE_MUELU_DEBUG)
432 #define MueLu_runDroppingFunctorsImpl(...) \
434 auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__); \
435 Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz_filtered); \
438 #define MueLu_runDroppingFunctorsImpl(...) \
440 auto debug = Misc::DebugFunctor(lclA, results); \
441 auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__, debug); \
442 Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz_filtered); \
448 #define MueLu_runDroppingFunctors(...) \
451 auto BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber"); \
452 auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *BlockNumber, results); \
453 MueLu_runDroppingFunctorsImpl(block_diagonalize, __VA_ARGS__); \
455 MueLu_runDroppingFunctorsImpl(__VA_ARGS__); \
460 #define MueLu_runDroppingFunctors_on_A(SoC) \
462 if (droppingMethod == "point-wise") { \
463 auto dropping = ClassicalDropping::make_drop_functor<SoC>(*A, threshold, results); \
465 if (aggregationMayCreateDirichlet) { \
466 MueLu_runDroppingFunctors(dropping, \
468 preserve_diagonals, \
469 mark_singletons_as_boundary); \
471 MueLu_runDroppingFunctors(dropping, \
473 preserve_diagonals); \
475 } else if (droppingMethod == "cut-drop") { \
476 auto comparison = CutDrop::make_comparison_functor<SoC>(*A, results); \
477 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
479 MueLu_runDroppingFunctors(drop_boundaries, \
480 preserve_diagonals, \
487 #define MueLu_runDroppingFunctors_on_dlap_inner(SoC) \
489 if (droppingMethod == "point-wise") { \
490 auto dist_laplacian_dropping = DistanceLaplacian::make_drop_functor<SoC>(*A, threshold, dist2, results); \
492 if (aggregationMayCreateDirichlet) { \
493 MueLu_runDroppingFunctors(dist_laplacian_dropping, \
495 preserve_diagonals, \
496 mark_singletons_as_boundary); \
498 MueLu_runDroppingFunctors(dist_laplacian_dropping, \
500 preserve_diagonals); \
502 } else if (droppingMethod == "cut-drop") { \
503 auto comparison = CutDrop::make_dlap_comparison_functor<SoC>(*A, dist2, results); \
504 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
506 MueLu_runDroppingFunctors(drop_boundaries, \
507 preserve_diagonals, \
514 #define MueLu_runDroppingFunctors_on_dlap(SoC) \
516 if (distanceLaplacianMetric == "unweighted") { \
517 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords); \
518 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
519 } else if (distanceLaplacianMetric == "material") { \
520 auto material = GetMaterial(currentLevel, coords->getNumVectors()); \
521 if (material->getNumVectors() == 1) { \
522 auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material); \
523 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
525 auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material); \
526 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
532 auto filtered_rowptr = rowptr_type(
"filtered_rowptr", lclA.numRows() + 1);
536 auto results = Kokkos::View<DecisionType*, memory_space>(
"results", lclA.nnz());
542 if (threshold != zero) {
546 if (socUsesMatrix ==
"A") {
547 if (socUsesMeasure ==
"unscaled") {
549 }
else if (socUsesMeasure ==
"smoothed aggregation") {
551 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
553 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
556 }
else if (socUsesMatrix ==
"distance laplacian") {
557 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
558 if (socUsesMeasure ==
"unscaled") {
560 }
else if (socUsesMeasure ==
"smoothed aggregation") {
562 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
564 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
569 Kokkos::deep_copy(results,
KEEP);
576 if (symmetrizeDroppedGraph) {
581 GO numDropped = lclA.nnz() - nnz_filtered;
594 local_matrix_type lclFilteredA;
595 local_graph_type lclGraph;
597 filteredA = MatrixFactory::BuildCopy(A);
598 lclFilteredA = filteredA->getLocalMatrixDevice();
600 auto colidx = entries_type(
"entries", nnz_filtered);
601 lclGraph = local_graph_type(colidx, filtered_rowptr);
603 auto colidx = entries_type(
"entries", nnz_filtered);
604 auto values = values_type(
"values", nnz_filtered);
605 lclFilteredA = local_matrix_type(
"filteredA",
606 lclA.numRows(), lclA.numCols(),
608 values, filtered_rowptr, colidx);
614 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
617 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
622 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
625 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
630 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
631 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
633 if (reuseEigenvalue) {
638 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
645 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
647 graph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"amalgamated graph of A"));
652 if (generateColoringGraph) {
653 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
655 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
656 if (localizeColoringGraph) {
660 if (symmetrizeColoringGraph) {
664 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
665 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
667 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
669 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
670 Set(currentLevel,
"Coloring Graph", colorGraph);
673 #undef MueLu_runDroppingFunctors_on_A
674 #undef MueLu_runDroppingFunctors_on_dlap_inner
675 #undef MueLu_runDroppingFunctors_on_dlap
676 #undef MueLu_runDroppingFunctors
677 #undef MueLu_runDroppingFunctorsImpl
680 Set(currentLevel,
"DofsPerNode", dofsPerNode);
681 Set(currentLevel,
"Graph", graph);
682 Set(currentLevel,
"A", filteredA);
684 return std::make_tuple(numDropped, boundaryNodes);
687 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
694 using local_matrix_type =
typename MatrixType::local_matrix_type;
695 using local_graph_type =
typename GraphType::local_graph_type;
696 using rowptr_type =
typename local_graph_type::row_map_type::non_const_type;
697 using entries_type =
typename local_graph_type::entries_type::non_const_type;
698 using values_type =
typename local_matrix_type::values_type::non_const_type;
699 using device_type =
typename Node::device_type;
700 using memory_space =
typename device_type::memory_space;
707 auto A = Get<RCP<Matrix>>(currentLevel,
"A");
727 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
729 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel,
"UnAmalgamationInfo");
741 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation());
742 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
744 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
745 rowTranslationView(rowTranslationArray.
getRawPtr(), rowTranslationArray.
size());
746 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
747 colTranslationView(colTranslationArray.
getRawPtr(), colTranslationArray.
size());
750 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
751 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
752 id_translation_type rowTranslation(
"dofId2nodeId", rowTranslationArray.
size());
753 id_translation_type colTranslation(
"ov_dofId2nodeId", colTranslationArray.
size());
754 Kokkos::deep_copy(rowTranslation, rowTranslationView);
755 Kokkos::deep_copy(colTranslation, colTranslationView);
758 blkSize = A->GetFixedBlockSize();
761 if (A->IsView(
"stridedMaps") ==
true) {
765 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
766 blkId = strMap->getStridedBlockId();
768 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
778 const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
779 const magnitudeType rowSumTol = as<magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
781 const bool useGreedyDirichlet = pL.get<
bool>(
"aggregation: greedy Dirichlet");
785 bool useBlocking = pL.get<
bool>(
"aggregation: use blocking");
786 std::string droppingMethod = pL.get<std::string>(
"aggregation: drop scheme");
787 std::string socUsesMatrix = pL.get<std::string>(
"aggregation: strength-of-connection: matrix");
788 std::string socUsesMeasure = pL.get<std::string>(
"aggregation: strength-of-connection: measure");
789 std::string distanceLaplacianMetric = pL.get<std::string>(
"aggregation: distance laplacian metric");
790 bool symmetrizeDroppedGraph = pL.get<
bool>(
"aggregation: symmetrize graph after dropping");
791 magnitudeType threshold;
793 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
794 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
796 threshold = as<magnitudeType>(pL.get<
double>(
"aggregation: drop tol"));
797 bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
800 const bool lumping = pL.get<
bool>(
"filtered matrix: use lumping");
801 const bool reuseGraph = pL.get<
bool>(
"filtered matrix: reuse graph");
802 const bool reuseEigenvalue = pL.get<
bool>(
"filtered matrix: reuse eigenvalue");
804 const bool useRootStencil = pL.get<
bool>(
"filtered matrix: use root stencil");
805 const bool useSpreadLumping = pL.get<
bool>(
"filtered matrix: use spread lumping");
807 const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<
double>(
"filtered matrix: Dirichlet threshold"));
810 bool generateColoringGraph = pL.get<
bool>(
"aggregation: coloring: use color graph");
811 const bool localizeColoringGraph = pL.get<
bool>(
"aggregation: coloring: localize color graph");
812 const bool symmetrizeColoringGraph =
true;
814 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
815 translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
818 std::stringstream ss;
819 ss <<
"dropping scheme = \"" << droppingMethod <<
"\", strength-of-connection measure = \"" << socUsesMeasure <<
"\", strength-of-connection matrix = \"" << socUsesMatrix <<
"\", ";
820 if (socUsesMatrix ==
"distance laplacian")
821 ss <<
"distance laplacian metric = \"" << distanceLaplacianMetric <<
"\", ";
822 ss <<
"threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() <<
", useBlocking = " << useBlocking;
823 ss <<
", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
830 if (droppingMethod ==
"cut-drop")
831 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0,
Exceptions::RuntimeError,
"For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold <<
", needs to be <= 1.0");
844 auto crsA = toCrsMatrix(A);
845 auto lclA = crsA->getLocalMatrixDevice();
860 #define MueLu_runBoundaryFunctors(...) \
862 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
863 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
866 if (useGreedyDirichlet) {
873 #undef MueLu_runBoundaryFunctors
896 #if !defined(HAVE_MUELU_DEBUG)
897 #define MueLu_runDroppingFunctorsImpl(...) \
899 auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__); \
900 Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz); \
903 #define MueLu_runDroppingFunctorsImpl(...) \
905 auto debug = Misc::DebugFunctor(lclA, results); \
906 auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__, debug); \
907 Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz); \
913 #define MueLu_runDroppingFunctors(...) \
916 auto BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber"); \
917 auto block_diagonalize = Misc::BlockDiagonalizeVectorFunctor(*A, *BlockNumber, nonUniqueMap, results, rowTranslation, colTranslation); \
918 MueLu_runDroppingFunctorsImpl(block_diagonalize, __VA_ARGS__); \
920 MueLu_runDroppingFunctorsImpl(__VA_ARGS__); \
925 #define MueLu_runDroppingFunctors_on_A(SoC) \
927 if (droppingMethod == "point-wise") { \
928 auto dropping = ClassicalDropping::make_drop_functor<SoC>(*A, threshold, results); \
930 if (aggregationMayCreateDirichlet) { \
931 MueLu_runDroppingFunctors(dropping, \
932 preserve_diagonals, \
933 mark_singletons_as_boundary); \
935 MueLu_runDroppingFunctors(dropping, \
936 preserve_diagonals); \
938 } else if (droppingMethod == "cut-drop") { \
939 auto comparison = CutDrop::make_comparison_functor<SoC>(*A, results); \
940 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
942 MueLu_runDroppingFunctors(drop_boundaries, \
943 preserve_diagonals, \
950 #define MueLu_runDroppingFunctors_on_dlap_inner(SoC) \
952 if (droppingMethod == "point-wise") { \
953 auto dist_laplacian_dropping = DistanceLaplacian::make_vector_drop_functor<SoC>(*A, *mergedA, threshold, dist2, results, rowTranslation, colTranslation); \
955 if (aggregationMayCreateDirichlet) { \
956 MueLu_runDroppingFunctors(dist_laplacian_dropping, \
957 preserve_diagonals, \
958 mark_singletons_as_boundary); \
960 MueLu_runDroppingFunctors(dist_laplacian_dropping, \
961 preserve_diagonals); \
963 } else if (droppingMethod == "cut-drop") { \
964 auto comparison = CutDrop::make_dlap_comparison_functor<SoC>(*A, dist2, results); \
965 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
967 MueLu_runDroppingFunctors(drop_boundaries, \
968 preserve_diagonals, \
975 #define MueLu_runDroppingFunctors_on_dlap(SoC) \
977 if (distanceLaplacianMetric == "unweighted") { \
978 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*mergedA, coords); \
979 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
980 } else if (distanceLaplacianMetric == "weighted") { \
981 auto k_dlap_weights_host = Kokkos::View<double*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&dlap_weights[0], dlap_weights.size()); \
982 auto k_dlap_weights = Kokkos::View<double*>("dlap_weights", k_dlap_weights_host.extent(0)); \
983 Kokkos::deep_copy(k_dlap_weights, k_dlap_weights_host); \
984 auto dist2 = DistanceLaplacian::WeightedDistanceFunctor(*mergedA, coords, k_dlap_weights); \
985 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
986 } else if (distanceLaplacianMetric == "block weighted") { \
987 auto k_dlap_weights_host = Kokkos::View<double*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&dlap_weights[0], dlap_weights.size()); \
988 auto k_dlap_weights = Kokkos::View<double*>("dlap_weights", k_dlap_weights_host.extent(0)); \
989 Kokkos::deep_copy(k_dlap_weights, k_dlap_weights_host); \
990 auto dist2 = DistanceLaplacian::BlockWeightedDistanceFunctor(*mergedA, coords, k_dlap_weights, interleaved_blocksize); \
991 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
992 } else if (distanceLaplacianMetric == "material") { \
993 auto material = GetMaterial(currentLevel, coords->getNumVectors()); \
994 if (material->getNumVectors() == 1) { \
995 auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*mergedA, coords, material); \
996 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
998 auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*mergedA, coords, material); \
999 MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
1005 auto filtered_rowptr = rowptr_type(
"rowptr", lclA.numRows() + 1);
1006 auto graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
1008 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
1011 auto results = Kokkos::View<DecisionType*, memory_space>(
"results", lclA.nnz());
1017 if (threshold != zero) {
1021 if (socUsesMatrix ==
"A") {
1022 if (socUsesMeasure ==
"unscaled") {
1024 }
else if (socUsesMeasure ==
"smoothed aggregation") {
1026 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
1028 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
1031 }
else if (socUsesMatrix ==
"distance laplacian") {
1032 auto coords = Get<RCP<doubleMultiVector>>(currentLevel,
"Coordinates");
1035 LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
1036 if (socUsesMeasure ==
"distance laplacian") {
1037 LO dim = (
LO)coords->getNumVectors();
1039 bool non_unity =
false;
1040 for (
LO i = 0; !non_unity && i < (
LO)dlap_weights.size(); i++) {
1041 if (dlap_weights[i] != 1.0) {
1046 if ((
LO)dlap_weights.size() == dim) {
1047 distanceLaplacianMetric =
"weighted";
1048 }
else if ((
LO)dlap_weights.size() == interleaved_blocksize * dim)
1049 distanceLaplacianMetric =
"block weighted";
1052 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
1055 GetOStream(
Statistics1) <<
"Using distance laplacian weights: " << dlap_weights << std::endl;
1063 auto merged_rowptr = rowptr_type(
"rowptr", numNodes + 1);
1067 Kokkos::parallel_scan(
"MergeCount", range, functor, nnz_merged);
1069 local_graph_type lclMergedGraph;
1070 auto colidx_merged = entries_type(
"entries", nnz_merged);
1071 auto values_merged = values_type(
"values", nnz_merged);
1073 local_matrix_type lclMergedA = local_matrix_type(
"mergedA",
1074 numNodes, nonUniqueMap->getLocalNumElements(),
1076 values_merged, merged_rowptr, colidx_merged);
1079 Kokkos::parallel_for(
"MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
1084 if (socUsesMeasure ==
"unscaled") {
1086 }
else if (socUsesMeasure ==
"smoothed aggregation") {
1088 }
else if (socUsesMeasure ==
"signed ruge-stueben") {
1090 }
else if (socUsesMeasure ==
"signed smoothed aggregation") {
1095 Kokkos::deep_copy(results,
KEEP);
1101 if (symmetrizeDroppedGraph) {
1108 GO numTotal = lclA.nnz();
1109 GO numDropped = numTotal - nnz_filtered;
1122 local_matrix_type lclFilteredA;
1124 lclFilteredA = local_matrix_type(
"filteredA", lclA.graph, lclA.numCols());
1126 auto colidx = entries_type(
"entries", nnz_filtered);
1127 auto values = values_type(
"values", nnz_filtered);
1128 lclFilteredA = local_matrix_type(
"filteredA",
1129 lclA.numRows(), lclA.numCols(),
1131 values, filtered_rowptr, colidx);
1134 local_graph_type lclGraph;
1136 auto colidx = entries_type(
"entries", nnz_graph);
1137 lclGraph = local_graph_type(colidx, graph_rowptr);
1143 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
1146 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
1151 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
1154 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
1159 filteredA->SetFixedBlockSize(blkSize);
1161 if (reuseEigenvalue) {
1166 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
1171 graph =
rcp(
new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1176 if (generateColoringGraph) {
1177 SubFactoryMonitor mColoringGraph(*
this,
"Construct coloring graph", currentLevel);
1179 filtered_rowptr = rowptr_type(
"rowptr_coloring_graph", lclA.numRows() + 1);
1180 graph_rowptr = rowptr_type(
"rowptr", numNodes + 1);
1181 if (localizeColoringGraph) {
1185 if (symmetrizeColoringGraph) {
1189 auto colidx = entries_type(
"entries_coloring_graph", nnz_filtered);
1190 auto lclGraph = local_graph_type(colidx, filtered_rowptr);
1192 Kokkos::parallel_for(
"MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
1194 auto colorGraph =
rcp(
new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(),
"coloring graph"));
1195 Set(currentLevel,
"Coloring Graph", colorGraph);
1198 #undef MueLu_runDroppingFunctors_on_dlap
1199 #undef MueLu_runDroppingFunctors_on_dlap_inner
1200 #undef MueLu_runDroppingFunctors_on_A
1201 #undef MueLu_runDroppingFunctors
1202 #undef MueLu_runDroppingFunctorsImpl
1204 LO dofsPerNode = blkSize;
1206 Set(currentLevel,
"DofsPerNode", dofsPerNode);
1207 Set(currentLevel,
"Graph", graph);
1208 Set(currentLevel,
"A", filteredA);
1210 return std::make_tuple(numDropped, boundaryNodes);
1214 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
Teuchos::RCP< MultiVector > GetMaterial(Level ¤tLevel, size_t spatialDim) const
void setValidator(RCP< const ParameterEntryValidator > const &validator)
Functor that drops boundary nodes for a blockSize > 1 problem.
T & get(const std::string &name, T def_value)
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
#define MueLu_runDroppingFunctors_on_dlap(SoC)
#define MueLu_runBoundaryFunctors(...)
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops boundary nodes for a blockSize == 1 problem.
Functor that drops off-rank entries.
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level ¤tLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
Functor that marks diagonal as kept, unless the are already marked as boundary.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level ¤tLevel) const
#define MueLu_runDroppingFunctors_on_A(SoC)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level ¤tLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level ¤tLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
#define MueLu_runDroppingFunctors(...)