10 #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_
11 #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_
15 #include <Xpetra_Map.hpp>
17 #include <Xpetra_MultiVectorFactory.hpp>
23 #include "MueLu_InterfaceAggregationAlgorithm.hpp"
24 #include "MueLu_OnePtAggregationAlgorithm.hpp"
25 #include "MueLu_PreserveDirichletAggregationAlgorithm.hpp"
27 #include "MueLu_AggregationPhase1Algorithm.hpp"
28 #include "MueLu_AggregationPhase2aAlgorithm.hpp"
29 #include "MueLu_AggregationPhase2bAlgorithm.hpp"
30 #include "MueLu_AggregationPhase3Algorithm.hpp"
33 #include "MueLu_LWGraph.hpp"
34 #include "MueLu_Aggregates.hpp"
38 #include "KokkosGraph_Distance2ColorHandle.hpp"
39 #include "KokkosGraph_Distance2Color.hpp"
40 #include "KokkosGraph_MIS2.hpp"
41 #include "Kokkos_UnorderedMap.hpp"
45 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
47 : bDefinitionPhase_(true) {}
49 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
52 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
59 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
78 SET_VALID_ENTRY(
"aggregation: error on nodes with no on-rank neighbors");
83 #undef SET_VALID_ENTRY
87 validParamList->
set<
RCP<const FactoryBase>>(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
90 validParamList->
set<std::string>(
"OnePt aggregate map name",
"",
"Name of input map for single node aggregates. (default='')");
91 validParamList->
set<std::string>(
"OnePt aggregate map factory",
"",
"Generating factory of (DOF) map for single node aggregates.");
96 validParamList->
set<std::string>(
"Interface aggregate map name",
"",
"Name of input map for interface aggregates. (default='')");
97 validParamList->
set<std::string>(
"Interface aggregate map factory",
"",
"Generating factory of (DOF) map for interface aggregates.");
98 validParamList->
set<
RCP<const FactoryBase>>(
"nodeOnInterface", Teuchos::null,
"Array specifying whether or not a node is on the interface (1 or 0).");
100 return validParamList;
103 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
105 Input(currentLevel,
"Graph");
106 Input(currentLevel,
"DofsPerNode");
111 std::string mapOnePtName = pL.
get<std::string>(
"OnePt aggregate map name");
112 if (mapOnePtName.length() > 0) {
113 std::string mapOnePtFactName = pL.
get<std::string>(
"OnePt aggregate map factory");
114 if (mapOnePtFactName ==
"" || mapOnePtFactName ==
"NoFactory") {
123 if (pL.
get<
bool>(
"aggregation: use interface aggregation") ==
true) {
130 "nodeOnInterface was not provided by the user on level0!");
133 Input(currentLevel,
"nodeOnInterface");
138 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
143 bDefinitionPhase_ =
false;
145 if (pL.
get<
int>(
"aggregation: max agg size") == -1)
146 pL.
set(
"aggregation: max agg size", INT_MAX);
161 std::string mapOnePtName = pL.
get<std::string>(
"OnePt aggregate map name");
163 if (mapOnePtName.length()) {
164 std::string mapOnePtFactName = pL.
get<std::string>(
"OnePt aggregate map factory");
165 if (mapOnePtFactName ==
"" || mapOnePtFactName ==
"NoFactory") {
169 OnePtMap = currentLevel.
Get<
RCP<Map>>(mapOnePtName, mapOnePtFact.
get());
174 std::string mapInterfaceName = pL.
get<std::string>(
"Interface aggregate map name");
175 RCP<Map> InterfaceMap = Teuchos::null;
183 const std::string aggregationBackend = pL.
get<std::string>(
"aggregation: backend");
195 if ((aggregationBackend ==
"default") || (aggregationBackend ==
"host")) {
196 graph = Get<RCP<LWGraph>>(currentLevel,
"Graph");
202 RCP<LWGraph> tmp_graph = Get<RCP<LWGraph>>(currentLevel,
"Graph");
205 comm = graph_kokkos->GetComm();
206 numRows = graph_kokkos->GetNodeNumVertices();
210 if ((aggregationBackend ==
"default") || (aggregationBackend ==
"kokkos")) {
211 graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel,
"Graph");
213 comm = graph_kokkos->
GetComm();
220 comm = graph->GetComm();
221 numRows = graph->GetNodeNumVertices();
229 TEUCHOS_TEST_FOR_EXCEPTION(pL.
get<
bool>(
"aggregation: use interface aggregation"), std::invalid_argument,
"Option: 'aggregation: use interface aggregation' is not supported in the Kokkos version of uncoupled aggregation");
231 TEUCHOS_TEST_FOR_EXCEPTION(pL.
get<
bool>(
"aggregation: match ML phase1"), std::invalid_argument,
"Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation");
240 AggStatHostType aggStatHost;
244 aggStatHost = AggStatHostType(Kokkos::ViewAllocateWithoutInitializing(
"aggregation status"), numRows);
245 Kokkos::deep_copy(aggStatHost,
READY);
247 aggStat = AggStatType(Kokkos::ViewAllocateWithoutInitializing(
"aggregation status"), numRows);
248 Kokkos::deep_copy(aggStat,
READY);
252 if (pL.
get<
bool>(
"aggregation: use interface aggregation") ==
true) {
254 for (
LO i = 0; i < numRows; i++) {
255 if (nodeOnInterface[i])
264 Kokkos::parallel_for(
265 "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
266 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph::execution_space>(0, numRows),
268 if (dirichletBoundaryMap(nodeIdx) ==
true) {
274 Kokkos::parallel_for(
275 "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
276 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
278 if (dirichletBoundaryMap(nodeIdx) ==
true) {
285 if (OnePtMap != Teuchos::null) {
286 LO nDofsPerNode = Get<LO>(currentLevel,
"DofsPerNode");
290 for (
LO i = 0; i < numRows; i++) {
292 GO grid = (graph->
GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
294 for (
LO kr = 0; kr < nDofsPerNode; kr++)
295 if (OnePtMap->isNodeGlobalElement(grid + kr))
296 aggStatHost(i) =
ONEPT;
300 auto lclDomainMap = graph_kokkos->
GetDomainMap()->getLocalMap();
301 auto lclOnePtMap = OnePtMap->getLocalMap();
302 const LocalOrdinal INVALID = Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
303 Kokkos::parallel_for(
304 "MueLu - UncoupledAggregation: tagging OnePt map",
305 Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
308 GO grid = (lclDomainMap.getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
310 for (
LO kr = 0; kr < nDofsPerNode; kr++)
311 if (lclOnePtMap.getLocalElement(grid + kr) != INVALID)
317 LO numNonAggregatedNodes = numRows;
318 std::string aggAlgo = pL.
get<std::string>(
"aggregation: coloring algorithm");
319 if (aggAlgo ==
"mis2 coarsening" || aggAlgo ==
"mis2 aggregation") {
324 using device_t =
typename graph_t::device_type;
325 using exec_space =
typename device_t::execution_space;
326 using rowmap_t =
typename graph_t::row_map_type;
327 using colinds_t =
typename graph_t::entries_type;
328 using lno_t =
typename colinds_t::non_const_value_type;
329 rowmap_t aRowptrs = graph_kokkos->
getRowPtrs();
330 colinds_t aColinds = graph_kokkos->
getEntries();
332 typename colinds_t::non_const_type labels;
334 if (aggAlgo ==
"mis2 coarsening") {
336 labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
337 }
else if (aggAlgo ==
"mis2 aggregation") {
339 labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
344 Kokkos::UnorderedMap<LocalOrdinal, void, exec_space> used_labels(numAggs);
345 Kokkos::parallel_for(
346 "MueLu::UncoupledAggregationFactory::MIS2::nonempty_aggs",
347 Kokkos::RangePolicy<exec_space>(0, numRows),
348 KOKKOS_LAMBDA(lno_t i) {
349 if (aggStat(i) ==
READY)
350 used_labels.insert(labels(i));
353 if (used_labels.failed_insert()) {
357 s <<
"numAggs: " << numAggs << std::endl;
358 auto labels_h = Kokkos::create_mirror_view(labels);
359 Kokkos::deep_copy(labels_h, labels);
360 for (
int kk = 0; kk < labels_h.extent_int(0); ++kk) {
361 s << labels_h(kk) <<
" ";
364 std::cout << s.str();
369 Kokkos::View<LO*, typename device_t::memory_space> new_labels(
"new_labels", numAggs);
370 Kokkos::parallel_scan(
371 "MueLu::UncoupledAggregationFactory::MIS2::set_new_labels",
372 Kokkos::RangePolicy<exec_space>(0, used_labels.capacity()),
373 KOKKOS_LAMBDA(lno_t i, lno_t & update,
const bool is_final) {
374 if (used_labels.valid_at(i)) {
375 auto label = used_labels.key_at(i);
377 new_labels(label) = update;
386 used_labels.rehash(0);
389 Kokkos::parallel_for(
390 "MueLu::UncoupledAggregationFactory::MIS2::reassign_labels",
391 Kokkos::RangePolicy<exec_space>(0, numRows),
392 KOKKOS_LAMBDA(lno_t i) {
393 labels(i) = new_labels(labels(i));
397 auto vertex2AggId = aggregates->
GetVertex2AggId()->getLocalViewDevice(Xpetra::Access::ReadWrite);
398 auto procWinner = aggregates->
GetProcWinner()->getLocalViewDevice(Xpetra::Access::OverwriteAll);
399 int rank = comm->getRank();
400 Kokkos::parallel_for(
401 Kokkos::RangePolicy<exec_space>(0, numRows),
402 KOKKOS_LAMBDA(lno_t i) {
403 if (aggStat(i) ==
READY) {
404 #ifdef HAVE_MUELU_DEBUG
405 KOKKOS_ASSERT(labels(i) >= 0);
407 procWinner(i, 0) = rank;
409 vertex2AggId(i, 0) = labels(i);
417 numNonAggregatedNodes = 0;
421 DoGraphColoring(currentLevel, aggAlgo, pL.
get<
bool>(
"aggregation: deterministic"), graph_kokkos, aggregates);
427 std::vector<GO> localStats;
429 localStats = std::vector<GO>(1 + 2 * algos_.size());
430 localStats[0] = numRows;
432 for (
size_t a = 0; a < algos_.size(); a++) {
433 std::string phase = algos_[a]->description();
435 SubFactoryMonitor sfm2(*
this,
"Algo \"" + phase +
"\"" + (numNonAggregatedNodes == 0 ?
" [skipped since no nodes are left to aggregate]" :
""), currentLevel);
436 int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
438 algos_[a]->SetupPhase(pL, comm, numRows, numNonAggregatedNodes);
440 if (numNonAggregatedNodes > 0) {
442 algos_[a]->BuildAggregatesNonKokkos(pL, *graph, *aggregates, aggStatHost, numNonAggregatedNodes);
444 algos_[a]->BuildAggregates(pL, *graph_kokkos, *aggregates, aggStat, numNonAggregatedNodes);
446 algos_[a]->SetProcRankVerbose(oldRank);
449 localStats[2 * a + 1] = numRows - numNonAggregatedNodes;
454 std::vector<GO> globalStats(1 + 2 * algos_.size());
455 Teuchos::reduceAll(*comm,
Teuchos::REDUCE_SUM, (
int)localStats.size(), localStats.data(), globalStats.data());
456 GO numGlobalRows = globalStats[0];
457 GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
458 std::stringstream ss;
459 for (
size_t a = 0; a < algos_.size(); a++) {
460 std::string phase = algos_[a]->description();
461 GO numGlobalAggregated = globalStats[2 * a + 1];
462 GO numGlobalAggs = globalStats[2 * a + 2];
463 GO numGlobalNonAggregatedNodes = numGlobalRows - numGlobalAggregatedPrev;
464 double aggPercent = 100 * as<double>(numGlobalAggregated) / as<double>(numGlobalRows);
465 if (aggPercent > 99.99 && aggPercent < 100.00) {
473 ss <<
"Algo \"" + phase +
"\"" + (numGlobalNonAggregatedNodes == 0 ?
" [skipped since no nodes are left to aggregate]" :
"") << std::endl
474 <<
" aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) <<
" (phase), " << std::fixed
475 << std::setprecision(2) << numGlobalAggregated <<
"/" << numGlobalRows <<
" [" << aggPercent <<
"%] (total)\n"
476 <<
" remaining : " << numGlobalRows - numGlobalAggregated <<
"\n"
477 <<
" aggregates : " << numGlobalAggs - numGlobalAggsPrev <<
" (phase), " << numGlobalAggs <<
" (total)" << std::endl;
478 numGlobalAggregatedPrev = numGlobalAggregated;
479 numGlobalAggsPrev = numGlobalAggs;
490 Set(currentLevel,
"Aggregates", aggregates);
493 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
496 const std::string& aggAlgo,
497 const bool deterministic,
508 using KernelHandle = KokkosKernels::Experimental::
509 KokkosKernelsHandle<
typename graph_t::row_map_type::value_type,
510 typename graph_t::entries_type::value_type,
511 typename graph_t::entries_type::value_type,
512 typename graph_t::device_type::execution_space,
513 typename graph_t::device_type::memory_space,
514 typename graph_t::device_type::memory_space>;
517 kh.create_distance2_graph_coloring_handle();
520 auto coloringHandle = kh.get_distance2_graph_coloring_handle();
533 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
535 }
else if (aggAlgo ==
"serial") {
536 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
538 }
else if (aggAlgo ==
"default") {
539 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_DEFAULT);
541 }
else if (aggAlgo ==
"vertex based") {
542 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB);
544 }
else if (aggAlgo ==
"vertex based bit set") {
545 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT);
547 }
else if (aggAlgo ==
"edge filtering") {
548 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT_EF);
550 }
else if (aggAlgo ==
"net based bit set") {
551 coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_NB_BIT);
554 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
558 typename graph_t::row_map_type aRowptrs = graph->
getRowPtrs();
559 typename graph_t::entries_type aColinds = graph->
getEntries();
564 SubFactoryMonitor sfm2(*
this,
"Algo \"Graph Coloring\": KokkosGraph Call", currentLevel);
565 KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
570 aggregates->
SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
573 kh.destroy_distance2_graph_coloring_handle();
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Algorithm for coarsening a graph with uncoupled aggregation. keep special marked nodes as singleton n...
RCP< MueLu::LWGraph< LocalOrdinal, GlobalOrdinal, Node > > copyToHost()
MueLu::DefaultLocalOrdinal LocalOrdinal
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory); if factory == NULL, the default factory is used.
void DoGraphColoring(Level &currentLevel, const std::string &aggAlgo, const bool deterministic, const RCP< const LWGraph_kokkos > graph, RCP< Aggregates > aggregates) const
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
KOKKOS_INLINE_FUNCTION row_type getRowPtrs() const
Return the row pointers of the local graph.
void SetGraphNumColors(const LO graphNumColors)
Set the number of colors needed by the distance 2 coloring.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
typename std::conditional< OnHost, typename local_graph_device_type::HostMirror, local_graph_device_type >::type local_graph_type
void setValidator(RCP< const ParameterEntryValidator > const &validator)
virtual ~UncoupledAggregationFactory()
Destructor.
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
void DeclareInput(Level &currentLevel) const
Input.
static const NoFactory * get()
Algorithm for coarsening a graph with uncoupled aggregation. creates aggregates along an interface us...
Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might be ne...
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
#define MUELU_UNAGGREGATED
KOKKOS_INLINE_FUNCTION entries_type getEntries() const
Return the list entries in the local graph.
void SetGraphColors(colors_view_type graphColors)
Set a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
virtual void setObjectLabel(const std::string &objectLabel)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
#define SET_VALID_ENTRY(name)
UncoupledAggregationFactory()
Constructor.
Among unaggregated points, see if we can make a reasonable size aggregate out of it. Idea: We do this by looking at neighbors and seeing how many are unaggregated and on my processor. Loosely, base the number of new aggregates created on the percentage of unaggregated nodes.
void Build(Level &currentLevel) const
Build aggregates.
Add leftovers to existing aggregates. Idea: In phase 2b non-aggregated nodes are added to existing aggreg...
RCP< MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node > > copyToDevice()
const RCP< const Map > GetDomainMap() const
KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool &flag)
Record whether aggregates include DOFs from other processes.
Algorithm for coarsening a graph with uncoupled aggregation.
int GetLevelID() const
Return level number.
Exception thrown to report errors in the internal logic of the program.
#define TEUCHOS_ASSERT(assertion_test)
Handle leftover nodes. Try to avoid singleton nodes. Idea: In phase 3 we try to stick unaggregated nodes ...
ParameterEntry & getEntry(const std::string &name)
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const Teuchos::Comm< int > > GetComm() const
aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.