MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_UncoupledAggregationFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_
11 #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_
12 
13 #include <climits>
14 
15 #include <Xpetra_Map.hpp>
16 #include <Xpetra_Vector.hpp>
17 #include <Xpetra_MultiVectorFactory.hpp>
18 #include <Xpetra_VectorFactory.hpp>
19 #include <sstream>
20 
22 
23 #include "MueLu_InterfaceAggregationAlgorithm.hpp"
24 #include "MueLu_OnePtAggregationAlgorithm.hpp"
25 #include "MueLu_PreserveDirichletAggregationAlgorithm.hpp"
26 
27 #include "MueLu_AggregationPhase1Algorithm.hpp"
28 #include "MueLu_AggregationPhase2aAlgorithm.hpp"
29 #include "MueLu_AggregationPhase2bAlgorithm.hpp"
30 #include "MueLu_AggregationPhase3Algorithm.hpp"
31 
32 #include "MueLu_Level.hpp"
33 #include "MueLu_LWGraph.hpp"
34 #include "MueLu_Aggregates.hpp"
35 #include "MueLu_MasterList.hpp"
36 #include "MueLu_Monitor.hpp"
37 
38 #include "KokkosGraph_Distance2ColorHandle.hpp"
39 #include "KokkosGraph_Distance2Color.hpp"
40 #include "KokkosGraph_MIS2.hpp"
41 #include "Kokkos_UnorderedMap.hpp"
42 
43 namespace MueLu {
44 
45 template <class LocalOrdinal, class GlobalOrdinal, class Node>
47  : bDefinitionPhase_(true) {}
48 
49 template <class LocalOrdinal, class GlobalOrdinal, class Node>
51 
52 template <class LocalOrdinal, class GlobalOrdinal, class Node>
54  RCP<ParameterList> validParamList = rcp(new ParameterList());
55 
56  // Aggregation parameters (used in aggregation algorithms)
57  // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters
58 
59 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
60  SET_VALID_ENTRY("aggregation: max agg size");
61  SET_VALID_ENTRY("aggregation: min agg size");
62  SET_VALID_ENTRY("aggregation: max selected neighbors");
63  SET_VALID_ENTRY("aggregation: ordering");
64  validParamList->getEntry("aggregation: ordering").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("natural", "graph", "random"))));
65  SET_VALID_ENTRY("aggregation: deterministic");
66  SET_VALID_ENTRY("aggregation: coloring algorithm");
67  SET_VALID_ENTRY("aggregation: enable phase 1");
68  SET_VALID_ENTRY("aggregation: enable phase 2a");
69  SET_VALID_ENTRY("aggregation: enable phase 2b");
70  SET_VALID_ENTRY("aggregation: enable phase 3");
71  SET_VALID_ENTRY("aggregation: match ML phase1");
72  SET_VALID_ENTRY("aggregation: match ML phase2a");
73  SET_VALID_ENTRY("aggregation: match ML phase2b");
74  SET_VALID_ENTRY("aggregation: phase2a agg factor");
75  SET_VALID_ENTRY("aggregation: preserve Dirichlet points");
76  SET_VALID_ENTRY("aggregation: allow user-specified singletons");
77  SET_VALID_ENTRY("aggregation: use interface aggregation");
78  SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors");
79  SET_VALID_ENTRY("aggregation: phase3 avoid singletons");
80  SET_VALID_ENTRY("aggregation: phase 1 algorithm");
81  SET_VALID_ENTRY("aggregation: backend");
82  validParamList->getEntry("aggregation: backend").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "host", "kokkos"))));
83 #undef SET_VALID_ENTRY
84 
85  // general variables needed in AggregationFactory
86  validParamList->set<RCP<const FactoryBase>>("Graph", null, "Generating factory of the graph");
87  validParamList->set<RCP<const FactoryBase>>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
88 
89  // special variables necessary for OnePtAggregationAlgorithm
90  validParamList->set<std::string>("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')");
91  validParamList->set<std::string>("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates.");
92  // validParamList->set< RCP<const FactoryBase> >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates.");
93 
94  // InterfaceAggregation parameters
95  // validParamList->set< bool > ("aggregation: use interface aggregation", "false", "Flag to trigger aggregation along an interface using specified aggregate seeds.");
96  validParamList->set<std::string>("Interface aggregate map name", "", "Name of input map for interface aggregates. (default='')");
97  validParamList->set<std::string>("Interface aggregate map factory", "", "Generating factory of (DOF) map for interface aggregates.");
98  validParamList->set<RCP<const FactoryBase>>("nodeOnInterface", Teuchos::null, "Array specifying whether or not a node is on the interface (1 or 0).");
99 
100  return validParamList;
101 }
102 
103 template <class LocalOrdinal, class GlobalOrdinal, class Node>
105  Input(currentLevel, "Graph");
106  Input(currentLevel, "DofsPerNode");
107 
108  const ParameterList& pL = GetParameterList();
109 
110  // request special data necessary for OnePtAggregationAlgorithm
111  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
112  if (mapOnePtName.length() > 0) {
113  std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
114  if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
115  currentLevel.DeclareInput(mapOnePtName, NoFactory::get());
116  } else {
117  RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
118  currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
119  }
120  }
121 
122  // request special data necessary for InterfaceAggregation
123  if (pL.get<bool>("aggregation: use interface aggregation") == true) {
124  if (currentLevel.GetLevelID() == 0) {
125  if (currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) {
126  currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this);
127  } else {
128  TEUCHOS_TEST_FOR_EXCEPTION(currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()),
130  "nodeOnInterface was not provided by the user on level0!");
131  }
132  } else {
133  Input(currentLevel, "nodeOnInterface");
134  }
135  }
136 }
137 
138 template <class LocalOrdinal, class GlobalOrdinal, class Node>
140  FactoryMonitor m(*this, "Build", currentLevel);
141 
142  ParameterList pL = GetParameterList();
143  bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed
144 
145  if (pL.get<int>("aggregation: max agg size") == -1)
146  pL.set("aggregation: max agg size", INT_MAX);
147 
148  // define aggregation algorithms
149  RCP<const FactoryBase> graphFact = GetFactory("Graph");
150 
151  // TODO Can we keep different aggregation algorithms over more Build calls?
152  algos_.clear();
153  algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact)));
154  if (pL.get<bool>("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm(graphFact)));
155  if (pL.get<bool>("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm(graphFact)));
156  if (pL.get<bool>("aggregation: enable phase 1") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm(graphFact)));
157  if (pL.get<bool>("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm(graphFact)));
158  if (pL.get<bool>("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm(graphFact)));
159  if (pL.get<bool>("aggregation: enable phase 3") == true) algos_.push_back(rcp(new AggregationPhase3Algorithm(graphFact)));
160 
161  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
162  RCP<Map> OnePtMap = Teuchos::null;
163  if (mapOnePtName.length()) {
164  std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
165  if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
166  OnePtMap = currentLevel.Get<RCP<Map>>(mapOnePtName, NoFactory::get());
167  } else {
168  RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
169  OnePtMap = currentLevel.Get<RCP<Map>>(mapOnePtName, mapOnePtFact.get());
170  }
171  }
172 
173  // Set map for interface aggregates
174  std::string mapInterfaceName = pL.get<std::string>("Interface aggregate map name");
175  RCP<Map> InterfaceMap = Teuchos::null;
176 
177  RCP<const LWGraph> graph;
178  RCP<const LWGraph_kokkos> graph_kokkos;
179  RCP<Aggregates> aggregates;
181  LO numRows;
182 
183  const std::string aggregationBackend = pL.get<std::string>("aggregation: backend");
184 
185  // "Graph" can have type "LWGraph" or "LWGraph_kokkos".
186  // The aggregation phases can call either "BuildAggregatesNonKokkos" or "BuildAggregates".
187 
188 // "aggregation: backend" can take values "default", "host" or "kokkos".
189 // "default": run depending on the type of "Graph"
190 // "host": run the non-Kokkos aggregation, moving "Graph" to host if necessary
191 // "kokkos": run the Kokkos aggregation, moving "Graph" to device if necessary
192 
193  bool runOnHost;
194  if (IsType<RCP<LWGraph>>(currentLevel, "Graph")) {
195  if ((aggregationBackend == "default") || (aggregationBackend == "host")) {
196  graph = Get<RCP<LWGraph>>(currentLevel, "Graph");
197  aggregates = rcp(new Aggregates(*graph));
198  comm = graph->GetComm();
199  numRows = graph->GetNodeNumVertices();
200  runOnHost = true;
201  } else {
202  RCP<LWGraph> tmp_graph = Get<RCP<LWGraph>>(currentLevel, "Graph");
203  graph_kokkos = tmp_graph->copyToDevice();
204  aggregates = rcp(new Aggregates(*graph_kokkos));
205  comm = graph_kokkos->GetComm();
206  numRows = graph_kokkos->GetNodeNumVertices();
207  runOnHost = false;
208  }
209  } else if (IsType<RCP<LWGraph_kokkos>>(currentLevel, "Graph")) {
210  if ((aggregationBackend == "default") || (aggregationBackend == "kokkos")) {
211  graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel, "Graph");
212  aggregates = rcp(new Aggregates(*graph_kokkos));
213  comm = graph_kokkos->GetComm();
214  numRows = graph_kokkos->GetNodeNumVertices();
215  runOnHost = false;
216  } else {
217  RCP<LWGraph_kokkos> tmp_graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel, "Graph");
218  graph = tmp_graph_kokkos->copyToHost();
219  aggregates = rcp(new Aggregates(*graph));
220  comm = graph->GetComm();
221  numRows = graph->GetNodeNumVertices();
222  runOnHost = true;
223  }
224  } else {
225  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Graph has bad type.");
226  }
227 
228  if (!runOnHost) {
229  TEUCHOS_TEST_FOR_EXCEPTION(pL.get<bool>("aggregation: use interface aggregation"), std::invalid_argument, "Option: 'aggregation: use interface aggregation' is not supported in the Kokkos version of uncoupled aggregation");
230  // Sanity Checking: match ML behavior is not supported in UncoupledAggregation_Kokkos in Phase 1 , but it is in 2a and 2b
231  TEUCHOS_TEST_FOR_EXCEPTION(pL.get<bool>("aggregation: match ML phase1"), std::invalid_argument, "Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation");
232  }
233 
234  // Build
235  aggregates->setObjectLabel("UC");
236 
237  // construct aggStat information
240  AggStatHostType aggStatHost;
241  AggStatType aggStat;
242 
243  if (runOnHost) {
244  aggStatHost = AggStatHostType(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), numRows);
245  Kokkos::deep_copy(aggStatHost, READY);
246  } else {
247  aggStat = AggStatType(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), numRows);
248  Kokkos::deep_copy(aggStat, READY);
249  }
250 
251  // interface
252  if (pL.get<bool>("aggregation: use interface aggregation") == true) {
253  Teuchos::Array<LO> nodeOnInterface = Get<Array<LO>>(currentLevel, "nodeOnInterface");
254  for (LO i = 0; i < numRows; i++) {
255  if (nodeOnInterface[i])
256  aggStatHost[i] = INTERFACE;
257  }
258  }
259 
260  // Dirichlet nodes
261  {
262  if (runOnHost) {
263  auto dirichletBoundaryMap = graph->GetBoundaryNodeMap();
264  Kokkos::parallel_for(
265  "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
266  Kokkos::RangePolicy<LocalOrdinal, typename LWGraph::execution_space>(0, numRows),
267  KOKKOS_LAMBDA(const LocalOrdinal nodeIdx) {
268  if (dirichletBoundaryMap(nodeIdx) == true) {
269  aggStatHost(nodeIdx) = BOUNDARY;
270  }
271  });
272  } else {
273  auto dirichletBoundaryMap = graph_kokkos->GetBoundaryNodeMap();
274  Kokkos::parallel_for(
275  "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
276  Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
277  KOKKOS_LAMBDA(const LocalOrdinal nodeIdx) {
278  if (dirichletBoundaryMap(nodeIdx) == true) {
279  aggStat(nodeIdx) = BOUNDARY;
280  }
281  });
282  }
283  }
284 
285  if (OnePtMap != Teuchos::null) {
286  LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
287 
288  if (runOnHost) {
289  GO indexBase = graph->GetDomainMap()->getIndexBase();
290  for (LO i = 0; i < numRows; i++) {
291  // reconstruct global row id (FIXME only works for contiguous maps)
292  GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
293 
294  for (LO kr = 0; kr < nDofsPerNode; kr++)
295  if (OnePtMap->isNodeGlobalElement(grid + kr))
296  aggStatHost(i) = ONEPT;
297  }
298  } else {
299  GO indexBase = graph_kokkos->GetDomainMap()->getIndexBase();
300  auto lclDomainMap = graph_kokkos->GetDomainMap()->getLocalMap();
301  auto lclOnePtMap = OnePtMap->getLocalMap();
302  const LocalOrdinal INVALID = Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
303  Kokkos::parallel_for(
304  "MueLu - UncoupledAggregation: tagging OnePt map",
305  Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
306  KOKKOS_LAMBDA(const LocalOrdinal i) {
307  // reconstruct global row id (FIXME only works for contiguous maps)
308  GO grid = (lclDomainMap.getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
309 
310  for (LO kr = 0; kr < nDofsPerNode; kr++)
311  if (lclOnePtMap.getLocalElement(grid + kr) != INVALID)
312  aggStat(i) = ONEPT;
313  });
314  }
315  }
316 
317  LO numNonAggregatedNodes = numRows;
318  std::string aggAlgo = pL.get<std::string>("aggregation: coloring algorithm");
319  if (aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation") {
320  TEUCHOS_ASSERT(!runOnHost);
321 
322  SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel);
323  using graph_t = typename LWGraph_kokkos::local_graph_type;
324  using device_t = typename graph_t::device_type;
325  using exec_space = typename device_t::execution_space;
326  using rowmap_t = typename graph_t::row_map_type;
327  using colinds_t = typename graph_t::entries_type;
328  using lno_t = typename colinds_t::non_const_value_type;
329  rowmap_t aRowptrs = graph_kokkos->getRowPtrs();
330  colinds_t aColinds = graph_kokkos->getEntries();
331  lno_t numAggs = 0;
332  typename colinds_t::non_const_type labels;
333 
334  if (aggAlgo == "mis2 coarsening") {
335  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl;
336  labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
337  } else if (aggAlgo == "mis2 aggregation") {
338  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl;
339  labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
340  }
341  {
342  {
343  // find aggregates that are not empty
344  Kokkos::UnorderedMap<LocalOrdinal, void, exec_space> used_labels(numAggs);
345  Kokkos::parallel_for(
346  "MueLu::UncoupledAggregationFactory::MIS2::nonempty_aggs",
347  Kokkos::RangePolicy<exec_space>(0, numRows),
348  KOKKOS_LAMBDA(lno_t i) {
349  if (aggStat(i) == READY)
350  used_labels.insert(labels(i));
351  });
352  Kokkos::fence();
353  if (used_labels.failed_insert()) {
354  // CAG: I used to see crashes due to this check. Now I cannot reproduce them anymore.
355  // Leaving some debug code here in case it does pop up somewhere.
356  std::stringstream s;
357  s << "numAggs: " << numAggs << std::endl;
358  auto labels_h = Kokkos::create_mirror_view(labels);
359  Kokkos::deep_copy(labels_h, labels);
360  for (int kk = 0; kk < labels_h.extent_int(0); ++kk) {
361  s << labels_h(kk) << " ";
362  }
363  s << std::endl;
364  std::cout << s.str();
365  }
366  TEUCHOS_ASSERT(!used_labels.failed_insert());
367 
368  // compute aggIds for non-empty aggs
369  Kokkos::View<LO*, typename device_t::memory_space> new_labels("new_labels", numAggs);
370  Kokkos::parallel_scan(
371  "MueLu::UncoupledAggregationFactory::MIS2::set_new_labels",
372  Kokkos::RangePolicy<exec_space>(0, used_labels.capacity()),
373  KOKKOS_LAMBDA(lno_t i, lno_t & update, const bool is_final) {
374  if (used_labels.valid_at(i)) {
375  auto label = used_labels.key_at(i);
376  if (is_final) {
377  new_labels(label) = update;
378  }
379  ++update;
380  }
381  },
382  numAggs);
383 
384  // We no longer need the hashmap.
385  used_labels.clear();
386  used_labels.rehash(0);
387 
388  // reassign aggIds
389  Kokkos::parallel_for(
390  "MueLu::UncoupledAggregationFactory::MIS2::reassign_labels",
391  Kokkos::RangePolicy<exec_space>(0, numRows),
392  KOKKOS_LAMBDA(lno_t i) {
393  labels(i) = new_labels(labels(i));
394  });
395  }
396 
397  auto vertex2AggId = aggregates->GetVertex2AggId()->getLocalViewDevice(Xpetra::Access::ReadWrite);
398  auto procWinner = aggregates->GetProcWinner()->getLocalViewDevice(Xpetra::Access::OverwriteAll);
399  int rank = comm->getRank();
400  Kokkos::parallel_for(
401  Kokkos::RangePolicy<exec_space>(0, numRows),
402  KOKKOS_LAMBDA(lno_t i) {
403  if (aggStat(i) == READY) {
404 #ifdef HAVE_MUELU_DEBUG
405  KOKKOS_ASSERT(labels(i) >= 0);
406 #endif
407  procWinner(i, 0) = rank;
408  aggStat(i) = AGGREGATED;
409  vertex2AggId(i, 0) = labels(i);
410  } else {
411  procWinner(i, 0) = MUELU_UNASSIGNED;
412  aggStat(i) = IGNORED;
413  vertex2AggId(i, 0) = MUELU_UNAGGREGATED;
414  }
415  });
416  }
417  numNonAggregatedNodes = 0;
418  aggregates->SetNumAggregates(numAggs);
419  } else {
420  if (!runOnHost) {
421  DoGraphColoring(currentLevel, aggAlgo, pL.get<bool>("aggregation: deterministic"), graph_kokkos, aggregates);
422  if (IsPrint(Statistics1)) {
423  GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl;
424  }
425  }
426 
427  std::vector<GO> localStats;
428  if (IsPrint(Statistics1)) {
429  localStats = std::vector<GO>(1 + 2 * algos_.size());
430  localStats[0] = numRows;
431  }
432  for (size_t a = 0; a < algos_.size(); a++) {
433  std::string phase = algos_[a]->description();
434 
435  SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"" + (numNonAggregatedNodes == 0 ? " [skipped since no nodes are left to aggregate]" : ""), currentLevel);
436  int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
437 
438  algos_[a]->SetupPhase(pL, comm, numRows, numNonAggregatedNodes);
439 
440  if (numNonAggregatedNodes > 0) {
441  if (runOnHost)
442  algos_[a]->BuildAggregatesNonKokkos(pL, *graph, *aggregates, aggStatHost, numNonAggregatedNodes);
443  else
444  algos_[a]->BuildAggregates(pL, *graph_kokkos, *aggregates, aggStat, numNonAggregatedNodes);
445  }
446  algos_[a]->SetProcRankVerbose(oldRank);
447 
448  if (IsPrint(Statistics1)) {
449  localStats[2 * a + 1] = numRows - numNonAggregatedNodes; // num local aggregated nodes
450  localStats[2 * a + 2] = aggregates->GetNumAggregates(); // num local aggregates
451  }
452  }
453  if (IsPrint(Statistics1)) {
454  std::vector<GO> globalStats(1 + 2 * algos_.size());
455  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, (int)localStats.size(), localStats.data(), globalStats.data());
456  GO numGlobalRows = globalStats[0];
457  GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
458  std::stringstream ss;
459  for (size_t a = 0; a < algos_.size(); a++) {
460  std::string phase = algos_[a]->description();
461  GO numGlobalAggregated = globalStats[2 * a + 1];
462  GO numGlobalAggs = globalStats[2 * a + 2];
463  GO numGlobalNonAggregatedNodes = numGlobalRows - numGlobalAggregatedPrev;
464  double aggPercent = 100 * as<double>(numGlobalAggregated) / as<double>(numGlobalRows);
465  if (aggPercent > 99.99 && aggPercent < 100.00) {
466  // Due to round off (for instance, for 140465733/140466897), we could
467  // get 100.00% display even if there are some remaining nodes. This
468  // is bad from the users point of view. It is much better to change
469  // it to display 99.99%.
470  aggPercent = 99.99;
471  }
472 
473  ss << "Algo \"" + phase + "\"" + (numGlobalNonAggregatedNodes == 0 ? " [skipped since no nodes are left to aggregate]" : "") << std::endl
474  << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
475  << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
476  << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
477  << " aggregates : " << numGlobalAggs - numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
478  numGlobalAggregatedPrev = numGlobalAggregated;
479  numGlobalAggsPrev = numGlobalAggs;
480  }
481  GetOStream(Statistics1) << ss.str();
482  }
483  }
484 
485  TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
486 
487  aggregates->AggregatesCrossProcessors(false);
488  aggregates->ComputeAggregateSizes(true /*forceRecompute*/);
489 
490  Set(currentLevel, "Aggregates", aggregates);
491 }
492 
493 template <class LocalOrdinal, class GlobalOrdinal, class Node>
495  DoGraphColoring(Level& currentLevel,
496  const std::string& aggAlgo,
497  const bool deterministic,
498  const RCP<const LWGraph_kokkos> graph,
499  RCP<Aggregates> aggregates) const {
500  SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel);
501 
502  // LBV on Sept 06 2019: the note below is a little worrisome,
503  // can we guarantee that MueLu is never used on a non-symmetric
504  // graph?
505  // note: just using colinds_view in place of scalar_view_t type
506  // (it won't be used at all by symbolic SPGEMM)
507  using graph_t = typename LWGraph_kokkos::local_graph_type;
508  using KernelHandle = KokkosKernels::Experimental::
509  KokkosKernelsHandle<typename graph_t::row_map_type::value_type,
510  typename graph_t::entries_type::value_type,
511  typename graph_t::entries_type::value_type,
512  typename graph_t::device_type::execution_space,
513  typename graph_t::device_type::memory_space,
514  typename graph_t::device_type::memory_space>;
515  KernelHandle kh;
516  // leave gc algorithm choice as the default
517  kh.create_distance2_graph_coloring_handle();
518 
519  // get the distance-2 graph coloring handle
520  auto coloringHandle = kh.get_distance2_graph_coloring_handle();
521 
522  const LO numRows = graph->GetNodeNumVertices();
523 
524  // Set the distance-2 graph coloring algorithm to use.
525  // Options:
526  // COLORING_D2_DEFAULT - Let the kernel handle pick the variation
527  // COLORING_D2_SERIAL - Use the legacy serial-only implementation
528  // COLORING_D2_VB - Use the parallel vertex based direct method
529  // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array
530  // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT
531  // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest)
532  if (deterministic) {
533  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
534  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
535  } else if (aggAlgo == "serial") {
536  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
537  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
538  } else if (aggAlgo == "default") {
539  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_DEFAULT);
540  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl;
541  } else if (aggAlgo == "vertex based") {
542  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB);
543  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl;
544  } else if (aggAlgo == "vertex based bit set") {
545  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT);
546  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl;
547  } else if (aggAlgo == "edge filtering") {
548  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT_EF);
549  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl;
550  } else if (aggAlgo == "net based bit set") {
551  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_NB_BIT);
552  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl;
553  } else {
554  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
555  }
556 
557  // Create device views for graph rowptrs/colinds
558  typename graph_t::row_map_type aRowptrs = graph->getRowPtrs();
559  typename graph_t::entries_type aColinds = graph->getEntries();
560 
561  // run d2 graph coloring
562  // graph is symmetric so row map/entries and col map/entries are the same
563  {
564  SubFactoryMonitor sfm2(*this, "Algo \"Graph Coloring\": KokkosGraph Call", currentLevel); // CMS HACK
565  KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
566  }
567 
568  // extract the colors and store them in the aggregates
569  aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
570  aggregates->SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
571 
572  // clean up coloring handle
573  kh.destroy_distance2_graph_coloring_handle();
574 }
575 
576 } // namespace MueLu
577 
578 #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
#define MUELU_UNASSIGNED
Algorithm for coarsening a graph with uncoupled aggregation. keep special marked nodes as singleton n...
RCP< MueLu::LWGraph< LocalOrdinal, GlobalOrdinal, Node > > copyToHost()
MueLu::DefaultLocalOrdinal LocalOrdinal
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory). If factory == NULL => use default factory.
void DoGraphColoring(Level &currentLevel, const std::string &aggAlgo, const bool deterministic, const RCP< const LWGraph_kokkos > graph, RCP< Aggregates > aggregates) const
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
KOKKOS_INLINE_FUNCTION row_type getRowPtrs() const
Return the row pointers of the local graph.
void SetGraphNumColors(const LO graphNumColors)
Set the number of colors needed by the distance 2 coloring.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
typename std::conditional< OnHost, typename local_graph_device_type::HostMirror, local_graph_device_type >::type local_graph_type
void setValidator(RCP< const ParameterEntryValidator > const &validator)
virtual ~UncoupledAggregationFactory()
Destructor.
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
T * get() const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
void DeclareInput(Level &currentLevel) const
Input.
static const NoFactory * get()
Algorithm for coarsening a graph with uncoupled aggregation. creates aggregates along an interface us...
Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might be ne...
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
#define MUELU_UNAGGREGATED
KOKKOS_INLINE_FUNCTION entries_type getEntries() const
Return the list entries in the local graph.
void SetGraphColors(colors_view_type graphColors)
Set a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
virtual void setObjectLabel(const std::string &objectLabel)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
#define SET_VALID_ENTRY(name)
Among unaggregated points, see if we can make a reasonable size aggregate out of it. Idea: Among unaggregated points, see if we can make a reasonable size aggregate out of it. We do this by looking at neighbors and seeing how many are unaggregated and on my processor. Loosely, base the number of new aggregates created on the percentage of unaggregated nodes.
void Build(Level &currentLevel) const
Build aggregates.
Add leftovers to existing aggregates. Idea: In phase 2b non-aggregated nodes are added to existing aggreg...
RCP< MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node > > copyToDevice()
const RCP< const Map > GetDomainMap() const
KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool &flag)
Record whether aggregates include DOFs from other processes.
Algorithm for coarsening a graph with uncoupled aggregation.
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
Exception thrown to report errors in the internal logic of the program.
#define TEUCHOS_ASSERT(assertion_test)
Handle leftover nodes. Try to avoid singleton nodes. Idea: In phase 3 we try to stick unaggregated nodes ...
ParameterEntry & getEntry(const std::string &name)
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const Teuchos::Comm< int > > GetComm() const
aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.