MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12 
13 #include <Kokkos_Core.hpp>
14 #include <KokkosSparse_CrsMatrix.hpp>
15 #include <sstream>
16 #include <string>
17 #include <tuple>
18 
19 #include "Xpetra_Matrix.hpp"
20 
22 
23 #include "MueLu_AmalgamationInfo.hpp"
24 #include "MueLu_Exceptions.hpp"
25 #include "MueLu_Level.hpp"
26 #include "MueLu_LWGraph_kokkos.hpp"
27 #include "MueLu_MasterList.hpp"
28 #include "MueLu_Monitor.hpp"
29 
30 // #define MUELU_COALESCE_DROP_DEBUG 1
31 
34 #include "MueLu_CutDrop.hpp"
35 #include "MueLu_DroppingCommon.hpp"
38 
39 namespace MueLu {
40 
41 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
43  RCP<ParameterList> validParamList = rcp(new ParameterList());
44 
45 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
46  SET_VALID_ENTRY("aggregation: use blocking");
47  SET_VALID_ENTRY("aggregation: drop tol");
48  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
49  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
50  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
51  SET_VALID_ENTRY("aggregation: row sum drop tol");
52  SET_VALID_ENTRY("aggregation: strength-of-connection: matrix");
53  SET_VALID_ENTRY("aggregation: strength-of-connection: measure");
54  SET_VALID_ENTRY("aggregation: drop scheme");
55  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
56  SET_VALID_ENTRY("aggregation: distance laplacian metric");
57  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
58  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
59 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
60  SET_VALID_ENTRY("aggregation: distance laplacian algo");
61  SET_VALID_ENTRY("aggregation: classical algo");
62 #endif
63  SET_VALID_ENTRY("aggregation: symmetrize graph after dropping");
64  SET_VALID_ENTRY("aggregation: coloring: use color graph");
65  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
66 
67  SET_VALID_ENTRY("filtered matrix: use lumping");
68  SET_VALID_ENTRY("filtered matrix: reuse graph");
69  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
70 
71  SET_VALID_ENTRY("filtered matrix: use root stencil");
72  SET_VALID_ENTRY("filtered matrix: use spread lumping");
73  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
74  SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
75  SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
76 
77 #undef SET_VALID_ENTRY
78  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
79 #ifndef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
80  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop"))));
81 #else
82  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("point-wise", "cut-drop", "signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical", "signed classical distance laplacian", "signed classical sa distance laplacian"))));
83  validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
84  validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
85 #endif
86  validParamList->getEntry("aggregation: strength-of-connection: matrix").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("A", "distance laplacian"))));
87  validParamList->getEntry("aggregation: strength-of-connection: measure").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("smoothed aggregation", "signed smoothed aggregation", "signed ruge-stueben", "unscaled"))));
88  validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
89 
90  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
91  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
92  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
93  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
94  validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
95 
96  return validParamList;
97 }
98 
99 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
101  Input(currentLevel, "A");
102  Input(currentLevel, "UnAmalgamationInfo");
103 
104  const ParameterList& pL = GetParameterList();
105 
106  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
107  bool needCoords = (socUsesMatrix == "distance laplacian");
108 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
109  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
110  needCoords |= (droppingMethod.find("distance laplacian") != std::string::npos);
111 #endif
112  if (needCoords) {
113  Input(currentLevel, "Coordinates");
114  std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
115  if (distLaplMetric == "material")
116  Input(currentLevel, "Material");
117  }
118 
119  bool useBlocking = pL.get<bool>("aggregation: use blocking");
120 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
121  useBlocking |= (droppingMethod.find("block diagonal") != std::string::npos);
122 #endif
123  if (useBlocking) {
124  Input(currentLevel, "BlockNumber");
125  }
126 }
127 
128 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
130  Build(Level& currentLevel) const {
131  auto A = Get<RCP<Matrix>>(currentLevel, "A");
132  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
133  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
134 
135  std::tuple<GlobalOrdinal, boundary_nodes_type> results;
136  if (blkSize == 1)
137  results = BuildScalar(currentLevel);
138  else
139  results = BuildVector(currentLevel);
140 
141  if (GetVerbLevel() & Statistics1) {
142  GlobalOrdinal numDropped = std::get<0>(results);
143  auto boundaryNodes = std::get<1>(results);
144 
145  GO numLocalBoundaryNodes = 0;
146  GO numGlobalBoundaryNodes = 0;
147 
148  Kokkos::parallel_reduce(
149  "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
150  KOKKOS_LAMBDA(const LO i, GO& n) {
151  if (boundaryNodes(i))
152  n++;
153  },
154  numLocalBoundaryNodes);
155 
156  auto comm = A->getRowMap()->getComm();
157  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
158 
159  GO numGlobalTotal = A->getGlobalNumEntries();
160  GO numGlobalDropped;
161  MueLu_sumAll(comm, numDropped, numGlobalDropped);
162 
163  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
164  if (numGlobalTotal != 0) {
165  GetOStream(Statistics1) << "Number of dropped entries: "
166  << numGlobalDropped << "/" << numGlobalTotal
167  << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
168  }
169  }
170 }
171 
172 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
175  auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
176 
177  if (IsPrint(Runtime0)) {
178  if (material->getNumVectors() == 1) {
179  GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
180  } else {
181  TEUCHOS_TEST_FOR_EXCEPTION(spatialDim * spatialDim != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
182  {
183  Teuchos::Array<Scalar> means(material->getNumVectors());
184  material->meanValue(means());
185  std::stringstream ss;
186  ss << "material tensor mean =" << std::endl;
187  size_t k = 0;
188  for (size_t i = 0; i < spatialDim; ++i) {
189  ss << " ";
190  for (size_t j = 0; j < spatialDim; ++j) {
191  ss << means[k] << " ";
192  ++k;
193  }
194  ss << std::endl;
195  }
196  GetOStream(Runtime0) << ss.str();
197  }
198  }
199  }
200 
201  return material;
202 }
203 
204 template <class magnitudeType>
205 void translateOldAlgoParam(const Teuchos::ParameterList& pL, std::string& droppingMethod, bool& useBlocking, std::string& socUsesMatrix, std::string& socUsesMeasure, bool& symmetrizeDroppedGraph, bool& generateColoringGraph, magnitudeType& threshold) {
206  std::set<std::string> validDroppingMethods = {"piece-wise", "cut-drop"};
207  if (validDroppingMethods.find(droppingMethod) == validDroppingMethods.end()) {
208  std::string algo = droppingMethod;
209  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
210  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
211 
212  // Remove prefix "block diagonal" from algo
213  if (algo.find("block diagonal") == 0) {
214  useBlocking = true;
215  algo = algo.substr(14);
216  if (algo != "") {
217  algo = algo.substr(1);
218  }
219  }
220 
221  if ((algo == "classical") || (algo == "signed classical sa") || (algo == "signed classical") || (algo == "colored signed classical")) {
222  socUsesMatrix = "A";
223 
224  if (algo == "classical") {
225  socUsesMeasure = "smoothed aggregation";
226  } else if (algo == "signed classical sa") {
227  socUsesMeasure = "signed smoothed aggregation";
228  } else if (algo == "signed classical") {
229  socUsesMeasure = "signed ruge-stueben";
230  } else if (algo == "colored signed classical") {
231  socUsesMeasure = "signed ruge-stueben";
232  generateColoringGraph = true;
233  }
234 
235  if (classicalAlgoStr == "default")
236  droppingMethod = "point-wise";
237  else if (classicalAlgoStr == "unscaled cut") {
238  socUsesMeasure = "unscaled";
239  droppingMethod = "cut-drop";
240  } else if (classicalAlgoStr == "scaled cut") {
241  droppingMethod = "cut-drop";
242  } else if (classicalAlgoStr == "scaled cut symmetric") {
243  droppingMethod = "cut-drop";
244  symmetrizeDroppedGraph = true;
245  }
246  } else if ((algo == "distance laplacian") || (algo == "signed classical sa distance laplacian") || (algo == "signed classical distance laplacian")) {
247  socUsesMatrix = "distance laplacian";
248 
249  if (algo == "distance laplacian") {
250  socUsesMeasure = "smoothed aggregation";
251  } else if (algo == "signed classical sa distance laplacian") {
252  socUsesMeasure = "signed smoothed aggregation";
253  } else if (algo == "signed classical distance laplacian") {
254  socUsesMeasure = "signed ruge-stueben";
255  }
256 
257  if (distanceLaplacianAlgoStr == "default")
258  droppingMethod = "point-wise";
259  else if (distanceLaplacianAlgoStr == "unscaled cut") {
260  socUsesMeasure = "unscaled";
261  droppingMethod = "cut-drop";
262  } else if (distanceLaplacianAlgoStr == "scaled cut") {
263  droppingMethod = "cut-drop";
264  } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
265  droppingMethod = "cut-drop";
266  symmetrizeDroppedGraph = true;
267  }
268  } else if (algo == "") {
269  // algo was "block diagonal", but we process and remove the "block diagonal" part
270  socUsesMatrix = "A";
272  }
273  }
274 }
275 
276 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
277 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
278  BuildScalar(Level& currentLevel) const {
279  FactoryMonitor m(*this, "Build", currentLevel);
280 
283  using local_matrix_type = typename MatrixType::local_matrix_type;
284  using local_graph_type = typename GraphType::local_graph_type;
285  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
286  using entries_type = typename local_graph_type::entries_type::non_const_type;
287  using values_type = typename local_matrix_type::values_type::non_const_type;
288  using device_type = typename Node::device_type;
289  using memory_space = typename device_type::memory_space;
290  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
291  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
292 
293  typedef Teuchos::ScalarTraits<Scalar> STS;
294  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
295 
296  auto A = Get<RCP<Matrix>>(currentLevel, "A");
297 
299  // Process parameterlist
300  const ParameterList& pL = GetParameterList();
301 
302  // Boundary detection
303  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
304  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
305  const LocalOrdinal dirichletNonzeroThreshold = 1;
306 
307  // Dropping
308  bool useBlocking = pL.get<bool>("aggregation: use blocking");
309  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
310  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
311  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
312  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
313  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
314  magnitudeType threshold;
315  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
316  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
317  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
318  else
319  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
320  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
321 
322  // Fill
323  const bool lumping = pL.get<bool>("filtered matrix: use lumping");
324  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
325  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
326 
327  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
328  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
329 
330  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
331 
332  // coloring graph
333  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
334  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
335  const bool symmetrizeColoringGraph = true;
336 
337 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
338  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
339 #endif
340 
341  {
342  std::stringstream ss;
343  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
344  if (socUsesMatrix == "distance laplacian")
345  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
346  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
347  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
348 
349  GetOStream(Runtime0) << ss.str();
350  }
351 
352  TEUCHOS_ASSERT(!useRootStencil);
353  TEUCHOS_ASSERT(!useSpreadLumping);
354  if (droppingMethod == "cut-drop")
355  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
356 
358  // We perform four sweeps over the rows of A:
359  // Pass 1: detection of boundary nodes
360  // Pass 2: diagonal extraction
361  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
362  // Pass 4: fill of the filtered matrix
363  //
364  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
365 
366  // TODO: We could merge pass 1 and 2.
367 
368  auto crsA = toCrsMatrix(A);
369  auto lclA = crsA->getLocalMatrixDevice();
370  auto range = range_type(0, lclA.numRows());
371 
373  // Pass 1: Detect boundary nodes
374  //
375  // The following criteria are available:
376  // - BoundaryDetection::PointDirichletFunctor
377  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
378  // - BoundaryDetection::RowSumFunctor
379  // Marks rows as Dirichlet bases on row-sum criterion
380 
381  // Dirichlet nodes
382  auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
383  {
384  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
385 
386  // macro that applies boundary detection functors
387 #define MueLu_runBoundaryFunctors(...) \
388  { \
389  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
390  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
391  }
392 
393  auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
394 
395  if (rowSumTol <= 0.) {
396  MueLu_runBoundaryFunctors(dirichlet_detection);
397  } else {
398  auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
399  MueLu_runBoundaryFunctors(dirichlet_detection,
400  apply_rowsum);
401  }
402 #undef MueLu_runBoundaryFunctors
403  }
404  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
405  // Otherwise we're now done with it now.
406 
408  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
409  // rowptr of filtered matrix
410  //
411  // The following criteria are available:
412  // - Misc::PointwiseDropBoundaryFunctor
413  // Drop all rows that have been marked as Dirichlet
414  // - Misc::DropOffRankFunctor
415  // Drop all entries that are off-rank
416  // - ClassicalDropping::DropFunctor
417  // Classical dropping
418  // - DistanceLaplacian::DropFunctor
419  // Distance Laplacian dropping
420  // - Misc::KeepDiagonalFunctor
421  // Mark diagonal as KEEP
422  // - Misc::MarkSingletonFunctor
423  // Mark singletons after dropping as Dirichlet
424  // - Misc::BlockDiagonalizeFunctor
425  // Drop coupling between blocks
426  //
427  // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
428 
429  // Macro that applies dropping functors.
430  // If HAVE_MUELU_DEBUG is true, this runs additional debug checks.
431 #if !defined(HAVE_MUELU_DEBUG)
432 #define MueLu_runDroppingFunctorsImpl(...) \
433  { \
434  auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__); \
435  Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz_filtered); \
436  }
437 #else
438 #define MueLu_runDroppingFunctorsImpl(...) \
439  { \
440  auto debug = Misc::DebugFunctor(lclA, results); \
441  auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__, debug); \
442  Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz_filtered); \
443  }
444 #endif
445 
446  // Macro that handles optional block diagonalization.
447  // Calls MueLu_runDroppingFunctorsImpl
448 #define MueLu_runDroppingFunctors(...) \
449  { \
450  if (useBlocking) { \
451  auto BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber"); \
452  auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *BlockNumber, results); \
453  MueLu_runDroppingFunctorsImpl(block_diagonalize, __VA_ARGS__); \
454  } else \
455  MueLu_runDroppingFunctorsImpl(__VA_ARGS__); \
456  }
457 
458  // Macro that runs dropping for SoC based on A itself, handling of droppingMethod.
459  // Calls MueLu_runDroppingFunctors
460 #define MueLu_runDroppingFunctors_on_A(SoC) \
461  { \
462  if (droppingMethod == "point-wise") { \
463  auto dropping = ClassicalDropping::make_drop_functor<SoC>(*A, threshold, results); \
464  \
465  if (aggregationMayCreateDirichlet) { \
466  MueLu_runDroppingFunctors(dropping, \
467  drop_boundaries, \
468  preserve_diagonals, \
469  mark_singletons_as_boundary); \
470  } else { \
471  MueLu_runDroppingFunctors(dropping, \
472  drop_boundaries, \
473  preserve_diagonals); \
474  } \
475  } else if (droppingMethod == "cut-drop") { \
476  auto comparison = CutDrop::make_comparison_functor<SoC>(*A, results); \
477  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
478  \
479  MueLu_runDroppingFunctors(drop_boundaries, \
480  preserve_diagonals, \
481  cut_drop); \
482  } \
483  }
484 
485  // Macro that runs on the distance Laplacian, handling of droppingMethod.
486  // Calls MueLu_runDroppingFunctors
487 #define MueLu_runDroppingFunctors_on_dlap_inner(SoC) \
488  { \
489  if (droppingMethod == "point-wise") { \
490  auto dist_laplacian_dropping = DistanceLaplacian::make_drop_functor<SoC>(*A, threshold, dist2, results); \
491  \
492  if (aggregationMayCreateDirichlet) { \
493  MueLu_runDroppingFunctors(dist_laplacian_dropping, \
494  drop_boundaries, \
495  preserve_diagonals, \
496  mark_singletons_as_boundary); \
497  } else { \
498  MueLu_runDroppingFunctors(dist_laplacian_dropping, \
499  drop_boundaries, \
500  preserve_diagonals); \
501  } \
502  } else if (droppingMethod == "cut-drop") { \
503  auto comparison = CutDrop::make_dlap_comparison_functor<SoC>(*A, dist2, results); \
504  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
505  \
506  MueLu_runDroppingFunctors(drop_boundaries, \
507  preserve_diagonals, \
508  cut_drop); \
509  } \
510  }
511 
512  // Macro that runs on the distance Laplacian, handling of distanceLaplacianMetric.
513  // Calls MueLu_runDroppingFunctors_on_dlap_inner
514 #define MueLu_runDroppingFunctors_on_dlap(SoC) \
515  { \
516  if (distanceLaplacianMetric == "unweighted") { \
517  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords); \
518  MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
519  } else if (distanceLaplacianMetric == "material") { \
520  auto material = GetMaterial(currentLevel, coords->getNumVectors()); \
521  if (material->getNumVectors() == 1) { \
522  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material); \
523  MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
524  } else { \
525  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material); \
526  MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
527  } \
528  } \
529  }
530 
531  // rowptr of filtered A
532  auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
533  // Number of nonzeros of filtered A
534  LocalOrdinal nnz_filtered = 0;
535  // dropping decisions for each entry
536  auto results = Kokkos::View<DecisionType*, memory_space>("results", lclA.nnz()); // initialized to UNDECIDED
537  {
538  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
539 
540  auto drop_boundaries = Misc::PointwiseDropBoundaryFunctor(lclA, boundaryNodes, results);
541 
542  if (threshold != zero) {
543  auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results);
544  auto mark_singletons_as_boundary = Misc::MarkSingletonFunctor(lclA, boundaryNodes, results);
545 
546  if (socUsesMatrix == "A") {
547  if (socUsesMeasure == "unscaled") {
549  } else if (socUsesMeasure == "smoothed aggregation") {
551  } else if (socUsesMeasure == "signed ruge-stueben") {
553  } else if (socUsesMeasure == "signed smoothed aggregation") {
555  }
556  } else if (socUsesMatrix == "distance laplacian") {
557  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
558  if (socUsesMeasure == "unscaled") {
560  } else if (socUsesMeasure == "smoothed aggregation") {
562  } else if (socUsesMeasure == "signed ruge-stueben") {
564  } else if (socUsesMeasure == "signed smoothed aggregation") {
566  }
567  }
568  } else {
569  Kokkos::deep_copy(results, KEEP);
570  // FIXME: This seems inconsistent
571  // MueLu_runDroppingFunctors(drop_boundaries);
572  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
574  }
575 
576  if (symmetrizeDroppedGraph) {
577  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
578  MueLu_runDroppingFunctors(symmetrize);
579  }
580  }
581  GO numDropped = lclA.nnz() - nnz_filtered;
582  // We now know the number of entries of filtered A and have the final rowptr.
583 
585  // Pass 4: Create local matrix for filtered A
586  //
587  // Dropped entries are optionally lumped to the diagonal.
588 
589  RCP<Matrix> filteredA;
590  RCP<LWGraph_kokkos> graph;
591  {
592  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
593 
594  local_matrix_type lclFilteredA;
595  local_graph_type lclGraph;
596  if (reuseGraph) {
597  filteredA = MatrixFactory::BuildCopy(A);
598  lclFilteredA = filteredA->getLocalMatrixDevice();
599 
600  auto colidx = entries_type("entries", nnz_filtered);
601  lclGraph = local_graph_type(colidx, filtered_rowptr);
602  } else {
603  auto colidx = entries_type("entries", nnz_filtered);
604  auto values = values_type("values", nnz_filtered);
605  lclFilteredA = local_matrix_type("filteredA",
606  lclA.numRows(), lclA.numCols(),
607  nnz_filtered,
608  values, filtered_rowptr, colidx);
609  }
610 
611  if (lumping) {
612  if (reuseGraph) {
613  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
614  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
615  } else {
616  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, true>(lclA, results, lclFilteredA, filteringDirichletThreshold);
617  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
618  }
619  } else {
620  if (reuseGraph) {
621  auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
622  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
623  } else {
624  auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, false>(lclA, results, lclFilteredA, filteringDirichletThreshold);
625  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
626  }
627  }
628 
629  if (!reuseGraph)
630  filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
631  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
632 
633  if (reuseEigenvalue) {
634  // Reuse max eigenvalue from A
635  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
636  // the D^{-1}A estimate in A, may as well use it.
637  // NOTE: ML does that too
638  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
639  } else {
640  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
641  }
642 
643  if (!reuseGraph) {
644  // Use graph of filteredA as graph.
645  lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
646  }
647  graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
648  graph->SetBoundaryNodeMap(boundaryNodes);
649  }
650 
651  // Construct a second graph for coloring
652  if (generateColoringGraph) {
653  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
654 
655  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
656  if (localizeColoringGraph) {
657  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
658  MueLu_runDroppingFunctors(drop_offrank);
659  }
660  if (symmetrizeColoringGraph) {
661  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
662  MueLu_runDroppingFunctors(symmetrize);
663  }
664  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
665  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
666  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
667  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
668 
669  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
670  Set(currentLevel, "Coloring Graph", colorGraph);
671  }
672 
673 #undef MueLu_runDroppingFunctors_on_A
674 #undef MueLu_runDroppingFunctors_on_dlap_inner
675 #undef MueLu_runDroppingFunctors_on_dlap
676 #undef MueLu_runDroppingFunctors
677 #undef MueLu_runDroppingFunctorsImpl
678 
679  LO dofsPerNode = 1;
680  Set(currentLevel, "DofsPerNode", dofsPerNode);
681  Set(currentLevel, "Graph", graph);
682  Set(currentLevel, "A", filteredA);
683 
684  return std::make_tuple(numDropped, boundaryNodes);
685 }
686 
687 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
688 std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
689  BuildVector(Level& currentLevel) const {
690  FactoryMonitor m(*this, "Build", currentLevel);
691 
694  using local_matrix_type = typename MatrixType::local_matrix_type;
695  using local_graph_type = typename GraphType::local_graph_type;
696  using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
697  using entries_type = typename local_graph_type::entries_type::non_const_type;
698  using values_type = typename local_matrix_type::values_type::non_const_type;
699  using device_type = typename Node::device_type;
700  using memory_space = typename device_type::memory_space;
701  using magnitudeType = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
702  using doubleMultiVector = Xpetra::MultiVector<magnitudeType, LO, GO, NO>;
703 
704  typedef Teuchos::ScalarTraits<Scalar> STS;
705  const magnitudeType zero = Teuchos::ScalarTraits<magnitudeType>::zero();
706 
707  auto A = Get<RCP<Matrix>>(currentLevel, "A");
708 
709  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
710  blkSize is the number of storage blocks that must kept together during the amalgamation process.
711 
712  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
713 
714  numPDEs = blkSize * storageblocksize.
715 
716  If numPDEs==1
717  Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
718  No other values makes sense.
719 
720  If numPDEs>1
721  If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
722  If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
723  Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
724  */
725 
726  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
727  LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
728 
729  auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
730 
731  const RCP<const Map> rowMap = A->getRowMap();
732  const RCP<const Map> colMap = A->getColMap();
733 
734  // build a node row map (uniqueMap = non-overlapping) and a node column map
735  // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
736  // stored in the AmalgamationInfo class container contain the local node id
737  // given a local dof id. The data is calculated in the AmalgamationFactory and
738  // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
739  const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
740  const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
741  Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
742  Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
743 
744  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
745  rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
746  Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
747  colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
748 
749  // get number of local nodes
750  LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
751  typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
752  id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
753  id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
754  Kokkos::deep_copy(rowTranslation, rowTranslationView);
755  Kokkos::deep_copy(colTranslation, colTranslationView);
756 
757  // extract striding information
758  blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
759  LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
760  LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
761  if (A->IsView("stridedMaps") == true) {
762  const RCP<const Map> myMap = A->getRowMap("stridedMaps");
763  const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
764  TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
765  blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
766  blkId = strMap->getStridedBlockId();
767  if (blkId > -1)
768  blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
769  }
770 
771  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
772 
774  // Process parameterlist
775  const ParameterList& pL = GetParameterList();
776 
777  // Boundary detection
778  const magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
779  const magnitudeType rowSumTol = as<magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
780  const LocalOrdinal dirichletNonzeroThreshold = 1;
781  const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
782  TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
783 
784  // Dropping
785  bool useBlocking = pL.get<bool>("aggregation: use blocking");
786  std::string droppingMethod = pL.get<std::string>("aggregation: drop scheme");
787  std::string socUsesMatrix = pL.get<std::string>("aggregation: strength-of-connection: matrix");
788  std::string socUsesMeasure = pL.get<std::string>("aggregation: strength-of-connection: measure");
789  std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
790  bool symmetrizeDroppedGraph = pL.get<bool>("aggregation: symmetrize graph after dropping");
791  magnitudeType threshold;
792  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
793  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
794  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
795  else
796  threshold = as<magnitudeType>(pL.get<double>("aggregation: drop tol"));
797  bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
798 
799  // Fill
800  const bool lumping = pL.get<bool>("filtered matrix: use lumping");
801  const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
802  const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
803 
804  const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
805  const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
806 
807  const magnitudeType filteringDirichletThreshold = as<magnitudeType>(pL.get<double>("filtered matrix: Dirichlet threshold"));
808 
809  // coloring graph
810  bool generateColoringGraph = pL.get<bool>("aggregation: coloring: use color graph");
811  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
812  const bool symmetrizeColoringGraph = true;
813 
814 #ifdef HAVE_MUELU_COALESCEDROP_ALLOW_OLD_PARAMETERS
815  translateOldAlgoParam(pL, droppingMethod, useBlocking, socUsesMatrix, socUsesMeasure, symmetrizeDroppedGraph, generateColoringGraph, threshold);
816 #endif
817  {
818  std::stringstream ss;
819  ss << "dropping scheme = \"" << droppingMethod << "\", strength-of-connection measure = \"" << socUsesMeasure << "\", strength-of-connection matrix = \"" << socUsesMatrix << "\", ";
820  if (socUsesMatrix == "distance laplacian")
821  ss << "distance laplacian metric = \"" << distanceLaplacianMetric << "\", ";
822  ss << "threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << ", useBlocking = " << useBlocking;
823  ss << ", symmetrizeDroppedGraph = " << symmetrizeDroppedGraph << std::endl;
824 
825  GetOStream(Runtime0) << ss.str();
826  }
827 
828  TEUCHOS_ASSERT(!useRootStencil);
829  TEUCHOS_ASSERT(!useSpreadLumping);
830  if (droppingMethod == "cut-drop")
831  TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
832 
834  // We perform four sweeps over the rows of A:
835  // Pass 1: detection of boundary nodes
836  // Pass 2: diagonal extraction
837  // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
838  // Pass 4: fill of the filtered matrix
839  //
840  // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
841 
842  // TODO: We could merge pass 1 and 2.
843 
844  auto crsA = toCrsMatrix(A);
845  auto lclA = crsA->getLocalMatrixDevice();
846  auto range = range_type(0, numNodes);
847 
849  // Pass 1: Detect boundary nodes
850  //
851  // The following criteria are available:
852  // - BoundaryDetection::VectorDirichletFunctor
853  // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
854 
855  // Dirichlet nodes
856  auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
857  {
858  SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
859 
860 #define MueLu_runBoundaryFunctors(...) \
861  { \
862  auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
863  Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
864  }
865 
866  if (useGreedyDirichlet) {
867  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
868  MueLu_runBoundaryFunctors(dirichlet_detection);
869  } else {
870  auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
871  MueLu_runBoundaryFunctors(dirichlet_detection);
872  }
873 #undef MueLu_runBoundaryFunctors
874  }
875  // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
876  // Otherwise we're now done with it now.
877 
879  // Pass 2 & 3: Diagonal extraction and determine dropping and construct
880  // rowptr of filtered matrix
881  //
882  // The following criteria are available:
883  // - Misc::VectorDropBoundaryFunctor
884  // Drop all rows that have been marked as Dirichlet
885  // - Misc::DropOffRankFunctor
886  // Drop all entries that are off-rank
887  // - ClassicalDropping::DropFunctor
888  // Classical dropping
889  // - DistanceLaplacian::VectorDropFunctor
890  // Distance Laplacian dropping
891  // - Misc::KeepDiagonalFunctor
892  // Mark diagonal as KEEP
893  // - Misc::MarkSingletonFunctor
894  // Mark singletons after dropping as Dirichlet
895 
896 #if !defined(HAVE_MUELU_DEBUG)
897 #define MueLu_runDroppingFunctorsImpl(...) \
898  { \
899  auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__); \
900  Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz); \
901  }
902 #else
903 #define MueLu_runDroppingFunctorsImpl(...) \
904  { \
905  auto debug = Misc::DebugFunctor(lclA, results); \
906  auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__, debug); \
907  Kokkos::parallel_scan("MueLu::CoalesceDrop::CountEntries", range, countingFunctor, nnz); \
908  }
909 #endif
910 
911  // Macro that handles optional block diagonalization.
912  // Calls MueLu_runDroppingFunctorsImpl
913 #define MueLu_runDroppingFunctors(...) \
914  { \
915  if (useBlocking) { \
916  auto BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber"); \
917  auto block_diagonalize = Misc::BlockDiagonalizeVectorFunctor(*A, *BlockNumber, nonUniqueMap, results, rowTranslation, colTranslation); \
918  MueLu_runDroppingFunctorsImpl(block_diagonalize, __VA_ARGS__); \
919  } else \
920  MueLu_runDroppingFunctorsImpl(__VA_ARGS__); \
921  }
922 
923  // Macro that runs dropping for SoC based on A itself, handling of droppingMethod.
924  // Calls MueLu_runDroppingFunctors
925 #define MueLu_runDroppingFunctors_on_A(SoC) \
926  { \
927  if (droppingMethod == "point-wise") { \
928  auto dropping = ClassicalDropping::make_drop_functor<SoC>(*A, threshold, results); \
929  \
930  if (aggregationMayCreateDirichlet) { \
931  MueLu_runDroppingFunctors(dropping, \
932  preserve_diagonals, \
933  mark_singletons_as_boundary); \
934  } else { \
935  MueLu_runDroppingFunctors(dropping, \
936  preserve_diagonals); \
937  } \
938  } else if (droppingMethod == "cut-drop") { \
939  auto comparison = CutDrop::make_comparison_functor<SoC>(*A, results); \
940  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
941  \
942  MueLu_runDroppingFunctors(drop_boundaries, \
943  preserve_diagonals, \
944  cut_drop); \
945  } \
946  }
947 
948  // Macro that runs on the distance Laplacian, handling of droppingMethod.
949  // Calls MueLu_runDroppingFunctors
950 #define MueLu_runDroppingFunctors_on_dlap_inner(SoC) \
951  { \
952  if (droppingMethod == "point-wise") { \
953  auto dist_laplacian_dropping = DistanceLaplacian::make_vector_drop_functor<SoC>(*A, *mergedA, threshold, dist2, results, rowTranslation, colTranslation); \
954  \
955  if (aggregationMayCreateDirichlet) { \
956  MueLu_runDroppingFunctors(dist_laplacian_dropping, \
957  preserve_diagonals, \
958  mark_singletons_as_boundary); \
959  } else { \
960  MueLu_runDroppingFunctors(dist_laplacian_dropping, \
961  preserve_diagonals); \
962  } \
963  } else if (droppingMethod == "cut-drop") { \
964  auto comparison = CutDrop::make_dlap_comparison_functor<SoC>(*A, dist2, results); \
965  auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold); \
966  \
967  MueLu_runDroppingFunctors(drop_boundaries, \
968  preserve_diagonals, \
969  cut_drop); \
970  } \
971  }
972 
973  // Macro that runs on the distance Laplacian, handling of distanceLaplacianMetric.
974  // Calls MueLu_runDroppingFunctors_on_dlap_inner
975 #define MueLu_runDroppingFunctors_on_dlap(SoC) \
976  { \
977  if (distanceLaplacianMetric == "unweighted") { \
978  auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*mergedA, coords); \
979  MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
980  } else if (distanceLaplacianMetric == "weighted") { \
981  auto k_dlap_weights_host = Kokkos::View<double*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&dlap_weights[0], dlap_weights.size()); \
982  auto k_dlap_weights = Kokkos::View<double*>("dlap_weights", k_dlap_weights_host.extent(0)); \
983  Kokkos::deep_copy(k_dlap_weights, k_dlap_weights_host); \
984  auto dist2 = DistanceLaplacian::WeightedDistanceFunctor(*mergedA, coords, k_dlap_weights); \
985  MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
986  } else if (distanceLaplacianMetric == "block weighted") { \
987  auto k_dlap_weights_host = Kokkos::View<double*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&dlap_weights[0], dlap_weights.size()); \
988  auto k_dlap_weights = Kokkos::View<double*>("dlap_weights", k_dlap_weights_host.extent(0)); \
989  Kokkos::deep_copy(k_dlap_weights, k_dlap_weights_host); \
990  auto dist2 = DistanceLaplacian::BlockWeightedDistanceFunctor(*mergedA, coords, k_dlap_weights, interleaved_blocksize); \
991  MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
992  } else if (distanceLaplacianMetric == "material") { \
993  auto material = GetMaterial(currentLevel, coords->getNumVectors()); \
994  if (material->getNumVectors() == 1) { \
995  auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*mergedA, coords, material); \
996  MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
997  } else { \
998  auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*mergedA, coords, material); \
999  MueLu_runDroppingFunctors_on_dlap_inner(SoC); \
1000  } \
1001  } \
1002  }
1003 
1004  // rowptr of filtered A
1005  auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
1006  auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
1007  // Number of nonzeros of filtered A and graph
1008  Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
1009 
1010  // dropping decisions for each entry
1011  auto results = Kokkos::View<DecisionType*, memory_space>("results", lclA.nnz()); // initialized to UNDECIDED
1012  {
1013  SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
1014 
1015  auto drop_boundaries = Misc::VectorDropBoundaryFunctor(lclA, rowTranslation, boundaryNodes, results);
1016 
1017  if (threshold != zero) {
1018  auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results);
1019  auto mark_singletons_as_boundary = Misc::MarkSingletonVectorFunctor(lclA, rowTranslation, boundaryNodes, results);
1020 
1021  if (socUsesMatrix == "A") {
1022  if (socUsesMeasure == "unscaled") {
1024  } else if (socUsesMeasure == "smoothed aggregation") {
1026  } else if (socUsesMeasure == "signed ruge-stueben") {
1028  } else if (socUsesMeasure == "signed smoothed aggregation") {
1030  }
1031  } else if (socUsesMatrix == "distance laplacian") {
1032  auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
1033 
1034  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
1035  LocalOrdinal interleaved_blocksize = as<LocalOrdinal>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
1036  if (socUsesMeasure == "distance laplacian") {
1037  LO dim = (LO)coords->getNumVectors();
1038  // If anything isn't 1.0 we need to turn on the weighting
1039  bool non_unity = false;
1040  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
1041  if (dlap_weights[i] != 1.0) {
1042  non_unity = true;
1043  }
1044  }
1045  if (non_unity) {
1046  if ((LO)dlap_weights.size() == dim) {
1047  distanceLaplacianMetric = "weighted";
1048  } else if ((LO)dlap_weights.size() == interleaved_blocksize * dim)
1049  distanceLaplacianMetric = "block weighted";
1050  else {
1051  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
1052  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
1053  }
1054  if (GetVerbLevel() & Statistics1)
1055  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
1056  }
1057  }
1058 
1059  RCP<Matrix> mergedA;
1060  {
1061  // Construct merged A.
1062 
1063  auto merged_rowptr = rowptr_type("rowptr", numNodes + 1);
1064  LocalOrdinal nnz_merged = 0;
1065 
1066  auto functor = MatrixConstruction::MergeCountFunctor(lclA, blkPartSize, colTranslation, merged_rowptr);
1067  Kokkos::parallel_scan("MergeCount", range, functor, nnz_merged);
1068 
1069  local_graph_type lclMergedGraph;
1070  auto colidx_merged = entries_type("entries", nnz_merged);
1071  auto values_merged = values_type("values", nnz_merged);
1072 
1073  local_matrix_type lclMergedA = local_matrix_type("mergedA",
1074  numNodes, nonUniqueMap->getLocalNumElements(),
1075  nnz_merged,
1076  values_merged, merged_rowptr, colidx_merged);
1077 
1078  auto fillFunctor = MatrixConstruction::MergeFillFunctor<local_matrix_type>(lclA, blkSize, colTranslation, lclMergedA);
1079  Kokkos::parallel_for("MueLu::CoalesceDrop::MergeFill", range, fillFunctor);
1080 
1081  mergedA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclMergedA, uniqueMap, nonUniqueMap, uniqueMap, uniqueMap);
1082  }
1083 
1084  if (socUsesMeasure == "unscaled") {
1086  } else if (socUsesMeasure == "smoothed aggregation") {
1088  } else if (socUsesMeasure == "signed ruge-stueben") {
1090  } else if (socUsesMeasure == "signed smoothed aggregation") {
1092  }
1093  }
1094  } else {
1095  Kokkos::deep_copy(results, KEEP);
1096  // MueLu_runDroppingFunctors(drop_boundaries);
1097  auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
1099  }
1100 
1101  if (symmetrizeDroppedGraph) {
1102  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
1103  MueLu_runDroppingFunctors(symmetrize);
1104  }
1105  }
1106  LocalOrdinal nnz_filtered = nnz.first;
1107  LocalOrdinal nnz_graph = nnz.second;
1108  GO numTotal = lclA.nnz();
1109  GO numDropped = numTotal - nnz_filtered;
1110  // We now know the number of entries of filtered A and have the final rowptr.
1111 
1113  // Pass 4: Create local matrix for filtered A
1114  //
1115  // Dropped entries are optionally lumped to the diagonal.
1116 
1117  RCP<Matrix> filteredA;
1118  RCP<LWGraph_kokkos> graph;
1119  {
1120  SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
1121 
1122  local_matrix_type lclFilteredA;
1123  if (reuseGraph) {
1124  lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
1125  } else {
1126  auto colidx = entries_type("entries", nnz_filtered);
1127  auto values = values_type("values", nnz_filtered);
1128  lclFilteredA = local_matrix_type("filteredA",
1129  lclA.numRows(), lclA.numCols(),
1130  nnz_filtered,
1131  values, filtered_rowptr, colidx);
1132  }
1133 
1134  local_graph_type lclGraph;
1135  {
1136  auto colidx = entries_type("entries", nnz_graph);
1137  lclGraph = local_graph_type(colidx, graph_rowptr);
1138  }
1139 
1140  if (lumping) {
1141  if (reuseGraph) {
1142  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1143  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
1144  } else {
1145  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1146  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
1147  }
1148  } else {
1149  if (reuseGraph) {
1150  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1151  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
1152  } else {
1153  auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1154  Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
1155  }
1156  }
1157 
1158  filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
1159  filteredA->SetFixedBlockSize(blkSize);
1160 
1161  if (reuseEigenvalue) {
1162  // Reuse max eigenvalue from A
1163  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
1164  // the D^{-1}A estimate in A, may as well use it.
1165  // NOTE: ML does that too
1166  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
1167  } else {
1168  filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
1169  }
1170 
1171  graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1172  graph->SetBoundaryNodeMap(boundaryNodes);
1173  }
1174 
1175  // Construct a second graph for coloring
1176  if (generateColoringGraph) {
1177  SubFactoryMonitor mColoringGraph(*this, "Construct coloring graph", currentLevel);
1178 
1179  filtered_rowptr = rowptr_type("rowptr_coloring_graph", lclA.numRows() + 1);
1180  graph_rowptr = rowptr_type("rowptr", numNodes + 1);
1181  if (localizeColoringGraph) {
1182  auto drop_offrank = Misc::DropOffRankFunctor(lclA, results);
1183  MueLu_runDroppingFunctors(drop_offrank);
1184  }
1185  if (symmetrizeColoringGraph) {
1186  auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
1187  MueLu_runDroppingFunctors(symmetrize);
1188  }
1189  auto colidx = entries_type("entries_coloring_graph", nnz_filtered);
1190  auto lclGraph = local_graph_type(colidx, filtered_rowptr);
1191  auto graphConstruction = MatrixConstruction::GraphConstruction<local_matrix_type, local_graph_type>(lclA, results, lclGraph);
1192  Kokkos::parallel_for("MueLu::CoalesceDrop::Construct_coloring_graph", range, graphConstruction);
1193 
1194  auto colorGraph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "coloring graph"));
1195  Set(currentLevel, "Coloring Graph", colorGraph);
1196  }
1197 
1198 #undef MueLu_runDroppingFunctors_on_dlap
1199 #undef MueLu_runDroppingFunctors_on_dlap_inner
1200 #undef MueLu_runDroppingFunctors_on_A
1201 #undef MueLu_runDroppingFunctors
1202 #undef MueLu_runDroppingFunctorsImpl
1203 
1204  LO dofsPerNode = blkSize;
1205 
1206  Set(currentLevel, "DofsPerNode", dofsPerNode);
1207  Set(currentLevel, "Graph", graph);
1208  Set(currentLevel, "A", filteredA);
1209 
1210  return std::make_tuple(numDropped, boundaryNodes);
1211 }
1212 
1213 } // namespace MueLu
1214 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
Lightweight MueLu representation of a compressed row storage graph.
void translateOldAlgoParam(const Teuchos::ParameterList &pL, std::string &droppingMethod, bool &useBlocking, std::string &socUsesMatrix, std::string &socUsesMeasure, bool &symmetrizeDroppedGraph, bool &generateColoringGraph, magnitudeType &threshold)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
Teuchos::RCP< MultiVector > GetMaterial(Level &currentLevel, size_t spatialDim) const
void setValidator(RCP< const ParameterEntryValidator > const &validator)
Functor that drops boundary nodes for a blockSize &gt; 1 problem.
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
#define MueLu_runDroppingFunctors_on_dlap(SoC)
Print more statistics.
#define MueLu_runBoundaryFunctors(...)
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Functor that symmetrizes the dropping decisions.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor that drops boundary nodes for a blockSize == 1 problem.
Functor that drops off-rank entries.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
void DeclareInput(Level &currentLevel) const
Input.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
Functor for marking nodes as Dirichlet.
Functor that marks diagonal as kept, unless the are already marked as boundary.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
#define MueLu_runDroppingFunctors_on_A(SoC)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
Functor for marking nodes as Dirichlet based on rowsum.
size_type size() const
#define SET_VALID_ENTRY(name)
Functor for marking nodes as Dirichlet in a block operator.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
ParameterEntry & getEntry(const std::string &name)
#define MueLu_runDroppingFunctors(...)
bool is_null() const