MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationPhase2bAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
53 
54 #include <Xpetra_Vector.hpp>
55 
57 
58 #include "MueLu_Aggregates_kokkos.hpp"
59 #include "MueLu_Exceptions.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
61 #include "MueLu_Monitor.hpp"
62 
63 namespace MueLu {
64 
65  // Try to stick unaggregated nodes into a neighboring aggregate if they are
66  // not already too big
67  template <class LocalOrdinal, class GlobalOrdinal, class Node>
68  void AggregationPhase2bAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69  BuildAggregates(const ParameterList& params,
70  const LWGraph_kokkos& graph,
71  Aggregates_kokkos& aggregates,
73  LO& numNonAggregatedNodes) const {
74 
75  if(params.get<bool>("aggregation: deterministic")) {
76  Monitor m(*this, "BuildAggregatesDeterministic");
77  BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
78  } else {
79  Monitor m(*this, "BuildAggregatesRandom");
80  BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
81  }
82 
83  } // BuildAggregates
84 
85  template <class LO, class GO, class Node>
86  void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
87  BuildAggregatesRandom(const ParameterList& params,
88  const LWGraph_kokkos& graph,
89  Aggregates_kokkos& aggregates,
91  LO& numNonAggregatedNodes) const {
92  using memory_space = typename LWGraph_kokkos::memory_space;
93  using execution_space = typename LWGraph_kokkos::execution_space;
94 
95  const LO numRows = graph.GetNodeNumVertices();
96  const int myRank = graph.GetComm()->getRank();
97 
98  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
99  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
100  auto colors = aggregates.GetGraphColors();
101  const LO numColors = aggregates.GetGraphNumColors();
102  const LO numLocalAggregates = aggregates.GetNumAggregates();
103 
104  const LO defaultConnectWeight = 100;
105  const LO penaltyConnectWeight = 10;
106 
107  Kokkos::View<LO*, memory_space> aggWeight ("aggWeight", numLocalAggregates);
108  Kokkos::View<LO*, memory_space> connectWeight("connectWeight", numRows);
109  Kokkos::View<LO*, memory_space> aggPenalties ("aggPenalties", numLocalAggregates);
110 
111  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
112 
113  // taw: by running the aggregation routine more than once there is a chance that also
114  // non-aggregated nodes with a node distance of two are added to existing aggregates.
115  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
116  // should be sufficient.
117  // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
118  // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
119  // is needed to reach distance 2 neighbors.
120  int maxIters = 2;
121  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
122  if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
123  for (int iter = 0; iter < maxIters; ++iter) {
124  for(LO color = 1; color <= numColors; ++color) {
125  Kokkos::deep_copy(aggWeight, 0);
126 
127  //the reduce counts how many nodes are aggregated by this phase,
128  //which will then be subtracted from numNonAggregatedNodes
129  LO numAggregated = 0;
130  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
132  KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated) {
133  if (aggStat(i) != READY || colors(i) != color)
134  return;
135 
136  auto neighOfINode = graph.getNeighborVertices(i);
137  for (int j = 0; j < neighOfINode.length; j++) {
138  LO neigh = neighOfINode(j);
139 
140  // We don't check (neigh != i), as it is covered by checking
141  // (aggStat[neigh] == AGGREGATED)
142  if (graph.isLocalNeighborVertex(neigh) &&
143  aggStat(neigh) == AGGREGATED)
144  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
145  connectWeight(neigh));
146  }
147 
148  int bestScore = -100000;
149  int bestAggId = -1;
150  int bestConnect = -1;
151 
152  for (int j = 0; j < neighOfINode.length; j++) {
153  LO neigh = neighOfINode(j);
154 
155  if (graph.isLocalNeighborVertex(neigh) &&
156  aggStat(neigh) == AGGREGATED) {
157  auto aggId = vertex2AggId(neigh, 0);
158  int score = aggWeight(aggId) - aggPenalties(aggId);
159 
160  if (score > bestScore) {
161  bestAggId = aggId;
162  bestScore = score;
163  bestConnect = connectWeight(neigh);
164 
165  } else if (aggId == bestAggId &&
166  connectWeight(neigh) > bestConnect) {
167  bestConnect = connectWeight(neigh);
168  }
169  }
170  }
171  if (bestScore >= 0) {
172  aggStat(i) = AGGREGATED;
173  vertex2AggId(i, 0) = bestAggId;
174  procWinner(i, 0) = myRank;
175 
176  Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
177  connectWeight(i) = bestConnect - penaltyConnectWeight;
178  tmpNumAggregated++;
179  }
180  }, numAggregated); //parallel_for
181  numNonAggregatedNodes -= numAggregated;
182  }
183  } // loop over maxIters
184 
185  } // BuildAggregatesRandom
186 
187 
188 
189  template <class LO, class GO, class Node>
190  void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
191  BuildAggregatesDeterministic(const ParameterList& params,
192  const LWGraph_kokkos& graph,
193  Aggregates_kokkos& aggregates,
195  LO& numNonAggregatedNodes) const {
196  using memory_space = typename LWGraph_kokkos::memory_space;
197  using execution_space = typename LWGraph_kokkos::execution_space;
198 
199  const LO numRows = graph.GetNodeNumVertices();
200  const int myRank = graph.GetComm()->getRank();
201 
202  auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
203  auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
204  auto colors = aggregates.GetGraphColors();
205  const LO numColors = aggregates.GetGraphNumColors();
206  LO numLocalAggregates = aggregates.GetNumAggregates();
207 
208  const int defaultConnectWeight = 100;
209  const int penaltyConnectWeight = 10;
210 
211  Kokkos::View<int*, memory_space> connectWeight ("connectWeight", numRows);
212  Kokkos::View<int*, memory_space> aggWeight ("aggWeight", numLocalAggregates);
213  Kokkos::View<int*, memory_space> aggPenaltyUpdates("aggPenaltyUpdates", numLocalAggregates);
214  Kokkos::View<int*, memory_space> aggPenalties ("aggPenalties", numLocalAggregates);
215 
216  Kokkos::deep_copy(connectWeight, defaultConnectWeight);
217 
218  // We do this cycle twice.
219  // I don't know why, but ML does it too
220  // taw: by running the aggregation routine more than once there is a chance that also
221  // non-aggregated nodes with a node distance of two are added to existing aggregates.
222  // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
223  // should be sufficient.
224  int maxIters = 2;
225  int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
226  if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
227  for (int iter = 0; iter < maxIters; ++iter) {
228  for(LO color = 1; color <= numColors; color++) {
229  Kokkos::deep_copy(aggWeight, 0);
230 
231  //the reduce counts how many nodes are aggregated by this phase,
232  //which will then be subtracted from numNonAggregatedNodes
233  LO numAggregated = 0;
234  Kokkos::parallel_for("Aggregation Phase 2b: updating agg weights",
236  KOKKOS_LAMBDA (const LO i)
237  {
238  if (aggStat(i) != READY || colors(i) != color)
239  return;
240  auto neighOfINode = graph.getNeighborVertices(i);
241  for (int j = 0; j < neighOfINode.length; j++) {
242  LO neigh = neighOfINode(j);
243  // We don't check (neigh != i), as it is covered by checking
244  // (aggStat[neigh] == AGGREGATED)
245  if (graph.isLocalNeighborVertex(neigh) &&
246  aggStat(neigh) == AGGREGATED)
247  Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
248  connectWeight(neigh));
249  }
250  });
251  execution_space().fence();
252  Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
254  KOKKOS_LAMBDA (const LO i, LO& tmpNumAggregated)
255  {
256  if (aggStat(i) != READY || colors(i) != color)
257  return;
258  int bestScore = -100000;
259  int bestAggId = -1;
260  int bestConnect = -1;
261 
262  auto neighOfINode = graph.getNeighborVertices(i);
263  for (int j = 0; j < neighOfINode.length; j++) {
264  LO neigh = neighOfINode(j);
265 
266  if (graph.isLocalNeighborVertex(neigh) &&
267  aggStat(neigh) == AGGREGATED) {
268  auto aggId = vertex2AggId(neigh, 0);
269  int score = aggWeight(aggId) - aggPenalties(aggId);
270 
271  if (score > bestScore) {
272  bestAggId = aggId;
273  bestScore = score;
274  bestConnect = connectWeight(neigh);
275 
276  } else if (aggId == bestAggId &&
277  connectWeight(neigh) > bestConnect) {
278  bestConnect = connectWeight(neigh);
279  }
280  }
281  }
282  if (bestScore >= 0) {
283  aggStat(i) = AGGREGATED;
284  vertex2AggId(i, 0) = bestAggId;
285  procWinner(i, 0) = myRank;
286 
287  Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
288  connectWeight(i) = bestConnect - penaltyConnectWeight;
289  tmpNumAggregated++;
290  }
291  }, numAggregated); //parallel_reduce
292  execution_space().fence();
293  Kokkos::parallel_for("Aggregation Phase 2b: updating agg penalties",
294  Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
295  KOKKOS_LAMBDA (const LO agg)
296  {
297  aggPenalties(agg) += aggPenaltyUpdates(agg);
298  aggPenaltyUpdates(agg) = 0;
299  });
300  numNonAggregatedNodes -= numAggregated;
301  }
302  } // loop over k
303  } // BuildAggregatesDeterministic
304 } // end namespace
305 
306 #endif // HAVE_MUELU_KOKKOS_REFACTOR
307 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
LocalOrdinal LO
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
TransListIter iter