46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
58 #include "MueLu_Aggregates_kokkos.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
67 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
68 void AggregationPhase2bAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69 BuildAggregates(
const ParameterList& params,
70 const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes)
const {
75 if(params.get<
bool>(
"aggregation: deterministic")) {
76 Monitor m(*
this,
"BuildAggregatesDeterministic");
77 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
79 Monitor m(*
this,
"BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
85 template <
class LO,
class GO,
class Node>
86 void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
87 BuildAggregatesRandom(
const ParameterList& params,
88 const LWGraph_kokkos& graph,
89 Aggregates_kokkos& aggregates,
91 LO& numNonAggregatedNodes)
const {
92 using memory_space =
typename LWGraph_kokkos::memory_space;
93 using execution_space =
typename LWGraph_kokkos::execution_space;
95 const LO numRows = graph.GetNodeNumVertices();
96 const int myRank = graph.GetComm()->getRank();
98 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
99 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
100 auto colors = aggregates.GetGraphColors();
101 const LO numColors = aggregates.GetGraphNumColors();
102 const LO numLocalAggregates = aggregates.GetNumAggregates();
104 const LO defaultConnectWeight = 100;
105 const LO penaltyConnectWeight = 10;
121 int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
122 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
123 for (
int iter = 0;
iter < maxIters; ++
iter) {
124 for(
LO color = 1; color <= numColors; ++color) {
129 LO numAggregated = 0;
132 KOKKOS_LAMBDA (
const LO i,
LO& tmpNumAggregated) {
133 if (aggStat(i) !=
READY || colors(i) != color)
136 auto neighOfINode = graph.getNeighborVertices(i);
137 for (
int j = 0; j < neighOfINode.length; j++) {
138 LO neigh = neighOfINode(j);
142 if (graph.isLocalNeighborVertex(neigh) &&
144 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
145 connectWeight(neigh));
148 int bestScore = -100000;
150 int bestConnect = -1;
152 for (
int j = 0; j < neighOfINode.length; j++) {
153 LO neigh = neighOfINode(j);
155 if (graph.isLocalNeighborVertex(neigh) &&
157 auto aggId = vertex2AggId(neigh, 0);
158 int score = aggWeight(aggId) - aggPenalties(aggId);
160 if (score > bestScore) {
163 bestConnect = connectWeight(neigh);
165 }
else if (aggId == bestAggId &&
166 connectWeight(neigh) > bestConnect) {
167 bestConnect = connectWeight(neigh);
171 if (bestScore >= 0) {
173 vertex2AggId(i, 0) = bestAggId;
174 procWinner(i, 0) = myRank;
176 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
177 connectWeight(i) = bestConnect - penaltyConnectWeight;
181 numNonAggregatedNodes -= numAggregated;
189 template <
class LO,
class GO,
class Node>
190 void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
191 BuildAggregatesDeterministic(
const ParameterList& params,
192 const LWGraph_kokkos& graph,
193 Aggregates_kokkos& aggregates,
195 LO& numNonAggregatedNodes)
const {
196 using memory_space =
typename LWGraph_kokkos::memory_space;
197 using execution_space =
typename LWGraph_kokkos::execution_space;
199 const LO numRows = graph.GetNodeNumVertices();
200 const int myRank = graph.GetComm()->getRank();
202 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
203 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
204 auto colors = aggregates.GetGraphColors();
205 const LO numColors = aggregates.GetGraphNumColors();
206 LO numLocalAggregates = aggregates.GetNumAggregates();
208 const int defaultConnectWeight = 100;
209 const int penaltyConnectWeight = 10;
225 int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
226 if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
227 for (
int iter = 0;
iter < maxIters; ++
iter) {
228 for(
LO color = 1; color <= numColors; color++) {
233 LO numAggregated = 0;
236 KOKKOS_LAMBDA (
const LO i)
238 if (aggStat(i) !=
READY || colors(i) != color)
240 auto neighOfINode = graph.getNeighborVertices(i);
241 for (
int j = 0; j < neighOfINode.length; j++) {
242 LO neigh = neighOfINode(j);
245 if (graph.isLocalNeighborVertex(neigh) &&
247 Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
248 connectWeight(neigh));
251 execution_space().fence();
254 KOKKOS_LAMBDA (
const LO i,
LO& tmpNumAggregated)
256 if (aggStat(i) !=
READY || colors(i) != color)
258 int bestScore = -100000;
260 int bestConnect = -1;
262 auto neighOfINode = graph.getNeighborVertices(i);
263 for (
int j = 0; j < neighOfINode.length; j++) {
264 LO neigh = neighOfINode(j);
266 if (graph.isLocalNeighborVertex(neigh) &&
268 auto aggId = vertex2AggId(neigh, 0);
269 int score = aggWeight(aggId) - aggPenalties(aggId);
271 if (score > bestScore) {
274 bestConnect = connectWeight(neigh);
276 }
else if (aggId == bestAggId &&
277 connectWeight(neigh) > bestConnect) {
278 bestConnect = connectWeight(neigh);
282 if (bestScore >= 0) {
284 vertex2AggId(i, 0) = bestAggId;
285 procWinner(i, 0) = myRank;
287 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
288 connectWeight(i) = bestConnect - penaltyConnectWeight;
292 execution_space().fence();
295 KOKKOS_LAMBDA (
const LO agg)
297 aggPenalties(agg) += aggPenaltyUpdates(agg);
298 aggPenaltyUpdates(agg) = 0;
300 numNonAggregatedNodes -= numAggregated;
306 #endif // HAVE_MUELU_KOKKOS_REFACTOR
307 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)