Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_MultiVectorLocalGatherScatter.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_DETAILS_MULTIVECTORLOCALGATHERSCATTER_HPP
11 #define IFPACK2_DETAILS_MULTIVECTORLOCALGATHERSCATTER_HPP
12 
16 
17 #include "Tpetra_MultiVector.hpp"
18 #include "Tpetra_Map.hpp"
19 
20 namespace Ifpack2 {
21 namespace Details {
22 
51 template <class MV_in, class MV_out>
53  public:
54  typedef typename MV_in::scalar_type InScalar;
55  typedef typename MV_out::scalar_type OutScalar;
56  typedef typename MV_in::local_ordinal_type LO;
57  typedef typename MV_in::global_ordinal_type GO;
58  typedef typename MV_in::node_type NO;
59 
60  /**************/
61  /* MV <==> MV */
62  /**************/
63  void
64  gather(MV_out& X_out,
65  const MV_in& X_in,
66  const Teuchos::ArrayView<const LO> perm) const {
67  using Teuchos::ArrayRCP;
68  const size_t numRows = X_out.getLocalLength();
69  const size_t numVecs = X_in.getNumVectors();
70  Kokkos::fence();
71  for (size_t j = 0; j < numVecs; ++j) {
72  ArrayRCP<const InScalar> X_in_j = X_in.getData(j);
73  ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
74  for (size_t i = 0; i < numRows; ++i) {
75  const size_t i_perm = perm[i];
76  X_out_j[i] = X_in_j[i_perm];
77  }
78  }
79  X_out.modify_host();
80  }
81 
82  // Gather blocks (contiguous groups of blockSize rows)
83  // X_out and X_in are point indexed, but perm uses block indices.
84  // So X_out.getLocalLength() / blockSize gives the number of blocks.
85  void
86  gatherBlock(
87  MV_out& X_out,
88  const MV_in& X_in,
90  LO blockSize) const {
91  using Teuchos::ArrayRCP;
92  const size_t numBlocks = X_out.getLocalLength() / blockSize;
93  const size_t numVecs = X_in.getNumVectors();
94  Kokkos::fence();
95  for (size_t j = 0; j < numVecs; ++j) {
96  ArrayRCP<const InScalar> X_in_j = X_in.getData(j);
97  ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
98  for (size_t i = 0; i < numBlocks; ++i) {
99  const size_t i_perm = perm[i];
100  for (LO k = 0; k < blockSize; k++) {
101  X_out_j[i * blockSize + k] = X_in_j[i_perm * blockSize + k];
102  }
103  }
104  }
105  X_out.modify_host();
106  }
107 
108  void
109  scatter(MV_in& X_in,
110  const MV_out& X_out,
111  const Teuchos::ArrayView<const LO> perm) const {
112  using Teuchos::ArrayRCP;
113  const size_t numRows = X_out.getLocalLength();
114  const size_t numVecs = X_in.getNumVectors();
115  Kokkos::fence();
116  for (size_t j = 0; j < numVecs; ++j) {
117  ArrayRCP<InScalar> X_in_j = X_in.getDataNonConst(j);
118  ArrayRCP<const OutScalar> X_out_j = X_out.getData(j);
119  for (size_t i = 0; i < numRows; ++i) {
120  const size_t i_perm = perm[i];
121  X_in_j[i_perm] = X_out_j[i];
122  }
123  }
124  X_out.modify_host();
125  }
126 
127  void
128  scatterBlock(
129  MV_in& X_in,
130  const MV_out& X_out,
131  const Teuchos::ArrayView<const LO> perm,
132  LO blockSize) const {
133  using Teuchos::ArrayRCP;
134  const size_t numBlocks = X_out.getLocalLength() / blockSize;
135  const size_t numVecs = X_in.getNumVectors();
136  Kokkos::fence();
137  for (size_t j = 0; j < numVecs; ++j) {
138  ArrayRCP<const InScalar> X_in_j = X_in.getData(j);
139  ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
140  for (size_t i = 0; i < numBlocks; ++i) {
141  const size_t i_perm = perm[i];
142  for (LO k = 0; k < blockSize; k++) {
143  X_in_j[i_perm * blockSize + k] = X_out_j[i * blockSize + k];
144  }
145  }
146  }
147  X_out.modify_host();
148  }
149 
150  /******************/
151  /* View <==> View */
152  /******************/
153  template <typename InView, typename OutView>
154  void gatherViewToView(OutView X_out,
155  const InView X_in,
156  const Teuchos::ArrayView<const LO> perm) const {
157  // note: j is col, i is row
158  Kokkos::fence(); // demonstrated via unit test failure
159  for (size_t j = 0; j < X_out.extent(1); ++j) {
160  for (size_t i = 0; i < X_out.extent(0); ++i) {
161  const LO i_perm = perm[i];
162  X_out(i, j) = X_in(i_perm, j);
163  }
164  }
165  }
166 
167  template <typename InView, typename OutView>
168  void scatterViewToView(InView X_in,
169  const OutView X_out,
170  const Teuchos::ArrayView<const LO> perm) const {
171  Kokkos::fence();
172  for (size_t j = 0; j < X_out.extent(1); ++j) {
173  for (size_t i = 0; i < X_out.extent(0); ++i) {
174  const LO i_perm = perm[i];
175  X_in(i_perm, j) = X_out(i, j);
176  }
177  }
178  }
179 
180  template <typename InView, typename OutView>
181  void gatherViewToViewBlock(OutView X_out,
182  const InView X_in,
183  const Teuchos::ArrayView<const LO> perm,
184  LO blockSize) const {
185  // note: j is col, i is row
186  Kokkos::fence();
187  size_t numBlocks = X_out.extent(0) / blockSize;
188  for (size_t j = 0; j < X_out.extent(1); ++j) {
189  for (size_t i = 0; i < numBlocks; ++i) {
190  const LO i_perm = perm[i];
191  for (LO k = 0; k < blockSize; k++) {
192  X_out(i * blockSize + k, j) = X_in(i_perm * blockSize + k, j);
193  }
194  }
195  }
196  }
197 
198  template <typename InView, typename OutView>
199  void scatterViewToViewBlock(InView X_in,
200  const OutView X_out,
201  const Teuchos::ArrayView<const LO> perm,
202  LO blockSize) const {
203  // note: j is col, i is row
204  Kokkos::fence();
205  size_t numBlocks = X_out.extent(0) / blockSize;
206  for (size_t j = 0; j < X_out.extent(1); ++j) {
207  for (size_t i = 0; i < numBlocks; ++i) {
208  const LO i_perm = perm[i];
209  for (LO k = 0; k < blockSize; k++) {
210  X_in(i_perm * blockSize + k, j) = X_out(i * blockSize + k, j);
211  }
212  }
213  }
214  }
215 
216  /*******************************/
217  /* MV <==> View specialization */
218  /*******************************/
219  template <typename InView>
220  void gatherMVtoView(MV_out X_out,
221  InView X_in,
222  const Teuchos::ArrayView<const LO> perm) const {
223  // note: j is col, i is row
224  Kokkos::fence();
225  size_t numRows = X_out.getLocalLength();
226  for (size_t j = 0; j < X_out.getNumVectors(); ++j) {
227  Teuchos::ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
228  for (size_t i = 0; i < numRows; ++i) {
229  const LO i_perm = perm[i];
230  X_out_j[i] = X_in(i_perm, j);
231  }
232  }
233  }
234 
235  template <typename InView>
236  void scatterMVtoView(InView X_in,
237  MV_out X_out,
238  const Teuchos::ArrayView<const LO> perm) const {
239  size_t numRows = X_out.getLocalLength();
240  Kokkos::fence();
241  for (size_t j = 0; j < X_in.extent(1); ++j) {
242  Teuchos::ArrayRCP<const OutScalar> X_out_j = X_out.getData(j);
243  for (size_t i = 0; i < numRows; ++i) {
244  const LO i_perm = perm[i];
245  X_in(i_perm, j) = X_out_j[i];
246  }
247  }
248  }
249 
250  template <typename InView>
251  void gatherMVtoViewBlock(MV_out X_out,
252  InView X_in,
253  const Teuchos::ArrayView<const LO> perm,
254  LO blockSize) const {
255  // note: j is col, i is row
256  size_t numBlocks = X_out.getLocalLength() / blockSize;
257  Kokkos::fence();
258  for (size_t j = 0; j < X_out.getNumVectors(); ++j) {
259  Teuchos::ArrayRCP<OutScalar> X_out_j = X_out.getDataNonConst(j);
260  for (size_t i = 0; i < numBlocks; ++i) {
261  const LO i_perm = perm[i];
262  for (LO k = 0; k < blockSize; k++) {
263  X_out_j[i * blockSize + k] = X_in(i_perm * blockSize + k, j);
264  }
265  }
266  }
267  }
268 
269  template <typename InView>
270  void scatterMVtoViewBlock(InView X_in,
271  MV_out X_out,
272  const Teuchos::ArrayView<const LO> perm,
273  LO blockSize) const {
274  size_t numBlocks = X_out.getLocalLength() / blockSize;
275  Kokkos::fence();
276  for (size_t j = 0; j < X_in.extent(1); ++j) {
277  Teuchos::ArrayRCP<const OutScalar> X_out_j = X_out.getData(j);
278  for (size_t i = 0; i < numBlocks; ++i) {
279  const LO i_perm = perm[i];
280  for (LO k = 0; k < blockSize; k++) {
281  X_in(i_perm * blockSize + k, j) = X_out_j[i * blockSize + k];
282  }
283  }
284  }
285  }
286 };
287 
288 } // namespace Details
289 } // namespace Ifpack2
290 
291 #endif // IFPACK2_DETAILS_MULTIVECTORLOCALGATHERSCATTER_HPP
Implementation detail of Ifpack2::Container subclasses.
Definition: Ifpack2_Details_MultiVectorLocalGatherScatter.hpp:52