Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_GaussSeidel.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_GAUSS_SEIDEL_HPP
11 #define IFPACK2_GAUSS_SEIDEL_HPP
12 
13 namespace Ifpack2 {
14 namespace Details {
15 template <typename Scalar, typename LO, typename GO, typename NT>
16 struct GaussSeidel {
17  using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
18  using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
19  using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
20  using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
21  using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
22  using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
23  using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
24  using mem_space_t = typename local_matrix_device_type::memory_space;
25  using rowmap_t = typename local_matrix_device_type::row_map_type::HostMirror;
26  using entries_t = typename local_matrix_device_type::index_type::HostMirror;
27  using values_t = typename local_matrix_device_type::values_type::HostMirror;
28  using const_rowmap_t = typename rowmap_t::const_type;
29  using const_entries_t = typename entries_t::const_type;
30  using const_values_t = typename values_t::const_type;
31  using Offset = typename rowmap_t::non_const_value_type;
32  using IST = typename crs_matrix_type::impl_scalar_type;
33  using KAT = Kokkos::ArithTraits<IST>;
34  // Type of view representing inverse diagonal blocks, and its HostMirror.
35  using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
36  using InverseBlocksHost = typename InverseBlocks::HostMirror;
37 
38  typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
39  typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
40  typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
41 
42  // Setup for CrsMatrix
43  GaussSeidel(Teuchos::RCP<const crs_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
44  A = A_;
45  numRows = A_->getLocalNumRows();
46  haveRowMatrix = false;
47  inverseDiagVec = inverseDiagVec_;
48  applyRows = applyRows_;
49  blockSize = 1;
50  omega = omega_;
51  }
52 
53  GaussSeidel(Teuchos::RCP<const row_matrix_type> A_, Teuchos::RCP<vector_type>& inverseDiagVec_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
54  A = A_;
55  numRows = A_->getLocalNumRows();
56  haveRowMatrix = true;
57  inverseDiagVec = inverseDiagVec_;
58  applyRows = applyRows_;
59  blockSize = 1;
60  omega = omega_;
61  // Here, need to make a deep CRS copy to avoid slow access via getLocalRowCopy
62  rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing("Arowmap"), numRows + 1);
63  rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing("Aentries"), A_->getLocalNumEntries());
64  rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing("Avalues"), A_->getLocalNumEntries());
65  size_t maxDegree = A_->getLocalMaxNumRowEntries();
66  nonconst_values_host_view_type rowValues("rowValues", maxDegree);
67  nonconst_local_inds_host_view_type rowEntries("rowEntries", maxDegree);
68  size_t accum = 0;
69  for (LO i = 0; i <= numRows; i++) {
70  rowMatrixRowmap(i) = accum;
71  if (i == numRows)
72  break;
73  size_t degree;
74  A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
75  accum += degree;
76  size_t rowBegin = rowMatrixRowmap(i);
77  for (size_t j = 0; j < degree; j++) {
78  rowMatrixEntries(rowBegin + j) = rowEntries[j];
79  rowMatrixValues(rowBegin + j) = rowValues[j];
80  }
81  }
82  }
83 
84  GaussSeidel(Teuchos::RCP<const bcrs_matrix_type> A_, const InverseBlocks& inverseBlockDiag_, Teuchos::ArrayRCP<LO>& applyRows_, Scalar omega_) {
85  A = A_;
86  numRows = A_->getLocalNumRows();
87  haveRowMatrix = false;
88  // note: next 2 lines are no-ops if inverseBlockDiag_ is already host-accessible
89  inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
90  Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
91  applyRows = applyRows_;
92  omega = omega_;
93  blockSize = A_->getBlockSize();
94  }
95 
96  template <bool useApplyRows, bool multipleRHS, bool omegaNotOne>
97  void applyImpl(const const_values_t& Avalues, const const_rowmap_t& Arowmap, const const_entries_t& Aentries, multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction) {
98  // note: direction is either Forward or Backward (Symmetric is handled in apply())
99  LO numApplyRows = useApplyRows ? (LO)applyRows.size() : numRows;
100  // note: inverseDiagMV always has only one column
101  auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
102  bool forward = direction == Tpetra::Forward;
103  if (multipleRHS) {
104  LO numVecs = x.getNumVectors();
105  Teuchos::Array<IST> accum(numVecs);
106  auto xlcl = x.get2dViewNonConst();
107  auto blcl = b.get2dView();
108  for (LO i = 0; i < numApplyRows; i++) {
109  LO row;
110  if (forward)
111  row = useApplyRows ? applyRows[i] : i;
112  else
113  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
114  for (LO k = 0; k < numVecs; k++) {
115  accum[k] = KAT::zero();
116  }
117  Offset rowBegin = Arowmap(row);
118  Offset rowEnd = Arowmap(row + 1);
119  for (Offset j = rowBegin; j < rowEnd; j++) {
120  LO col = Aentries(j);
121  IST val = Avalues(j);
122  for (LO k = 0; k < numVecs; k++) {
123  accum[k] += val * IST(xlcl[k][col]);
124  }
125  }
126  // Update x
127  IST dinv = inverseDiag(row);
128  for (LO k = 0; k < numVecs; k++) {
129  if (omegaNotOne)
130  xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
131  else
132  xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
133  }
134  }
135  } else {
136  auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
137  auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
138  auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
139  for (LO i = 0; i < numApplyRows; i++) {
140  LO row;
141  if (forward)
142  row = useApplyRows ? applyRows[i] : i;
143  else
144  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
145  IST accum = KAT::zero();
146  Offset rowBegin = Arowmap(row);
147  Offset rowEnd = Arowmap(row + 1);
148  for (Offset j = rowBegin; j < rowEnd; j++) {
149  accum += Avalues(j) * xlcl(Aentries(j));
150  }
151  // Update x
152  IST dinv = dlcl(row);
153  if (omegaNotOne)
154  xlcl(row) += omega * dinv * (blcl(row) - accum);
155  else
156  xlcl(row) += dinv * (blcl(row) - accum);
157  }
158  }
159  }
160 
161  void applyBlock(block_multivector_type& x, const block_multivector_type& b, Tpetra::ESweepDirection direction) {
162  if (direction == Tpetra::Symmetric) {
163  applyBlock(x, b, Tpetra::Forward);
164  applyBlock(x, b, Tpetra::Backward);
165  return;
166  }
167  auto Abcrs = Teuchos::rcp_dynamic_cast<const bcrs_matrix_type>(A);
168  if (Abcrs.is_null())
169  throw std::runtime_error("Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
170  auto Avalues = Abcrs->getValuesHost();
171  auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
172  auto Arowmap = AlclGraph.row_map;
173  auto Aentries = AlclGraph.entries;
174  // Number of scalars in Avalues per block entry.
175  Offset bs2 = blockSize * blockSize;
176  LO numVecs = x.getNumVectors();
177  Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
178  Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel Accumulator"), blockSize, numVecs);
179  Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
180  Kokkos::ViewAllocateWithoutInitializing("BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
181  bool useApplyRows = !applyRows.is_null();
182  bool forward = direction == Tpetra::Forward;
183  LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
184  for (LO i = 0; i < numApplyRows; i++) {
185  LO row;
186  if (forward)
187  row = useApplyRows ? applyRows[i] : i;
188  else
189  row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
190  for (LO v = 0; v < numVecs; v++) {
191  auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
192  for (LO k = 0; k < blockSize; k++) {
193  accum(k, v) = KAT::zero();
194  }
195  }
196  Offset rowBegin = Arowmap(row);
197  Offset rowEnd = Arowmap(row + 1);
198  for (Offset j = rowBegin; j < rowEnd; j++) {
199  LO col = Aentries(j);
200  const IST* blk = &Avalues(j * bs2);
201  for (LO v = 0; v < numVecs; v++) {
202  auto xCol = x.getLocalBlockHost(col, v, Tpetra::Access::ReadOnly);
203  for (LO br = 0; br < blockSize; br++) {
204  for (LO bc = 0; bc < blockSize; bc++) {
205  IST Aval = blk[br * blockSize + bc];
206  accum(br, v) += Aval * xCol(bc);
207  }
208  }
209  }
210  }
211  // Update x: term is omega * Aii^-1 * accum, where omega is scalar, Aii^-1 is bs*bs, and accum is bs*nv
212  auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
213  Kokkos::deep_copy(dinv_accum, KAT::zero());
214  for (LO v = 0; v < numVecs; v++) {
215  auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
216  for (LO br = 0; br < blockSize; br++) {
217  accum(br, v) = bRow(br) - accum(br, v);
218  }
219  }
220  for (LO v = 0; v < numVecs; v++) {
221  for (LO br = 0; br < blockSize; br++) {
222  for (LO bc = 0; bc < blockSize; bc++) {
223  dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
224  }
225  }
226  }
227  // Update x
228  for (LO v = 0; v < numVecs; v++) {
229  auto xRow = x.getLocalBlockHost(row, v, Tpetra::Access::ReadWrite);
230  for (LO k = 0; k < blockSize; k++) {
231  xRow(k) += omega * dinv_accum(k, v);
232  }
233  }
234  }
235  }
236 
237  // Version of apply for CrsMatrix/RowMatrix (for BlockCrsMatrix, call applyBlock)
238  void apply(multivector_type& x, const multivector_type& b, Tpetra::ESweepDirection direction) {
239  if (direction == Tpetra::Symmetric) {
240  apply(x, b, Tpetra::Forward);
241  apply(x, b, Tpetra::Backward);
242  return;
243  }
244  const_values_t Avalues;
245  const_rowmap_t Arowmap;
246  const_entries_t Aentries;
247  if (haveRowMatrix) {
248  Avalues = rowMatrixValues;
249  Arowmap = rowMatrixRowmap;
250  Aentries = rowMatrixEntries;
251  } else {
252  auto Acrs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
253  if (Acrs.is_null())
254  throw std::runtime_error("Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
255  auto Alcl = Acrs->getLocalMatrixHost();
256  Avalues = Alcl.values;
257  Arowmap = Alcl.graph.row_map;
258  Aentries = Alcl.graph.entries;
259  }
260  if (applyRows.is_null()) {
261  if (x.getNumVectors() > 1)
262  this->template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
263  else {
264  // Optimize for the all-rows, single vector, omega = 1 case
265  if (omega == KAT::one())
266  this->template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
267  else
268  this->template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
269  }
270  } else {
271  this->template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
272  }
273  }
274 
276  // For efficiency, if input is a RowMatrix, keep a persistent copy of the CRS formatted local matrix.
277  bool haveRowMatrix;
278  values_t rowMatrixValues;
279  rowmap_t rowMatrixRowmap;
280  entries_t rowMatrixEntries;
281  LO numRows;
282  IST omega;
283  // If set up with BlockCrsMatrix, the block size. Otherwise 1.
284  LO blockSize;
285  // If using a non-block matrix, the inverse diagonal.
286  Teuchos::RCP<vector_type> inverseDiagVec;
287  // If using a block matrix, the inverses of all diagonal blocks.
288  InverseBlocksHost inverseBlockDiag;
289  // If null, apply over all rows in natural order. Otherwise, apply for each row listed in order.
290  Teuchos::ArrayRCP<LO> applyRows;
291 };
292 } // namespace Details
293 } // namespace Ifpack2
294 
295 #endif