Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_TriDiContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
11 #define IFPACK2_TRIDICONTAINER_DEF_HPP
12 
13 #include "Teuchos_LAPACK.hpp"
14 
15 #ifdef HAVE_MPI
16 #include <mpi.h>
18 #else
19 #include "Teuchos_DefaultSerialComm.hpp"
20 #endif // HAVE_MPI
21 
22 namespace Ifpack2 {
23 
24 template <class MatrixType, class LocalScalarType>
27  const Teuchos::Array<Teuchos::Array<LO> >& partitions,
29  bool pointIndexed)
30  : ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed)
31  , ipiv_(this->blockRows_.size() * this->scalarsPerRow_)
32  , scalarOffsets_(this->numBlocks_) {
34  !matrix->hasColMap(), std::invalid_argument,
35  "Ifpack2::TriDiContainer: "
36  "The constructor's input matrix must have a column Map.");
37  // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
38  // different index base than zero?
39  // compute scalar array offsets (probably different from blockOffsets_)
40  LO scalarTotal = 0;
41  for (LO i = 0; i < this->numBlocks_; i++) {
42  scalarOffsets_[i] = scalarTotal;
43  // actualBlockRows is how many scalars it takes to store the diagonal for block i
44  LO actualBlockRows = this->scalarsPerRow_ * this->blockSizes_[i];
45  if (actualBlockRows == 1) {
46  scalarTotal += 1;
47  } else {
48  // this formula is exact for any block size of 1 or more
49  // includes 1 subdiagonal and 2 superdiagonals.
50  scalarTotal += 4 * (actualBlockRows - 1);
51  }
52  }
53  // Allocate scalar arrays
54  scalars_.resize(scalarTotal);
55  diagBlocks_.reserve(this->numBlocks_);
56  for (int i = 0; i < this->numBlocks_; i++) {
57  diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], this->blockSizes_[i] * this->scalarsPerRow_);
58  }
59 }
60 
61 template <class MatrixType, class LocalScalarType>
64 
65 template <class MatrixType, class LocalScalarType>
67  for (int i = 0; i < this->numBlocks_; i++)
68  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
69  this->IsInitialized_ = true;
70  // We assume that if you called this method, you intend to recompute
71  // everything.
72  this->IsComputed_ = false;
73 }
74 
75 template <class MatrixType, class LocalScalarType>
77 #ifdef HAVE_IFPACK2_DEBUG
79  ipiv_.size() != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
80  "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
81  "Please report this bug to the Ifpack2 developers.");
82 #endif
83 
84  this->IsComputed_ = false;
85  if (!this->isInitialized()) {
86  this->initialize();
87  }
88  extract(); // extract the submatrices
89  factor(); // factor them
90  this->IsComputed_ = true;
91 }
92 
93 template <class MatrixType, class LocalScalarType>
95  using Teuchos::Array;
96  using Teuchos::ArrayView;
98  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
99  // To extract diagonal blocks, need to translate local rows to local columns.
100  // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
101  // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
102  // offset - blockOffsets_[b]: gives the column within block b.
103  //
104  // This provides the block and col within a block in O(1).
105  if (this->scalarsPerRow_ > 1) {
106  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
107  for (int i = 0; i < this->numBlocks_; i++) {
108  // Get the interval where block i is defined in blockRows_
109  LO blockStart = this->blockOffsets_[i];
110  LO blockEnd = blockStart + this->blockSizes_[i];
111  ArrayView<const LO> blockRows = this->getBlockRows(i);
112  // Set the lookup table entries for the columns appearing in block i.
113  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
114  // this is OK. The values updated here are only needed to process block i's entries.
115  for (size_t j = 0; j < size_t(blockRows.size()); j++) {
116  LO localCol = this->translateRowToCol(blockRows[j]);
117  colToBlockOffset[localCol] = blockStart + j;
118  }
119  using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
120  using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
121  for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
122  // get a raw view of the whole block row
123  h_inds_type indices;
124  h_vals_type values;
125  LO inputRow = this->blockRows_[blockStart + blockRow];
126  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
127  LO numEntries = (LO)indices.size();
128  for (LO k = 0; k < numEntries; k++) {
129  LO colOffset = colToBlockOffset[indices[k]];
130  if (blockStart <= colOffset && colOffset < blockEnd) {
131  // This entry does appear in the diagonal block.
132  //(br, bc) identifies the scalar's position in the BlockCrs block.
133  // Convert this to (r, c) which is its position in the container block.
134  LO blockCol = colOffset - blockStart;
135  for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
136  for (LO br = 0; br < this->bcrsBlockSize_; br++) {
137  LO r = this->bcrsBlockSize_ * blockRow + br;
138  LO c = this->bcrsBlockSize_ * blockCol + bc;
139  auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
140  if (val != zero)
141  diagBlocks_[i](r, c) = val;
142  }
143  }
144  }
145  }
146  }
147  }
148  } else {
149  // get the mapping from point-indexed matrix columns to offsets in blockRows_
150  //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
151  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
152  for (int i = 0; i < this->numBlocks_; i++) {
153  // Get the interval where block i is defined in blockRows_
154  LO blockStart = this->blockOffsets_[i];
155  LO blockEnd = blockStart + this->blockSizes_[i];
156  ArrayView<const LO> blockRows = this->getBlockRows(i);
157  // Set the lookup table entries for the columns appearing in block i.
158  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
159  // this is OK. The values updated here are only needed to process block i's entries.
160  for (size_t j = 0; j < size_t(blockRows.size()); j++) {
161  // translateRowToCol will return the corresponding split column
162  LO localCol = this->translateRowToCol(blockRows[j]);
163  colToBlockOffset[localCol] = blockStart + j;
164  }
165  for (size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++) {
166  // get a view of the split row
167  LO inputPointRow = this->blockRows_[blockStart + blockRow];
168  auto rowView = this->getInputRowView(inputPointRow);
169  for (size_t k = 0; k < rowView.size(); k++) {
170  LO colOffset = colToBlockOffset[rowView.ind(k)];
171  if (blockStart <= colOffset && colOffset < blockEnd) {
172  LO blockCol = colOffset - blockStart;
173  auto val = rowView.val(k);
174  if (val != zero)
175  diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
176  }
177  }
178  }
179  }
180  }
181 }
182 
183 template <class MatrixType, class LocalScalarType>
184 void TriDiContainer<MatrixType, LocalScalarType>::clearBlocks() {
185  diagBlocks_.clear();
186  scalars_.clear();
187  ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
188 }
189 
190 template <class MatrixType, class LocalScalarType>
191 void TriDiContainer<MatrixType, LocalScalarType>::factor() {
192  for (int i = 0; i < this->numBlocks_; i++) {
194  int INFO = 0;
195  int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
196  lapack.GTTRF(diagBlocks_[i].numRowsCols(),
197  diagBlocks_[i].DL(),
198  diagBlocks_[i].D(),
199  diagBlocks_[i].DU(),
200  diagBlocks_[i].DU2(),
201  blockIpiv, &INFO);
202  // INFO < 0 is a bug.
204  INFO < 0, std::logic_error,
205  "Ifpack2::TriDiContainer::factor: "
206  "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
207  "incorrectly. INFO = "
208  << INFO << " < 0. "
209  "Please report this bug to the Ifpack2 developers.");
210  // INFO > 0 means the matrix is singular. This is probably an issue
211  // either with the choice of rows the rows we extracted, or with the
212  // input matrix itself.
214  INFO > 0, std::runtime_error,
215  "Ifpack2::TriDiContainer::factor: "
216  "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
217  "computed U factor is exactly singular. U("
218  << INFO << "," << INFO << ") "
219  "(one-based index i) is exactly zero. This probably means that the input "
220  "matrix has a singular diagonal block.");
221  }
222 }
223 
224 template <class MatrixType, class LocalScalarType>
226  solveBlock(ConstHostSubviewLocal X,
227  HostSubviewLocal Y,
228  int blockIndex,
229  Teuchos::ETransp mode,
230  LSC alpha,
231  LSC beta) const {
233  size_t numVecs = X.extent(1);
234  size_t numRows = X.extent(0);
235 
236 #ifdef HAVE_IFPACK2_DEBUG
238  X.extent(0) != Y.extent(0),
239  std::logic_error,
240  "Ifpack2::TriDiContainer::solveBlock: X and Y have "
241  "incompatible dimensions ("
242  << X.extent(0) << " resp. "
243  << Y.extent(0) << "). Please report this bug to "
244  "the Ifpack2 developers.");
246  X.extent(0) != static_cast<size_t>(diagBlocks_[blockIndex].numRowsCols()),
247  std::logic_error,
248  "Ifpack2::TriDiContainer::solveBlock: The input "
249  "multivector X has incompatible dimensions from those of the "
250  "inverse operator ("
251  << X.extent(0) << " vs. "
252  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols())
253  << "). Please report this bug to the Ifpack2 developers.");
254  TEUCHOS_TEST_FOR_EXCEPTION(
255  Y.extent(0) != static_cast<size_t>(diagBlocks_[blockIndex].numRowsCols()),
256  std::logic_error,
257  "Ifpack2::TriDiContainer::solveBlock: The output "
258  "multivector Y has incompatible dimensions from those of the "
259  "inverse operator ("
260  << Y.extent(0) << " vs. "
261  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols())
262  << "). Please report this bug to the Ifpack2 developers.");
263 #endif
264 
265  if (alpha == zero) { // don't need to solve the linear system
266  if (beta == zero) {
267  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
268  // any Inf or NaN values in Y (rather than multiplying them by
269  // zero, resulting in NaN values).
270  for (size_t j = 0; j < numVecs; j++)
271  for (size_t i = 0; i < numRows; i++)
272  Y(i, j) = zero;
273  } else { // beta != 0
274  for (size_t j = 0; j < numVecs; j++)
275  for (size_t i = 0; i < numRows; i++)
276  Y(i, j) *= ISC(beta);
277  }
278  } else { // alpha != 0; must solve the linear system
280  // If beta is nonzero or Y is not constant stride, we have to use
281  // a temporary output multivector. It gets a copy of X, since
282  // GETRS overwrites its (multi)vector input with its output.
283 
284  std::vector<LSC> yTemp(numVecs * numRows);
285  for (size_t j = 0; j < numVecs; j++) {
286  for (size_t i = 0; i < numRows; i++)
287  yTemp[j * numRows + i] = X(i, j);
288  }
289 
290  int INFO = 0;
291  const char trans =
292  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
293  int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
294  lapack.GTTRS(trans,
295  diagBlocks_[blockIndex].numRowsCols(),
296  numVecs,
297  diagBlocks_[blockIndex].DL(),
298  diagBlocks_[blockIndex].D(),
299  diagBlocks_[blockIndex].DU(),
300  diagBlocks_[blockIndex].DU2(),
301  blockIpiv,
302  yTemp.data(),
303  numRows,
304  &INFO);
305 
306  if (beta != zero) {
307  for (size_t j = 0; j < numVecs; j++) {
308  for (size_t i = 0; i < numRows; i++) {
309  Y(i, j) *= ISC(beta);
310  Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
311  }
312  }
313  } else {
314  for (size_t j = 0; j < numVecs; j++) {
315  for (size_t i = 0; i < numRows; i++)
316  Y(i, j) = ISC(yTemp[j * numRows + i]);
317  }
318  }
319 
320  TEUCHOS_TEST_FOR_EXCEPTION(
321  INFO != 0, std::runtime_error,
322  "Ifpack2::TriDiContainer::solveBlock: "
323  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
324  "failed with INFO = "
325  << INFO << " != 0.");
326  }
327 }
328 
329 template <class MatrixType, class LocalScalarType>
330 std::ostream& TriDiContainer<MatrixType, LocalScalarType>::print(std::ostream& os) const {
331  Teuchos::FancyOStream fos(Teuchos::rcp(&os, false));
332  fos.setOutputToRootOnly(0);
333  describe(fos);
334  return (os);
335 }
336 
337 template <class MatrixType, class LocalScalarType>
339  std::ostringstream oss;
341  if (this->isInitialized()) {
342  if (this->isComputed()) {
343  oss << "{status = initialized, computed";
344  } else {
345  oss << "{status = initialized, not computed";
346  }
347  } else {
348  oss << "{status = not initialized, not computed";
349  }
350 
351  oss << "}";
352  return oss.str();
353 }
354 
355 template <class MatrixType, class LocalScalarType>
358  const Teuchos::EVerbosityLevel verbLevel) const {
359  using std::endl;
360  if (verbLevel == Teuchos::VERB_NONE) return;
361  os << "================================================================================" << endl;
362  os << "Ifpack2::TriDiContainer" << endl;
363  os << "Number of blocks = " << this->numBlocks_ << endl;
364  os << "isInitialized() = " << this->IsInitialized_ << endl;
365  os << "isComputed() = " << this->IsComputed_ << endl;
366  os << "================================================================================" << endl;
367  os << endl;
368 }
369 
370 template <class MatrixType, class LocalScalarType>
372  return "TriDi";
373 }
374 
375 #define IFPACK2_TRIDICONTAINER_INSTANT(S, LO, GO, N) \
376  template class Ifpack2::TriDiContainer<Tpetra::RowMatrix<S, LO, GO, N>, S>;
377 
378 } // namespace Ifpack2
379 
380 #endif
Store and solve a local TriDi linear problem.
Definition: Ifpack2_TriDiContainer_decl.hpp:72
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_TriDiContainer_def.hpp:338
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_TriDiContainer_def.hpp:371
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:261
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_TriDiContainer_def.hpp:76
typename Kokkos::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition: Ifpack2_Container_decl.hpp:101
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:257
void GTTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *dl, const ScalarType *d, const ScalarType *du, const ScalarType *du2, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
void GTTRF(const OrdinalType &n, ScalarType *dl, ScalarType *d, ScalarType *du, ScalarType *du2, OrdinalType *IPIV, OrdinalType *info) const
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:282
virtual ~TriDiContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_TriDiContainer_def.hpp:63
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_TriDiContainer_def.hpp:357
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_TriDiContainer_def.hpp:330
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
void resize(size_type new_size, const value_type &x=value_type())
TriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO >> &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_TriDiContainer_def.hpp:66
void solveBlock(ConstHostSubviewLocal X, HostSubviewLocal Y, int blockIndex, Teuchos::ETransp mode, LSC alpha, LSC beta) const
Definition: Ifpack2_TriDiContainer_def.hpp:226