Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_DenseContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_DENSECONTAINER_DEF_HPP
11 #define IFPACK2_DENSECONTAINER_DEF_HPP
12 
13 #include "Tpetra_CrsMatrix.hpp"
14 #include "Teuchos_LAPACK.hpp"
15 #include "Tpetra_BlockMultiVector.hpp"
16 
17 #ifdef HAVE_MPI
18 #include <mpi.h>
20 #else
21 #include "Teuchos_DefaultSerialComm.hpp"
22 #endif // HAVE_MPI
23 
24 namespace Ifpack2 {
25 
26 template <class MatrixType, class LocalScalarType>
29  const Teuchos::Array<Teuchos::Array<LO> >& partitions,
31  bool pointIndexed)
32  : ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed)
33  , scalarOffsets_(this->numBlocks_) {
35  !matrix->hasColMap(), std::invalid_argument,
36  "Ifpack2::DenseContainer: "
37  "The constructor's input matrix must have a column Map.");
38 
39  // compute scalarOffsets_
40  GO totalScalars = 0;
41  for (LO i = 0; i < this->numBlocks_; i++) {
42  scalarOffsets_[i] = totalScalars;
43  totalScalars += this->blockSizes_[i] * this->scalarsPerRow_ *
44  this->blockSizes_[i] * this->scalarsPerRow_;
45  }
46  scalars_.resize(totalScalars);
47  for (int i = 0; i < this->numBlocks_; i++) {
48  LO denseRows = this->blockSizes_[i] * this->scalarsPerRow_;
49  // create square dense matrix (stride is same as rows and cols)
50  diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], denseRows, denseRows, denseRows);
51  }
52 
53  ipiv_.resize(this->blockRows_.size() * this->scalarsPerRow_);
54 }
55 
56 template <class MatrixType, class LocalScalarType>
59 
60 template <class MatrixType, class LocalScalarType>
63  // Fill the diagonal block and LU permutation array with zeros.
64  for (int i = 0; i < this->numBlocks_; i++)
65  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
66  std::fill(ipiv_.begin(), ipiv_.end(), 0);
67 
68  this->IsInitialized_ = true;
69  // We assume that if you called this method, you intend to recompute
70  // everything.
71  this->IsComputed_ = false;
72 }
73 
74 template <class MatrixType, class LocalScalarType>
77  // FIXME: I am commenting this out because it breaks block CRS support
78  // TEUCHOS_TEST_FOR_EXCEPTION(
79  // static_cast<size_t> (ipiv_.size ()) != numRows_, std::logic_error,
80  // "Ifpack2::DenseContainer::compute: ipiv_ array has the wrong size. "
81  // "Please report this bug to the Ifpack2 developers.");
82 
83  this->IsComputed_ = false;
84  if (!this->isInitialized()) {
85  this->initialize();
86  }
87 
88  extract(); // Extract the submatrices
89  factor(); // Factor them
90  this->IsComputed_ = true;
91 }
92 
93 template <class MatrixType, class LocalScalarType>
95  using Teuchos::Array;
96  using Teuchos::ArrayView;
97  using STS = Teuchos::ScalarTraits<SC>;
98  SC zero = STS::zero();
99  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
100  // To extract diagonal blocks, need to translate local rows to local columns.
101  // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
102  // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
103  // offset - blockOffsets_[b]: gives the column within block b.
104  //
105  // This provides the block and col within a block in O(1).
106  if (this->scalarsPerRow_ > 1) {
107  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
108  for (int i = 0; i < this->numBlocks_; i++) {
109  // Get the interval where block i is defined in blockRows_
110  LO blockStart = this->blockOffsets_[i];
111  LO blockEnd = blockStart + this->blockSizes_[i];
112  ArrayView<const LO> blockRows = this->getBlockRows(i);
113  // Set the lookup table entries for the columns appearing in block i.
114  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
115  // this is OK. The values updated here are only needed to process block i's entries.
116  for (size_t j = 0; j < size_t(blockRows.size()); j++) {
117  LO localCol = this->translateRowToCol(blockRows[j]);
118  colToBlockOffset[localCol] = blockStart + j;
119  }
120  using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
121  using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
122  for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
123  // get a raw view of the whole block row
124  h_inds_type indices;
125  h_vals_type values;
126  LO inputRow = this->blockRows_[blockStart + blockRow];
127  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
128  LO numEntries = (LO)indices.size();
129  for (LO k = 0; k < numEntries; k++) {
130  LO colOffset = colToBlockOffset[indices[k]];
131  if (blockStart <= colOffset && colOffset < blockEnd) {
132  // This entry does appear in the diagonal block.
133  //(br, bc) identifies the scalar's position in the BlockCrs block.
134  // Convert this to (r, c) which is its position in the container block.
135  LO blockCol = colOffset - blockStart;
136  for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
137  for (LO br = 0; br < this->bcrsBlockSize_; br++) {
138  LO r = this->bcrsBlockSize_ * blockRow + br;
139  LO c = this->bcrsBlockSize_ * blockCol + bc;
140  auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
141  if (val != zero)
142  diagBlocks_[i](r, c) = val;
143  }
144  }
145  }
146  }
147  }
148  }
149  } else {
150  // get the mapping from point-indexed matrix columns to offsets in blockRows_
151  //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
152  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
153  for (int i = 0; i < this->numBlocks_; i++) {
154  // Get the interval where block i is defined in blockRows_
155  LO blockStart = this->blockOffsets_[i];
156  LO blockEnd = blockStart + this->blockSizes_[i];
157  ArrayView<const LO> blockRows = this->getBlockRows(i);
158  // Set the lookup table entries for the columns appearing in block i.
159  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
160  // this is OK. The values updated here are only needed to process block i's entries.
161  for (size_t j = 0; j < size_t(blockRows.size()); j++) {
162  // translateRowToCol will return the corresponding split column
163  LO localCol = this->translateRowToCol(blockRows[j]);
164  colToBlockOffset[localCol] = blockStart + j;
165  }
166  for (size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++) {
167  // get a view of the split row
168  LO inputPointRow = this->blockRows_[blockStart + blockRow];
169  auto rowView = this->getInputRowView(inputPointRow);
170  for (size_t k = 0; k < rowView.size(); k++) {
171  LO colOffset = colToBlockOffset[rowView.ind(k)];
172  if (blockStart <= colOffset && colOffset < blockEnd) {
173  LO blockCol = colOffset - blockStart;
174  auto val = rowView.val(k);
175  if (val != zero)
176  diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
177  }
178  }
179  }
180  }
181  }
182 }
183 
184 template <class MatrixType, class LocalScalarType>
185 void DenseContainer<MatrixType, LocalScalarType>::
186  factor() {
188  for (int i = 0; i < this->numBlocks_; i++) {
189  int INFO = 0;
190  int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
191  lapack.GETRF(diagBlocks_[i].numRows(),
192  diagBlocks_[i].numCols(),
193  diagBlocks_[i].values(),
194  diagBlocks_[i].stride(),
195  blockIpiv, &INFO);
196  // INFO < 0 is a bug.
198  INFO < 0, std::logic_error,
199  "Ifpack2::DenseContainer::factor: "
200  "LAPACK's _GETRF (LU factorization with partial pivoting) was called "
201  "incorrectly. INFO = "
202  << INFO << " < 0. "
203  "Please report this bug to the Ifpack2 developers.");
204  // INFO > 0 means the matrix is singular. This is probably an issue
205  // either with the choice of rows the rows we extracted, or with the
206  // input matrix itself.
208  INFO > 0, std::runtime_error,
209  "Ifpack2::DenseContainer::factor: "
210  "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the "
211  "computed U factor is exactly singular. U("
212  << INFO << "," << INFO << ") "
213  "(one-based index i) is exactly zero. This probably means that the input "
214  "matrix has a singular diagonal block.");
215  }
216 }
217 
218 template <class MatrixType, class LocalScalarType>
219 void DenseContainer<MatrixType, LocalScalarType>::
220  solveBlock(ConstHostSubviewLocal X,
221  HostSubviewLocal Y,
222  int blockIndex,
223  Teuchos::ETransp mode,
224  LSC alpha,
225  LSC beta) const {
226 #ifdef HAVE_IFPACK2_DEBUG
228  X.extent(0) != Y.extent(0),
229  std::logic_error,
230  "Ifpack2::DenseContainer::solveBlock: X and Y have "
231  "incompatible dimensions ("
232  << X.extent(0) << " resp. "
233  << Y.extent(0) << "). Please report this bug to "
234  "the Ifpack2 developers.");
235 
237  X.extent(1) != Y.extent(1),
238  std::logic_error,
239  "Ifpack2::DenseContainer::solveBlock: X and Y have "
240  "incompatible numbers of vectors ("
241  << X.extent(1) << " resp. "
242  << Y.extent(1) << "). Please report this bug to "
243  "the Ifpack2 developers.");
244 #endif
245 
246  typedef Teuchos::ScalarTraits<LSC> STS;
247  size_t numRows = X.extent(0);
248  size_t numVecs = X.extent(1);
249  if (alpha == STS::zero()) { // don't need to solve the linear system
250  if (beta == STS::zero()) {
251  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
252  // any Inf or NaN values in Y (rather than multiplying them by
253  // zero, resulting in NaN values).
254  for (size_t j = 0; j < numVecs; j++) {
255  for (size_t i = 0; i < numRows; i++)
256  Y(i, j) = STS::zero();
257  }
258  } else // beta != 0
259  for (size_t j = 0; j < numVecs; j++) {
260  for (size_t i = 0; i < numRows; i++)
261  Y(i, j) = beta * Y(i, j);
262  }
263  } else { // alpha != 0; must solve the linear system
265  // If beta is nonzero or Y is not constant stride, we have to use
266  // a temporary output multivector. It gets a (deep) copy of X,
267  // since GETRS overwrites its (multi)vector input with its output.
268  std::vector<LSC> yTemp(numVecs * numRows);
269  for (size_t j = 0; j < numVecs; j++) {
270  for (size_t i = 0; i < numRows; i++)
271  yTemp[j * numRows + i] = X(i, j);
272  }
273 
274  int INFO = 0;
275  int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
276  const char trans =
277  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
278  lapack.GETRS(trans,
279  diagBlocks_[blockIndex].numRows(),
280  numVecs,
281  diagBlocks_[blockIndex].values(),
282  diagBlocks_[blockIndex].stride(),
283  blockIpiv,
284  yTemp.data(),
285  numRows,
286  &INFO);
287 
288  if (beta != STS::zero()) {
289  for (size_t j = 0; j < numVecs; j++) {
290  for (size_t i = 0; i < numRows; i++) {
291  Y(i, j) *= ISC(beta);
292  Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
293  }
294  }
295  } else {
296  for (size_t j = 0; j < numVecs; j++) {
297  for (size_t i = 0; i < numRows; i++)
298  Y(i, j) = ISC(yTemp[j * numRows + i]);
299  }
300  }
301 
303  INFO != 0, std::runtime_error,
304  "Ifpack2::DenseContainer::solveBlock: "
305  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
306  "failed with INFO = "
307  << INFO << " != 0.");
308  }
309 }
310 
311 template <class MatrixType, class LocalScalarType>
312 std::ostream&
314  print(std::ostream& os) const {
315  Teuchos::FancyOStream fos(Teuchos::rcpFromRef(os));
316  fos.setOutputToRootOnly(0);
317  this->describe(fos);
318  return os;
319 }
320 
321 template <class MatrixType, class LocalScalarType>
322 std::string
324  description() const {
325  std::ostringstream oss;
326  oss << "Ifpack::DenseContainer: ";
327  if (this->isInitialized()) {
328  if (this->isComputed()) {
329  oss << "{status = initialized, computed";
330  } else {
331  oss << "{status = initialized, not computed";
332  }
333  } else {
334  oss << "{status = not initialized, not computed";
335  }
336 
337  oss << "}";
338  return oss.str();
339 }
340 
341 template <class MatrixType, class LocalScalarType>
344  const Teuchos::EVerbosityLevel verbLevel) const {
345  using std::endl;
346  if (verbLevel == Teuchos::VERB_NONE) return;
347  os << "================================================================================" << endl;
348  os << "Ifpack2::DenseContainer" << endl;
349  for (int i = 0; i < this->numBlocks_; i++) {
350  os << "Block " << i << " number of rows = " << this->blockSizes_[i] << endl;
351  }
352  os << "isInitialized() = " << this->IsInitialized_ << endl;
353  os << "isComputed() = " << this->IsComputed_ << endl;
354  os << "================================================================================" << endl;
355  os << endl;
356 }
357 
358 template <class MatrixType, class LocalScalarType>
360  diagBlocks_.clear();
361  scalars_.clear();
363 }
364 
365 template <class MatrixType, class LocalScalarType>
367  return "Dense";
368 }
369 
370 } // namespace Ifpack2
371 
372 // There's no need to instantiate for CrsMatrix too. All Ifpack2
373 // preconditioners can and should do dynamic casts if they need a type
374 // more specific than RowMatrix.
375 
376 #define IFPACK2_DENSECONTAINER_INSTANT(S, LO, GO, N) \
377  template class Ifpack2::DenseContainer<Tpetra::RowMatrix<S, LO, GO, N>, S>;
378 
379 #endif // IFPACK2_DENSECONTAINER_DEF_HPP
Teuchos::Array< LO > blockRows_
Local indices of the rows of the input matrix that belong to this block.
Definition: Ifpack2_Container_decl.hpp:259
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:261
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:257
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:307
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_DenseContainer_def.hpp:76
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:282
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_DenseContainer_def.hpp:366
void GETRF(const OrdinalType &m, const OrdinalType &n, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
Store and solve a local dense linear problem.
Definition: Ifpack2_DenseContainer_decl.hpp:70
DenseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO >> &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_DenseContainer_def.hpp:314
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_DenseContainer_def.hpp:343
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
void resize(size_type new_size, const value_type &x=value_type())
virtual ~DenseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_DenseContainer_def.hpp:58
void GETRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
size_type size() const
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_DenseContainer_def.hpp:324
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_DenseContainer_def.hpp:62