10 #ifndef IFPACK2_BANDEDCONTAINER_DEF_HPP
11 #define IFPACK2_BANDEDCONTAINER_DEF_HPP
14 #include "Tpetra_CrsMatrix.hpp"
22 #include "Teuchos_DefaultSerialComm.hpp"
27 template <
class MatrixType,
class LocalScalarType>
33 :
ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed)
34 , ipiv_(this->blockRows_.size() * this->scalarsPerRow_)
35 , kl_(this->numBlocks_, -1)
36 , ku_(this->numBlocks_, -1)
37 , scalarOffsets_(this->numBlocks_) {
39 !matrix->hasColMap(), std::invalid_argument,
40 "Ifpack2::BandedContainer: "
41 "The constructor's input matrix must have a column Map.");
44 template <
class MatrixType,
class LocalScalarType>
48 template <
class MatrixType,
class LocalScalarType>
51 if (List.
isParameter(
"relaxation: banded container superdiagonals")) {
52 int ku = List.
get<
int>(
"relaxation: banded container superdiagonals");
53 for (LO b = 0; b < this->numBlocks_; b++)
56 if (List.
isParameter(
"relaxation: banded container subdiagonals")) {
57 int kl = List.
get<
int>(
"relaxation: banded container subdiagonals");
58 for (LO b = 0; b < this->numBlocks_; b++)
63 template <
class MatrixType,
class LocalScalarType>
70 size_t colToOffsetSize = this->inputMatrix_->getLocalNumCols();
71 if (this->pointIndexed_)
72 colToOffsetSize *= this->bcrsBlockSize_;
73 Array<LO> colToBlockOffset(colToOffsetSize, INVALID);
76 for (
int i = 0; i < this->numBlocks_; i++) {
80 if (this->scalarsPerRow_ > 1) {
82 LO blockStart = this->blockOffsets_[i];
83 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
84 ArrayView<const LO> blockRows = this->getBlockRows(i);
88 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
89 LO localCol = this->translateRowToCol(blockRows[j]);
90 colToBlockOffset[localCol] = blockStart + j;
93 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
94 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
95 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
99 LO inputRow = this->blockRows_[blockStart + blockRow];
100 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
101 LO numEntries = (LO)indices.size();
102 for (LO k = 0; k < numEntries; k++) {
103 LO colOffset = colToBlockOffset[indices[k]];
104 if (blockStart <= colOffset && colOffset < blockEnd) {
108 LO blockCol = colOffset - blockStart;
109 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
110 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
111 LO r = this->bcrsBlockSize_ * blockRow + br;
112 LO c = this->bcrsBlockSize_ * blockCol + bc;
115 if (c - r > maxSuper)
124 LO blockStart = this->blockOffsets_[i];
125 LO blockEnd = (i == this->numBlocks_ - 1) ? this->blockRows_.size() : this->blockOffsets_[i + 1];
126 ArrayView<const LO> blockRows = this->getBlockRows(i);
130 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
132 LO localCol = this->translateRowToCol(blockRows[j]);
133 colToBlockOffset[localCol] = blockStart + j;
135 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
137 LO inputSplitRow = this->blockRows_[blockStart + blockRow];
138 auto rowView = this->getInputRowView(inputSplitRow);
139 for (
size_t k = 0; k < rowView.size(); k++) {
140 LO colOffset = colToBlockOffset[rowView.ind(k)];
141 if (blockStart <= colOffset && colOffset < blockEnd) {
142 LO blockCol = colOffset - blockStart;
143 maxSub = std::max(maxSub, blockRow - blockCol);
144 maxSuper = std::max(maxSuper, blockCol - blockRow);
154 template <
class MatrixType,
class LocalScalarType>
164 for (LO b = 0; b < this->numBlocks_; b++) {
165 LO stride = 2 * kl_[b] + ku_[b] + 1;
166 scalarOffsets_[b] = totalScalars;
167 totalScalars += stride * this->blockSizes_[b] * this->scalarsPerRow_;
169 scalars_.resize(totalScalars);
170 for (
int b = 0; b < this->numBlocks_; b++) {
173 LO nrows = this->blockSizes_[b] * this->scalarsPerRow_;
174 diagBlocks_.emplace_back(
Teuchos::View, scalars_.data() + scalarOffsets_[b], 2 * kl_[b] + ku_[b] + 1, nrows, nrows, kl_[b], kl_[b] + ku_[b]);
177 std::fill(ipiv_.begin(), ipiv_.end(), 0);
180 this->IsComputed_ =
false;
181 this->IsInitialized_ =
true;
184 template <
class MatrixType,
class LocalScalarType>
188 ipiv_.size() != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
189 "Ifpack2::BandedContainer::compute: ipiv_ array has the wrong size. "
190 "Please report this bug to the Ifpack2 developers.");
192 this->IsComputed_ =
false;
193 if (!this->isInitialized()) {
200 this->IsComputed_ =
true;
203 template <
class MatrixType,
class LocalScalarType>
208 SC zero = STS::zero();
216 if (this->scalarsPerRow_ > 1) {
217 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
218 for (
int i = 0; i < this->numBlocks_; i++) {
220 LO blockStart = this->blockOffsets_[i];
221 LO blockEnd = blockStart + this->blockSizes_[i];
222 ArrayView<const LO> blockRows = this->getBlockRows(i);
226 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
227 LO localCol = this->translateRowToCol(blockRows[j]);
228 colToBlockOffset[localCol] = blockStart + j;
230 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
231 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
232 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
236 LO inputRow = this->blockRows_[blockStart + blockRow];
237 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
238 LO numEntries = (LO)indices.size();
239 for (LO k = 0; k < numEntries; k++) {
240 LO colOffset = colToBlockOffset[indices[k]];
241 if (blockStart <= colOffset && colOffset < blockEnd) {
245 LO blockCol = colOffset - blockStart;
246 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
247 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
248 LO r = this->bcrsBlockSize_ * blockRow + br;
249 LO c = this->bcrsBlockSize_ * blockCol + bc;
250 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
252 diagBlocks_[i](r, c) = val;
262 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
263 for (
int i = 0; i < this->numBlocks_; i++) {
265 LO blockStart = this->blockOffsets_[i];
266 LO blockEnd = blockStart + this->blockSizes_[i];
267 ArrayView<const LO> blockRows = this->getBlockRows(i);
271 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
273 LO localCol = this->translateRowToCol(blockRows[j]);
274 colToBlockOffset[localCol] = blockStart + j;
276 for (
size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++) {
278 LO inputPointRow = this->blockRows_[blockStart + blockRow];
279 auto rowView = this->getInputRowView(inputPointRow);
280 for (
size_t k = 0; k < rowView.size(); k++) {
281 LO colOffset = colToBlockOffset[rowView.ind(k)];
282 if (blockStart <= colOffset && colOffset < blockEnd) {
283 LO blockCol = colOffset - blockStart;
284 auto val = rowView.val(k);
286 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
294 template <
class MatrixType,
class LocalScalarType>
295 void BandedContainer<MatrixType, LocalScalarType>::
299 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
302 template <
class MatrixType,
class LocalScalarType>
303 void BandedContainer<MatrixType, LocalScalarType>::
308 for (
int i = 0; i < this->numBlocks_; i++) {
311 "BandedContainer<T>::factor: Diagonal block is an empty SerialBandDenseMatrix<T>!");
313 "BandedContainer<T>::factor: Diagonal block needs kl additional superdiagonals for factorization! However, the number of superdiagonals is smaller than the number of subdiagonals!");
314 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
315 lapack.
GBTRF(diagBlocks_[i].numRows(),
316 diagBlocks_[i].numCols(),
317 diagBlocks_[i].lowerBandwidth(),
318 diagBlocks_[i].upperBandwidth() - diagBlocks_[i].lowerBandwidth(),
319 diagBlocks_[i].values(),
320 diagBlocks_[i].stride(),
326 INFO < 0, std::logic_error,
327 "Ifpack2::BandedContainer::factor: "
328 "LAPACK's _GBTRF (LU factorization with partial pivoting) was called "
329 "incorrectly. INFO = "
331 "Please report this bug to the Ifpack2 developers.");
336 INFO > 0, std::runtime_error,
337 "Ifpack2::BandedContainer::factor: "
338 "LAPACK's _GBTRF (LU factorization with partial pivoting) reports that the "
339 "computed U factor is exactly singular. U("
340 << INFO <<
"," << INFO <<
") "
341 "(one-based index i) is exactly zero. This probably means that the input "
342 "matrix has a singular diagonal block.");
346 template <
class MatrixType,
class LocalScalarType>
347 void BandedContainer<MatrixType, LocalScalarType>::
348 solveBlock(ConstHostSubviewLocal X,
353 const LSC beta)
const {
354 #ifdef HAVE_IFPACK2_DEBUG
356 X.extent(0) != Y.extent(0),
358 "Ifpack2::BandedContainer::solveBlock: X and Y have "
359 "incompatible dimensions ("
360 << X.extent(0) <<
" resp. "
361 << Y.extent(0) <<
"). Please report this bug to "
362 "the Ifpack2 developers.");
364 X.extent(0) !=
static_cast<size_t>(mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows()),
366 "Ifpack2::BandedContainer::solveBlock: The input "
367 "multivector X has incompatible dimensions from those of the "
369 << X.extent(0) <<
" vs. "
370 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows())
371 <<
"). Please report this bug to the Ifpack2 developers.");
373 Y.extent(0) !=
static_cast<size_t>(mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols()),
375 "Ifpack2::BandedContainer::solveBlock: The output "
376 "multivector Y has incompatible dimensions from those of the "
378 << Y.extent(0) <<
" vs. "
379 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols())
380 <<
"). Please report this bug to the Ifpack2 developers.");
383 size_t numRows = X.extent(0);
384 size_t numVecs = X.extent(1);
392 for (
size_t j = 0; j < numVecs; j++)
393 for (
size_t i = 0; i < numRows; i++)
396 for (
size_t j = 0; j < numVecs; j++)
397 for (
size_t i = 0; i < numRows; i++)
398 Y(i, j) = beta * Y(i, j);
405 std::vector<LSC> yTemp(numVecs * numRows);
406 for (
size_t j = 0; j < numVecs; j++) {
407 for (
size_t i = 0; i < numRows; i++)
408 yTemp[j * numRows + i] = X(i, j);
414 const int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
417 diagBlocks_[blockIndex].numCols(),
418 diagBlocks_[blockIndex].lowerBandwidth(),
420 diagBlocks_[blockIndex].upperBandwidth() - diagBlocks_[blockIndex].lowerBandwidth(),
422 diagBlocks_[blockIndex].values(),
423 diagBlocks_[blockIndex].stride(),
430 for (
size_t j = 0; j < numVecs; j++) {
431 for (
size_t i = 0; i < numRows; i++) {
432 Y(i, j) *= ISC(beta);
433 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
437 for (
size_t j = 0; j < numVecs; j++) {
438 for (
size_t i = 0; i < numRows; i++)
439 Y(i, j) = ISC(yTemp[j * numRows + i]);
444 INFO != 0, std::runtime_error,
445 "Ifpack2::BandedContainer::solveBlock: "
446 "LAPACK's _GBTRS (solve using LU factorization with partial pivoting) "
447 "failed with INFO = "
448 << INFO <<
" != 0.");
452 template <
class MatrixType,
class LocalScalarType>
462 template <
class MatrixType,
class LocalScalarType>
466 std::ostringstream oss;
468 if (this->isInitialized()) {
469 if (this->isComputed()) {
470 oss <<
"{status = initialized, computed";
472 oss <<
"{status = initialized, not computed";
475 oss <<
"{status = not initialized, not computed";
481 template <
class MatrixType,
class LocalScalarType>
486 os <<
"================================================================================" << std::endl;
487 os <<
"Ifpack2::BandedContainer" << std::endl;
488 for (
int i = 0; i < this->numBlocks_; i++) {
489 os <<
"Block " << i <<
": Number of rows = " << this->blockSizes_[i] << std::endl;
490 os <<
"Block " << i <<
": Number of subdiagonals = " << diagBlocks_[i].lowerBandwidth() << std::endl;
491 os <<
"Block " << i <<
": Number of superdiagonals = " << diagBlocks_[i].upperBandwidth() << std::endl;
493 os <<
"isInitialized() = " << this->IsInitialized_ << std::endl;
494 os <<
"isComputed() = " << this->IsComputed_ << std::endl;
495 os <<
"================================================================================" << std::endl;
499 template <
class MatrixType,
class LocalScalarType>
506 #define IFPACK2_BANDEDCONTAINER_INSTANT(S, LO, GO, N) \
507 template class Ifpack2::BandedContainer<Tpetra::RowMatrix<S, LO, GO, N>, S>;
509 #endif // IFPACK2_BANDEDCONTAINER_HPP
virtual void initialize() override
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_BandedContainer_def.hpp:156
T & get(const std::string &name, T def_value)
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:307
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void GBTRF(const OrdinalType &m, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
virtual void compute() override
Initialize and compute each block.
Definition: Ifpack2_BandedContainer_def.hpp:186
bool isParameter(const std::string &name) const
virtual std::string description() const
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_BandedContainer_def.hpp:500
virtual std::ostream & print(std::ostream &os) const override
Print information about this object to the given output stream.
Definition: Ifpack2_BandedContainer_def.hpp:455
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
virtual ~BandedContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_BandedContainer_def.hpp:46
void computeBandwidth()
Definition: Ifpack2_BandedContainer_def.hpp:65
void GBTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
BandedContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &, bool pointIndexed)
Constructor.
Definition: Ifpack2_BandedContainer_def.hpp:29
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_BandedContainer_def.hpp:483
virtual void setParameters(const Teuchos::ParameterList &List) override
Set all necessary parameters (super- and sub-diagonal bandwidth)
Definition: Ifpack2_BandedContainer_def.hpp:50
Store and solve a local Banded linear problem.
Definition: Ifpack2_BandedContainer_decl.hpp:69
virtual std::string description() const override
A one-line description of this object.
Definition: Ifpack2_BandedContainer_def.hpp:465