10 #ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
11 #define IFPACK2_TRIDICONTAINER_DEF_HPP
19 #include "Teuchos_DefaultSerialComm.hpp"
24 template <
class MatrixType,
class LocalScalarType>
30 : ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed)
31 , ipiv_(this->blockRows_.size() * this->scalarsPerRow_)
32 , scalarOffsets_(this->numBlocks_) {
34 !matrix->hasColMap(), std::invalid_argument,
35 "Ifpack2::TriDiContainer: "
36 "The constructor's input matrix must have a column Map.");
42 scalarOffsets_[i] = scalarTotal;
45 if (actualBlockRows == 1) {
50 scalarTotal += 4 * (actualBlockRows - 1);
54 scalars_.
resize(scalarTotal);
55 diagBlocks_.reserve(this->numBlocks_);
61 template <
class MatrixType,
class LocalScalarType>
65 template <
class MatrixType,
class LocalScalarType>
67 for (
int i = 0; i < this->numBlocks_; i++)
69 this->IsInitialized_ =
true;
72 this->IsComputed_ =
false;
75 template <
class MatrixType,
class LocalScalarType>
77 #ifdef HAVE_IFPACK2_DEBUG
79 ipiv_.size() != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
80 "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
81 "Please report this bug to the Ifpack2 developers.");
84 this->IsComputed_ =
false;
85 if (!this->isInitialized()) {
90 this->IsComputed_ =
true;
93 template <
class MatrixType,
class LocalScalarType>
105 if (this->scalarsPerRow_ > 1) {
106 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
107 for (
int i = 0; i < this->numBlocks_; i++) {
109 LO blockStart = this->blockOffsets_[i];
110 LO blockEnd = blockStart + this->blockSizes_[i];
111 ArrayView<const LO> blockRows = this->getBlockRows(i);
115 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
116 LO localCol = this->translateRowToCol(blockRows[j]);
117 colToBlockOffset[localCol] = blockStart + j;
119 using h_inds_type =
typename block_crs_matrix_type::local_inds_host_view_type;
120 using h_vals_type =
typename block_crs_matrix_type::values_host_view_type;
121 for (LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++) {
125 LO inputRow = this->blockRows_[blockStart + blockRow];
126 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
127 LO numEntries = (LO)indices.size();
128 for (LO k = 0; k < numEntries; k++) {
129 LO colOffset = colToBlockOffset[indices[k]];
130 if (blockStart <= colOffset && colOffset < blockEnd) {
134 LO blockCol = colOffset - blockStart;
135 for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
136 for (LO br = 0; br < this->bcrsBlockSize_; br++) {
137 LO r = this->bcrsBlockSize_ * blockRow + br;
138 LO c = this->bcrsBlockSize_ * blockCol + bc;
139 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
141 diagBlocks_[i](r, c) = val;
151 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
152 for (
int i = 0; i < this->numBlocks_; i++) {
154 LO blockStart = this->blockOffsets_[i];
155 LO blockEnd = blockStart + this->blockSizes_[i];
156 ArrayView<const LO> blockRows = this->getBlockRows(i);
160 for (
size_t j = 0; j < size_t(blockRows.size()); j++) {
162 LO localCol = this->translateRowToCol(blockRows[j]);
163 colToBlockOffset[localCol] = blockStart + j;
165 for (
size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++) {
167 LO inputPointRow = this->blockRows_[blockStart + blockRow];
168 auto rowView = this->getInputRowView(inputPointRow);
169 for (
size_t k = 0; k < rowView.size(); k++) {
170 LO colOffset = colToBlockOffset[rowView.ind(k)];
171 if (blockStart <= colOffset && colOffset < blockEnd) {
172 LO blockCol = colOffset - blockStart;
173 auto val = rowView.val(k);
175 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
183 template <
class MatrixType,
class LocalScalarType>
184 void TriDiContainer<MatrixType, LocalScalarType>::clearBlocks() {
187 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
190 template <
class MatrixType,
class LocalScalarType>
191 void TriDiContainer<MatrixType, LocalScalarType>::factor() {
192 for (
int i = 0; i < this->numBlocks_; i++) {
195 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
196 lapack.
GTTRF(diagBlocks_[i].numRowsCols(),
200 diagBlocks_[i].DU2(),
204 INFO < 0, std::logic_error,
205 "Ifpack2::TriDiContainer::factor: "
206 "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
207 "incorrectly. INFO = "
209 "Please report this bug to the Ifpack2 developers.");
214 INFO > 0, std::runtime_error,
215 "Ifpack2::TriDiContainer::factor: "
216 "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
217 "computed U factor is exactly singular. U("
218 << INFO <<
"," << INFO <<
") "
219 "(one-based index i) is exactly zero. This probably means that the input "
220 "matrix has a singular diagonal block.");
224 template <
class MatrixType,
class LocalScalarType>
233 size_t numVecs = X.extent(1);
234 size_t numRows = X.extent(0);
236 #ifdef HAVE_IFPACK2_DEBUG
238 X.extent(0) != Y.extent(0),
240 "Ifpack2::TriDiContainer::solveBlock: X and Y have "
241 "incompatible dimensions ("
242 << X.extent(0) <<
" resp. "
243 << Y.extent(0) <<
"). Please report this bug to "
244 "the Ifpack2 developers.");
246 X.extent(0) !=
static_cast<size_t>(diagBlocks_[blockIndex].numRowsCols()),
248 "Ifpack2::TriDiContainer::solveBlock: The input "
249 "multivector X has incompatible dimensions from those of the "
251 << X.extent(0) <<
" vs. "
252 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols())
253 <<
"). Please report this bug to the Ifpack2 developers.");
254 TEUCHOS_TEST_FOR_EXCEPTION(
255 Y.extent(0) !=
static_cast<size_t>(diagBlocks_[blockIndex].numRowsCols()),
257 "Ifpack2::TriDiContainer::solveBlock: The output "
258 "multivector Y has incompatible dimensions from those of the "
260 << Y.extent(0) <<
" vs. "
261 << (mode ==
Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols())
262 <<
"). Please report this bug to the Ifpack2 developers.");
270 for (
size_t j = 0; j < numVecs; j++)
271 for (
size_t i = 0; i < numRows; i++)
274 for (
size_t j = 0; j < numVecs; j++)
275 for (
size_t i = 0; i < numRows; i++)
276 Y(i, j) *=
ISC(beta);
284 std::vector<LSC> yTemp(numVecs * numRows);
285 for (
size_t j = 0; j < numVecs; j++) {
286 for (
size_t i = 0; i < numRows; i++)
287 yTemp[j * numRows + i] = X(i, j);
293 int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
295 diagBlocks_[blockIndex].numRowsCols(),
297 diagBlocks_[blockIndex].DL(),
298 diagBlocks_[blockIndex].D(),
299 diagBlocks_[blockIndex].DU(),
300 diagBlocks_[blockIndex].DU2(),
307 for (
size_t j = 0; j < numVecs; j++) {
308 for (
size_t i = 0; i < numRows; i++) {
309 Y(i, j) *=
ISC(beta);
310 Y(i, j) +=
ISC(alpha * yTemp[j * numRows + i]);
314 for (
size_t j = 0; j < numVecs; j++) {
315 for (
size_t i = 0; i < numRows; i++)
316 Y(i, j) =
ISC(yTemp[j * numRows + i]);
320 TEUCHOS_TEST_FOR_EXCEPTION(
321 INFO != 0, std::runtime_error,
322 "Ifpack2::TriDiContainer::solveBlock: "
323 "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
324 "failed with INFO = "
325 << INFO <<
" != 0.");
329 template <
class MatrixType,
class LocalScalarType>
337 template <
class MatrixType,
class LocalScalarType>
339 std::ostringstream oss;
341 if (this->isInitialized()) {
342 if (this->isComputed()) {
343 oss <<
"{status = initialized, computed";
345 oss <<
"{status = initialized, not computed";
348 oss <<
"{status = not initialized, not computed";
355 template <
class MatrixType,
class LocalScalarType>
361 os <<
"================================================================================" << endl;
362 os <<
"Ifpack2::TriDiContainer" << endl;
363 os <<
"Number of blocks = " << this->numBlocks_ << endl;
364 os <<
"isInitialized() = " << this->IsInitialized_ << endl;
365 os <<
"isComputed() = " << this->IsComputed_ << endl;
366 os <<
"================================================================================" << endl;
370 template <
class MatrixType,
class LocalScalarType>
375 #define IFPACK2_TRIDICONTAINER_INSTANT(S, LO, GO, N) \
376 template class Ifpack2::TriDiContainer<Tpetra::RowMatrix<S, LO, GO, N>, S>;
Store and solve a local TriDi linear problem.
Definition: Ifpack2_TriDiContainer_decl.hpp:72
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_TriDiContainer_def.hpp:338
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_TriDiContainer_def.hpp:371
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:261
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_TriDiContainer_def.hpp:76
typename Kokkos::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition: Ifpack2_Container_decl.hpp:101
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:257
void GTTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *dl, const ScalarType *d, const ScalarType *du, const ScalarType *du2, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
void GTTRF(const OrdinalType &n, ScalarType *dl, ScalarType *d, ScalarType *du, ScalarType *du2, OrdinalType *IPIV, OrdinalType *info) const
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:282
virtual ~TriDiContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_TriDiContainer_def.hpp:63
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_TriDiContainer_def.hpp:357
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_TriDiContainer_def.hpp:330
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
void resize(size_type new_size, const value_type &x=value_type())
TriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO >> &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_TriDiContainer_def.hpp:66
void solveBlock(ConstHostSubviewLocal X, HostSubviewLocal Y, int blockIndex, Teuchos::ETransp mode, LSC alpha, LSC beta) const
Definition: Ifpack2_TriDiContainer_def.hpp:226