Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_SparseContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_SPARSECONTAINER_DEF_HPP
11 #define IFPACK2_SPARSECONTAINER_DEF_HPP
12 
14 #ifdef HAVE_MPI
15 #include <mpi.h>
17 #else
18 #include "Teuchos_DefaultSerialComm.hpp"
19 #endif
21 
22 namespace Ifpack2 {
23 
24 //==============================================================================
25 template <class MatrixType, class InverseType>
28  const Teuchos::Array<Teuchos::Array<LO>>& partitions,
30  bool pointIndexed)
31  : ContainerImpl<MatrixType, InverseScalar>(matrix, partitions, pointIndexed)
32  ,
33 #ifdef HAVE_MPI
34  localComm_(Teuchos::rcp(new Teuchos::MpiComm<int>(MPI_COMM_SELF)))
35 #else
36  localComm_(Teuchos::rcp(new Teuchos::SerialComm<int>()))
37 #endif // HAVE_MPI
38 {
39 }
40 
41 //==============================================================================
42 template <class MatrixType, class InverseType>
45 
46 //==============================================================================
47 template <class MatrixType, class InverseType>
49  List_ = List;
50 }
51 
52 //==============================================================================
53 template <class MatrixType, class InverseType>
55  // We assume that if you called this method, you intend to recompute
56  // everything. Thus, we release references to all the internal
57  // objects. We do this first to save memory. (In an RCP
58  // assignment, the right-hand side and left-hand side coexist before
59  // the left-hand side's reference count gets updated.)
61  Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::initialize");
62  Teuchos::TimeMonitor timeMon(*timer);
63 
64  // Will create the diagonal blocks and their inverses
65  // in extract()
66  diagBlocks_.assign(this->numBlocks_, Teuchos::null);
67  Inverses_.assign(this->numBlocks_, Teuchos::null);
68 
69  // Extract the submatrices.
70  this->extractGraph();
71 
72  // Initialize the inverse operator.
73  for (int i = 0; i < this->numBlocks_; i++) {
74  Inverses_[i]->setParameters(List_);
75  Inverses_[i]->initialize();
76  }
77 
78  this->IsInitialized_ = true;
79  this->IsComputed_ = false;
80 }
81 
82 //==============================================================================
83 template <class MatrixType, class InverseType>
86  Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::compute");
87  Teuchos::TimeMonitor timeMon(*timer);
88 
89  this->IsComputed_ = false;
90  if (!this->isInitialized()) {
91  this->initialize();
92  }
93 
94  // Extract the submatrices values
95  this->extractValues();
96 
97  // Compute the inverse operator.
98  for (int i = 0; i < this->numBlocks_; i++) {
99  Inverses_[i]->compute();
100  }
101 
102  this->IsComputed_ = true;
103 }
104 
105 //==============================================================================
106 template <class MatrixType, class InverseType>
108  for (auto inv : Inverses_)
109  delete inv.get();
110  Inverses_.clear();
111  diagBlocks_.clear();
113 }
114 
115 //==============================================================================
116 template <class MatrixType, class InverseType>
118  solveBlockMV(const inverse_mv_type& X,
119  inverse_mv_type& Y,
120  int blockIndex,
121  Teuchos::ETransp mode,
122  InverseScalar alpha,
123  InverseScalar beta) const {
125  Inverses_[blockIndex]->getDomainMap()->getLocalNumElements() != X.getLocalLength(),
126  std::logic_error,
127  "Ifpack2::SparseContainer::apply: Inverse_ "
128  "operator and X have incompatible dimensions ("
129  << Inverses_[blockIndex]->getDomainMap()->getLocalNumElements() << " resp. "
130  << X.getLocalLength() << "). Please report this bug to "
131  "the Ifpack2 developers.");
133  Inverses_[blockIndex]->getRangeMap()->getLocalNumElements() != Y.getLocalLength(),
134  std::logic_error,
135  "Ifpack2::SparseContainer::apply: Inverse_ "
136  "operator and Y have incompatible dimensions ("
137  << Inverses_[blockIndex]->getRangeMap()->getLocalNumElements() << " resp. "
138  << Y.getLocalLength() << "). Please report this bug to "
139  "the Ifpack2 developers.");
140  Inverses_[blockIndex]->apply(X, Y, mode, alpha, beta);
141 }
142 
143 template <class MatrixType, class InverseType>
145  apply(ConstHostView X,
146  HostView Y,
147  int blockIndex,
148  Teuchos::ETransp mode,
149  SC alpha,
150  SC beta) const {
151  using Teuchos::ArrayView;
152  using Teuchos::as;
153 
154  // The InverseType template parameter might have different template
155  // parameters (Scalar, LO, GO, and/or Node) than MatrixType. For
156  // example, MatrixType (a global object) might use a bigger GO
157  // (global ordinal) type than InverseType (which deals with the
158  // diagonal block, a local object). This means that we might have
159  // to convert X and Y to the Tpetra::MultiVector specialization that
160  // InverseType wants. This class' X_ and Y_ internal fields are of
161  // the right type for InverseType, so we can use those as targets.
163  Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::apply");
164  Teuchos::TimeMonitor timeMon(*timer);
165 
166  // Tpetra::MultiVector specialization corresponding to InverseType.
168  size_t numVecs = X.extent(1);
169 
171  !this->IsComputed_, std::runtime_error,
172  "Ifpack2::SparseContainer::apply: "
173  "You must have called the compute() method before you may call apply(). "
174  "You may call the apply() method as many times as you want after calling "
175  "compute() once, but you must have called compute() at least once.");
177  X.extent(1) != Y.extent(1), std::runtime_error,
178  "Ifpack2::SparseContainer::apply: X and Y have different numbers of "
179  "vectors. X has "
180  << X.extent(1)
181  << ", but Y has " << Y.extent(1) << ".");
182 
183  if (numVecs == 0) {
184  return; // done! nothing to do
185  }
186 
187  const LO numRows = this->blockSizes_[blockIndex];
188 
189  // The operator Inverse_ works on a permuted subset of the local
190  // parts of X and Y. The subset and permutation are defined by the
191  // index array returned by getBlockRows(). If the permutation is
192  // trivial and the subset is exactly equal to the local indices,
193  // then we could use the local parts of X and Y exactly, without
194  // needing to permute. Otherwise, we have to use temporary storage
195  // to permute X and Y. For now, we always use temporary storage.
196  //
197  // Create temporary permuted versions of the input and output.
198  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
199  // store the permuted versions of X resp. Y. Note that X_local has
200  // the domain Map of the operator, which may be a permuted subset of
201  // the local Map corresponding to X.getMap(). Similarly, Y_local
202  // has the range Map of the operator, which may be a permuted subset
203  // of the local Map corresponding to Y.getMap(). numRows here
204  // gives the number of rows in the row Map of the local Inverse_
205  // operator.
206  //
207  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
208  // here that the row Map and the range Map of that operator are
209  // the same.
210  //
211  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
212  // really belongs in Tpetra.
213 
214  if (invX.size() == 0) {
215  for (LO i = 0; i < this->numBlocks_; i++)
216  invX.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
217  for (LO i = 0; i < this->numBlocks_; i++)
218  invY.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
219  }
220  inverse_mv_type& X_local = invX[blockIndex];
222  X_local.getLocalLength() != size_t(numRows * this->scalarsPerRow_), std::logic_error,
223  "Ifpack2::SparseContainer::apply: "
224  "X_local has length "
225  << X_local.getLocalLength() << ", which does "
226  "not match numRows = "
227  << numRows * this->scalarsPerRow_ << ". Please report this bug to "
228  "the Ifpack2 developers.");
229  const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
230  if (this->scalarsPerRow_ == 1)
231  mvgs.gatherMVtoView(X_local, X, blockRows);
232  else
233  mvgs.gatherMVtoViewBlock(X_local, X, blockRows, this->scalarsPerRow_);
234 
235  // We must gather the output multivector Y even on input to
236  // Inverse_->apply(), since the Inverse_ operator might use it as an
237  // initial guess for a linear solve. We have no way of knowing
238  // whether it does or does not.
239 
240  inverse_mv_type& Y_local = invY[blockIndex];
242  Y_local.getLocalLength() != size_t(numRows * this->scalarsPerRow_), std::logic_error,
243  "Ifpack2::SparseContainer::apply: "
244  "Y_local has length "
245  << Y_local.getLocalLength() << ", which does "
246  "not match numRows = "
247  << numRows * this->scalarsPerRow_ << ". Please report this bug to "
248  "the Ifpack2 developers.");
249 
250  if (this->scalarsPerRow_ == 1)
251  mvgs.gatherMVtoView(Y_local, Y, blockRows);
252  else
253  mvgs.gatherMVtoViewBlock(Y_local, Y, blockRows, this->scalarsPerRow_);
254 
255  // Apply the local operator:
256  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
257  this->solveBlockMV(X_local, Y_local, blockIndex, mode,
258  InverseScalar(alpha), InverseScalar(beta));
259 
260  // Scatter the permuted subset output vector Y_local back into the
261  // original output multivector Y.
262  if (this->scalarsPerRow_ == 1)
263  mvgs.scatterMVtoView(Y, Y_local, blockRows);
264  else
265  mvgs.scatterMVtoViewBlock(Y, Y_local, blockRows, this->scalarsPerRow_);
266 }
267 
268 //==============================================================================
269 template <class MatrixType, class InverseType>
271  weightedApply(ConstHostView X,
272  HostView Y,
273  ConstHostView D,
274  int blockIndex,
275  Teuchos::ETransp mode,
276  SC alpha,
277  SC beta) const {
278  using std::cerr;
279  using std::endl;
280  using Teuchos::ArrayView;
281  using Teuchos::Range1D;
283 
284  // The InverseType template parameter might have different template
285  // parameters (Scalar, LO, GO, and/or Node) than MatrixType. For
286  // example, MatrixType (a global object) might use a bigger GO
287  // (global ordinal) type than InverseType (which deals with the
288  // diagonal block, a local object). This means that we might have
289  // to convert X and Y to the Tpetra::MultiVector specialization that
290  // InverseType wants. This class' X_ and Y_ internal fields are of
291  // the right type for InverseType, so we can use those as targets.
292 
293  // Tpetra::Vector specialization corresponding to InverseType.
294  typedef Tpetra::Vector<InverseScalar, InverseLocalOrdinal, InverseGlobalOrdinal, InverseNode> inverse_vector_type;
295 
297  const size_t numVecs = X.extent(1);
298 
300  !this->IsComputed_, std::runtime_error,
301  "Ifpack2::SparseContainer::"
302  "weightedApply: You must have called the compute() method before you may "
303  "call apply(). You may call the apply() method as many times as you want "
304  "after calling compute() once, but you must have called compute() at least "
305  "once.");
307  X.extent(1) != Y.extent(1), std::runtime_error,
308  "Ifpack2::SparseContainer::weightedApply: X and Y have different numbers "
309  "of vectors. X has "
310  << X.extent(1) << ", but Y has "
311  << Y.extent(1) << ".");
312 
313  // bmk 7-2019: BlockRelaxation already checked this, but if that changes...
315  this->scalarsPerRow_ > 1, std::logic_error,
316  "Ifpack2::SparseContainer::weightedApply: "
317  "Use of block rows isn't allowed in overlapping Jacobi (the only method that uses weightedApply");
318 
319  if (numVecs == 0) {
320  return; // done! nothing to do
321  }
322 
323  // The operator Inverse_ works on a permuted subset of the local
324  // parts of X and Y. The subset and permutation are defined by the
325  // index array returned by getBlockRows(). If the permutation is
326  // trivial and the subset is exactly equal to the local indices,
327  // then we could use the local parts of X and Y exactly, without
328  // needing to permute. Otherwise, we have to use temporary storage
329  // to permute X and Y. For now, we always use temporary storage.
330  //
331  // Create temporary permuted versions of the input and output.
332  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
333  // store the permuted versions of X resp. Y. Note that X_local has
334  // the domain Map of the operator, which may be a permuted subset of
335  // the local Map corresponding to X.getMap(). Similarly, Y_local
336  // has the range Map of the operator, which may be a permuted subset
337  // of the local Map corresponding to Y.getMap(). numRows here
338  // gives the number of rows in the row Map of the local Inverse_
339  // operator.
340  //
341  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
342  // here that the row Map and the range Map of that operator are
343  // the same.
344  //
345  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
346  // really belongs in Tpetra.
347  const LO numRows = this->blockSizes_[blockIndex];
348 
349  if (invX.size() == 0) {
350  for (LO i = 0; i < this->numBlocks_; i++)
351  invX.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
352  for (LO i = 0; i < this->numBlocks_; i++)
353  invY.emplace_back(Inverses_[i]->getDomainMap(), numVecs);
354  }
355  inverse_mv_type& X_local = invX[blockIndex];
356  const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
357  mvgs.gatherMVtoView(X_local, X, blockRows);
358 
359  // We must gather the output multivector Y even on input to
360  // Inverse_->apply(), since the Inverse_ operator might use it as an
361  // initial guess for a linear solve. We have no way of knowing
362  // whether it does or does not.
363 
364  inverse_mv_type Y_local = invY[blockIndex];
365  TEUCHOS_TEST_FOR_EXCEPTION(
366  Y_local.getLocalLength() != size_t(numRows), std::logic_error,
367  "Ifpack2::SparseContainer::weightedApply: "
368  "Y_local has length "
369  << X_local.getLocalLength() << ", which does "
370  "not match numRows = "
371  << numRows << ". Please report this bug to "
372  "the Ifpack2 developers.");
373  mvgs.gatherMVtoView(Y_local, Y, blockRows);
374 
375  // Apply the diagonal scaling D to the input X. It's our choice
376  // whether the result has the original input Map of X, or the
377  // permuted subset Map of X_local. If the latter, we also need to
378  // gather D into the permuted subset Map. We choose the latter, to
379  // save memory and computation. Thus, we do the following:
380  //
381  // 1. Gather D into a temporary vector D_local.
382  // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
383  // 3. Compute X_scaled := diag(D_loca) * X_local.
384 
385  inverse_vector_type D_local(Inverses_[blockIndex]->getDomainMap());
386  TEUCHOS_TEST_FOR_EXCEPTION(
387  D_local.getLocalLength() != size_t(this->blockSizes_[blockIndex]), std::logic_error,
388  "Ifpack2::SparseContainer::weightedApply: "
389  "D_local has length "
390  << X_local.getLocalLength() << ", which does "
391  "not match numRows = "
392  << this->blockSizes_[blockIndex] << ". Please report this bug to "
393  "the Ifpack2 developers.");
394  mvgs.gatherMVtoView(D_local, D, blockRows);
395  inverse_mv_type X_scaled(Inverses_[blockIndex]->getDomainMap(), numVecs);
396  X_scaled.elementWiseMultiply(STS::one(), D_local, X_local, STS::zero());
397 
398  // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we
399  // can write the result of Inverse_->apply() directly to Y_local, so
400  // Y_temp may alias Y_local. Otherwise, if beta != 0, we need
401  // temporary storage for M^{-1}*X_scaled, so Y_temp must be
402  // different than Y_local.
403  inverse_mv_type* Y_temp;
404  if (InverseScalar(beta) == STS::zero()) {
405  Y_temp = &Y_local;
406  } else {
407  Y_temp = new inverse_mv_type(Inverses_[blockIndex]->getRangeMap(), numVecs);
408  }
409  // Apply the local operator: Y_temp := M^{-1} * X_scaled
410  Inverses_[blockIndex]->apply(X_scaled, *Y_temp, mode);
411  // Y_local := beta * Y_local + alpha * diag(D_local) * Y_tmp.
412  //
413  // Note that we still use the permuted subset scaling D_local here,
414  // because Y_temp has the same permuted subset Map. That's good, in
415  // fact, because it's a subset: less data to read and multiply.
416  Y_local.elementWiseMultiply(alpha, D_local, *Y_temp, beta);
417  if (Y_temp != &Y_local)
418  delete Y_temp;
419  // Copy the permuted subset output vector Y_local into the original
420  // output multivector Y.
421  mvgs.scatterMVtoView(Y, Y_local, blockRows);
422 }
423 
424 //==============================================================================
425 template <class MatrixType, class InverseType>
426 std::ostream& SparseContainer<MatrixType, InverseType>::print(std::ostream& os) const {
427  Teuchos::FancyOStream fos(Teuchos::rcp(&os, false));
428  fos.setOutputToRootOnly(0);
429  describe(fos);
430  return (os);
431 }
432 
433 //==============================================================================
434 template <class MatrixType, class InverseType>
436  std::ostringstream oss;
437  oss << "\"Ifpack2::SparseContainer\": {";
438  if (this->isInitialized()) {
439  if (this->isComputed()) {
440  oss << "status = initialized, computed";
441  } else {
442  oss << "status = initialized, not computed";
443  }
444  } else {
445  oss << "status = not initialized, not computed";
446  }
447  for (int i = 0; i < this->numBlocks_; i++) {
448  oss << ", Block Inverse " << i << ": {";
449  oss << Inverses_[i]->description();
450  oss << "}";
451  }
452  oss << "}";
453  return oss.str();
454 }
455 
456 //==============================================================================
457 template <class MatrixType, class InverseType>
459  using std::endl;
460  if (verbLevel == Teuchos::VERB_NONE) return;
461  os << "================================================================================" << endl;
462  os << "Ifpack2::SparseContainer" << endl;
463  for (int i = 0; i < this->numBlocks_; i++) {
464  os << "Block " << i << " rows: = " << this->blockSizes_[i] << endl;
465  }
466  os << "isInitialized() = " << this->IsInitialized_ << endl;
467  os << "isComputed() = " << this->IsComputed_ << endl;
468  os << "================================================================================" << endl;
469  os << endl;
470 }
471 
472 //==============================================================================
473 template <class MatrixType, class InverseType>
475  extract() {
476  using Teuchos::Array;
477  using Teuchos::ArrayView;
478  using Teuchos::RCP;
479  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
480  // To extract diagonal blocks, need to translate local rows to local columns.
481  // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
482  // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
483  // offset - blockOffsets_[b]: gives the column within block b.
484  //
485  // This provides the block and col within a block in O(1).
486  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
487  Teuchos::Array<InverseScalar> valuesToInsert;
488  if (this->scalarsPerRow_ > 1) {
489  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
490  for (int i = 0; i < this->numBlocks_; i++) {
491  // Get the interval where block i is defined in blockRows_
492  LO blockStart = this->blockOffsets_[i];
493  LO blockSize = this->blockSizes_[i];
494  LO blockPointSize = this->blockSizes_[i] * this->scalarsPerRow_;
495  LO blockEnd = blockStart + blockSize;
496  ArrayView<const LO> blockRows = this->getBlockRows(i);
497  // Set the lookup table entries for the columns appearing in block i.
498  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
499  // this is OK. The values updated here are only needed to process block i's entries.
500  for (LO j = 0; j < blockSize; j++) {
501  LO localCol = this->translateRowToCol(blockRows[j]);
502  colToBlockOffset[localCol] = blockStart + j;
503  }
504  // First, count the number of entries in each row of block i
505  //(to allocate it with StaticProfile)
506  Array<size_t> rowEntryCounts(blockPointSize, 0);
507  // blockRow counts the BlockCrs LIDs that are going into this block
508  // Rows are inserted into the CrsMatrix in sequential order
509  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
510  using vals_type = typename block_crs_matrix_type::values_host_view_type;
511  for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
512  // get a raw view of the whole block row
513  inds_type indices;
514  vals_type values;
515  LO inputRow = this->blockRows_[blockStart + blockRow];
516  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
517  LO numEntries = (LO)indices.size();
518  for (LO br = 0; br < this->bcrsBlockSize_; br++) {
519  for (LO k = 0; k < numEntries; k++) {
520  LO colOffset = colToBlockOffset[indices[k]];
521  if (blockStart <= colOffset && colOffset < blockEnd) {
522  rowEntryCounts[blockRow * this->bcrsBlockSize_ + br] += this->bcrsBlockSize_;
523  }
524  }
525  }
526  }
527  // now that row sizes are known, can allocate the diagonal matrix
528  RCP<InverseMap> tempMap(new InverseMap(blockPointSize, 0, this->localComm_));
529  diagBlocks_[i] = rcp(new InverseCrs(tempMap, rowEntryCounts));
530  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
531  // insert the actual entries, one row at a time
532  for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
533  // get a raw view of the whole block row
534  inds_type indices;
535  vals_type values;
536  LO inputRow = this->blockRows_[blockStart + blockRow];
537  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
538  LO numEntries = (LO)indices.size();
539  for (LO br = 0; br < this->bcrsBlockSize_; br++) {
540  indicesToInsert.clear();
541  valuesToInsert.clear();
542  for (LO k = 0; k < numEntries; k++) {
543  LO colOffset = colToBlockOffset[indices[k]];
544  if (blockStart <= colOffset && colOffset < blockEnd) {
545  LO blockCol = colOffset - blockStart;
546  // bc iterates over the columns in (block) entry k
547  for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
548  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
549  valuesToInsert.push_back(values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + bc * this->bcrsBlockSize_ + br]);
550  }
551  }
552  }
553  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
554  if (indicesToInsert.size())
555  diagBlocks_[i]->insertGlobalValues(rowToInsert, indicesToInsert(), valuesToInsert());
556  }
557  }
558  diagBlocks_[i]->fillComplete();
559  }
560  } else {
561  // get the mapping from point-indexed matrix columns to offsets in blockRows_
562  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
563  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
564  for (int i = 0; i < this->numBlocks_; i++) {
565  // Get the interval where block i is defined in blockRows_
566  LO blockStart = this->blockOffsets_[i];
567  LO blockSize = this->blockSizes_[i];
568  LO blockEnd = blockStart + blockSize;
569  ArrayView<const LO> blockRows = this->getBlockRows(i);
570  // Set the lookup table entries for the columns appearing in block i.
571  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
572  // this is OK. The values updated here are only needed to process block i's entries.
573  for (LO j = 0; j < blockSize; j++) {
574  // translateRowToCol will return the corresponding split column
575  LO localCol = this->translateRowToCol(blockRows[j]);
576  colToBlockOffset[localCol] = blockStart + j;
577  }
578  Teuchos::Array<size_t> rowEntryCounts(blockSize, 0);
579  for (LO j = 0; j < blockSize; j++) {
580  rowEntryCounts[j] = this->getInputRowView(this->blockRows_[blockStart + j]).size();
581  }
582  RCP<InverseMap> tempMap(new InverseMap(blockSize, 0, this->localComm_));
583  diagBlocks_[i] = rcp(new InverseCrs(tempMap, rowEntryCounts));
584  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
585  for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
586  valuesToInsert.clear();
587  indicesToInsert.clear();
588  // get a view of the split row
589  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
590  auto rowView = this->getInputRowView(inputSplitRow);
591  for (size_t k = 0; k < rowView.size(); k++) {
592  LO colOffset = colToBlockOffset[rowView.ind(k)];
593  if (blockStart <= colOffset && colOffset < blockEnd) {
594  LO blockCol = colOffset - blockStart;
595  indicesToInsert.push_back(blockCol);
596  valuesToInsert.push_back(rowView.val(k));
597  }
598  }
599  if (indicesToInsert.size())
600  diagBlocks_[i]->insertGlobalValues(blockRow, indicesToInsert(), valuesToInsert());
601  }
602  diagBlocks_[i]->fillComplete();
603  }
604  }
605 }
606 
607 //==============================================================================
608 template <class MatrixType, class InverseType>
609 void SparseContainer<MatrixType, InverseType>::
610  extractGraph() {
611  using Teuchos::Array;
612  using Teuchos::ArrayView;
613  using Teuchos::RCP;
614  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
615  // To extract diagonal blocks, need to translate local rows to local columns.
616  // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
617  // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
618  // offset - blockOffsets_[b]: gives the column within block b.
619  //
620  // This provides the block and col within a block in O(1).
622  Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::extractGraph");
623  Teuchos::TimeMonitor timeMon(*timer);
624 
625  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
626  if (this->scalarsPerRow_ > 1) {
627  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
628  for (int i = 0; i < this->numBlocks_; i++) {
629  // Get the interval where block i is defined in blockRows_
630  LO blockStart = this->blockOffsets_[i];
631  LO blockSize = this->blockSizes_[i];
632  LO blockPointSize = this->blockSizes_[i] * this->scalarsPerRow_;
633  LO blockEnd = blockStart + blockSize;
634  ArrayView<const LO> blockRows = this->getBlockRows(i);
635  // Set the lookup table entries for the columns appearing in block i.
636  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
637  // this is OK. The values updated here are only needed to process block i's entries.
638  for (LO j = 0; j < blockSize; j++) {
639  LO localCol = this->translateRowToCol(blockRows[j]);
640  colToBlockOffset[localCol] = blockStart + j;
641  }
642  // First, count the number of entries in each row of block i
643  //(to allocate it with StaticProfile)
644  Array<size_t> rowEntryCounts(blockPointSize, 0);
645  // blockRow counts the BlockCrs LIDs that are going into this block
646  // Rows are inserted into the CrsMatrix in sequential order
647  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
648  for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
649  // get a raw view of the whole block row
650  inds_type indices;
651  LO inputRow = this->blockRows_[blockStart + blockRow];
652  this->inputBlockMatrix_->getGraph()->getLocalRowView(inputRow, indices);
653  LO numEntries = (LO)indices.size();
654  for (LO br = 0; br < this->bcrsBlockSize_; br++) {
655  for (LO k = 0; k < numEntries; k++) {
656  LO colOffset = colToBlockOffset[indices[k]];
657  if (blockStart <= colOffset && colOffset < blockEnd) {
658  rowEntryCounts[blockRow * this->bcrsBlockSize_ + br] += this->bcrsBlockSize_;
659  }
660  }
661  }
662  }
663  // now that row sizes are known, can allocate the diagonal matrix
664  RCP<InverseMap> tempMap(new InverseMap(blockPointSize, 0, this->localComm_));
665  auto diagGraph = rcp(new InverseGraph(tempMap, rowEntryCounts));
666  // insert the actual entries, one row at a time
667  for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
668  // get a raw view of the whole block row
669  inds_type indices;
670  LO inputRow = this->blockRows_[blockStart + blockRow];
671  this->inputBlockMatrix_->getGraph()->getLocalRowView(inputRow, indices);
672  LO numEntries = (LO)indices.size();
673  for (LO br = 0; br < this->bcrsBlockSize_; br++) {
674  indicesToInsert.clear();
675  for (LO k = 0; k < numEntries; k++) {
676  LO colOffset = colToBlockOffset[indices[k]];
677  if (blockStart <= colOffset && colOffset < blockEnd) {
678  LO blockCol = colOffset - blockStart;
679  // bc iterates over the columns in (block) entry k
680  for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
681  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
682  }
683  }
684  }
685  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
686  if (indicesToInsert.size())
687  diagGraph->insertGlobalIndices(rowToInsert, indicesToInsert());
688  }
689  }
690  diagGraph->fillComplete();
691 
692  // create matrix block
693  diagBlocks_[i] = rcp(new InverseCrs(diagGraph));
694  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
695  }
696  } else {
697  // get the mapping from point-indexed matrix columns to offsets in blockRows_
698  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
699  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
700  for (int i = 0; i < this->numBlocks_; i++) {
701  // Get the interval where block i is defined in blockRows_
702  LO blockStart = this->blockOffsets_[i];
703  LO blockSize = this->blockSizes_[i];
704  LO blockEnd = blockStart + blockSize;
705  ArrayView<const LO> blockRows = this->getBlockRows(i);
706  // Set the lookup table entries for the columns appearing in block i.
707  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
708  // this is OK. The values updated here are only needed to process block i's entries.
709  for (LO j = 0; j < blockSize; j++) {
710  // translateRowToCol will return the corresponding split column
711  LO localCol = this->translateRowToCol(blockRows[j]);
712  colToBlockOffset[localCol] = blockStart + j;
713  }
714  Teuchos::Array<size_t> rowEntryCounts(blockSize, 0);
715  for (LO j = 0; j < blockSize; j++) {
716  rowEntryCounts[j] = this->getInputRowView(this->blockRows_[blockStart + j]).size();
717  }
718  RCP<InverseMap> tempMap(new InverseMap(blockSize, 0, this->localComm_));
719  auto diagGraph = rcp(new InverseGraph(tempMap, rowEntryCounts));
720  for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
721  indicesToInsert.clear();
722  // get a view of the split row
723  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
724  auto rowView = this->getInputRowView(inputSplitRow);
725  for (size_t k = 0; k < rowView.size(); k++) {
726  LO colOffset = colToBlockOffset[rowView.ind(k)];
727  if (blockStart <= colOffset && colOffset < blockEnd) {
728  LO blockCol = colOffset - blockStart;
729  indicesToInsert.push_back(blockCol);
730  }
731  }
732  if (indicesToInsert.size())
733  diagGraph->insertGlobalIndices(blockRow, indicesToInsert());
734  }
735  diagGraph->fillComplete();
736 
737  // create matrix block
738  diagBlocks_[i] = rcp(new InverseCrs(diagGraph));
739  Inverses_[i] = rcp(new InverseType(diagBlocks_[i]));
740  }
741  }
742 }
743 
744 //==============================================================================
745 template <class MatrixType, class InverseType>
746 void SparseContainer<MatrixType, InverseType>::
747  extractValues() {
748  using Teuchos::Array;
749  using Teuchos::ArrayView;
750  using Teuchos::RCP;
751  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
752  // To extract diagonal blocks, need to translate local rows to local columns.
753  // Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
754  // blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
755  // offset - blockOffsets_[b]: gives the column within block b.
756  //
757  // This provides the block and col within a block in O(1).
759  Teuchos::TimeMonitor::getNewCounter("Ifpack2::SparseContainer::extractValues");
760  Teuchos::TimeMonitor timeMon(*timer);
761 
762  Teuchos::Array<InverseGlobalOrdinal> indicesToInsert;
763  Teuchos::Array<InverseScalar> valuesToInsert;
764  if (this->scalarsPerRow_ > 1) {
765  Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
766  for (int i = 0; i < this->numBlocks_; i++) {
767  // Get the interval where block i is defined in blockRows_
768  LO blockStart = this->blockOffsets_[i];
769  LO blockSize = this->blockSizes_[i];
770  LO blockEnd = blockStart + blockSize;
771  ArrayView<const LO> blockRows = this->getBlockRows(i);
772  // Set the lookup table entries for the columns appearing in block i.
773  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
774  // this is OK. The values updated here are only needed to process block i's entries.
775  for (LO j = 0; j < blockSize; j++) {
776  LO localCol = this->translateRowToCol(blockRows[j]);
777  colToBlockOffset[localCol] = blockStart + j;
778  }
779  using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
780  using vals_type = typename block_crs_matrix_type::values_host_view_type;
781  // insert the actual entries, one row at a time
782  diagBlocks_[i]->resumeFill();
783  for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
784  // get a raw view of the whole block row
785  inds_type indices;
786  vals_type values;
787  LO inputRow = this->blockRows_[blockStart + blockRow];
788  this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
789  LO numEntries = (LO)indices.size();
790  for (LO br = 0; br < this->bcrsBlockSize_; br++) {
791  indicesToInsert.clear();
792  valuesToInsert.clear();
793  for (LO k = 0; k < numEntries; k++) {
794  LO colOffset = colToBlockOffset[indices[k]];
795  if (blockStart <= colOffset && colOffset < blockEnd) {
796  LO blockCol = colOffset - blockStart;
797  // bc iterates over the columns in (block) entry k
798  for (LO bc = 0; bc < this->bcrsBlockSize_; bc++) {
799  indicesToInsert.push_back(blockCol * this->bcrsBlockSize_ + bc);
800  valuesToInsert.push_back(values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + bc * this->bcrsBlockSize_ + br]);
801  }
802  }
803  }
804  InverseGlobalOrdinal rowToInsert = blockRow * this->bcrsBlockSize_ + br;
805  if (indicesToInsert.size())
806  diagBlocks_[i]->replaceGlobalValues(rowToInsert, indicesToInsert(), valuesToInsert());
807  }
808  }
809  diagBlocks_[i]->fillComplete();
810  }
811  } else {
812  // get the mapping from point-indexed matrix columns to offsets in blockRows_
813  //(this includes regular CrsMatrix columns, in which case scalarsPerRow_ == 1)
814  Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
815  for (int i = 0; i < this->numBlocks_; i++) {
816  // Get the interval where block i is defined in blockRows_
817  LO blockStart = this->blockOffsets_[i];
818  LO blockSize = this->blockSizes_[i];
819  LO blockEnd = blockStart + blockSize;
820  ArrayView<const LO> blockRows = this->getBlockRows(i);
821  // Set the lookup table entries for the columns appearing in block i.
822  // If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
823  // this is OK. The values updated here are only needed to process block i's entries.
824  for (LO j = 0; j < blockSize; j++) {
825  // translateRowToCol will return the corresponding split column
826  LO localCol = this->translateRowToCol(blockRows[j]);
827  colToBlockOffset[localCol] = blockStart + j;
828  }
829  diagBlocks_[i]->resumeFill();
830  for (LO blockRow = 0; blockRow < blockSize; blockRow++) {
831  valuesToInsert.clear();
832  indicesToInsert.clear();
833  // get a view of the split row
834  LO inputSplitRow = this->blockRows_[blockStart + blockRow];
835  auto rowView = this->getInputRowView(inputSplitRow);
836  for (size_t k = 0; k < rowView.size(); k++) {
837  LO colOffset = colToBlockOffset[rowView.ind(k)];
838  if (blockStart <= colOffset && colOffset < blockEnd) {
839  LO blockCol = colOffset - blockStart;
840  indicesToInsert.push_back(blockCol);
841  valuesToInsert.push_back(rowView.val(k));
842  }
843  }
844  if (indicesToInsert.size())
845  diagBlocks_[i]->replaceGlobalValues(blockRow, indicesToInsert, valuesToInsert);
846  }
847  diagBlocks_[i]->fillComplete();
848  }
849  }
850 }
851 
852 template <typename MatrixType, typename InverseType>
854  typedef ILUT<Tpetra::RowMatrix<SC, LO, GO, NO>> ILUTInverse;
855 #ifdef HAVE_IFPACK2_AMESOS2
857  if (std::is_same<InverseType, ILUTInverse>::value) {
858  return "SparseILUT";
859  } else if (std::is_same<InverseType, AmesosInverse>::value) {
860  return "SparseAmesos";
861  } else {
862  throw std::logic_error("InverseType for SparseContainer must be Ifpack2::ILUT or Details::Amesos2Wrapper");
863  }
864 #else
865  // Macros can't have commas in their arguments, so we have to
866  // compute the bool first argument separately.
867  constexpr bool inverseTypeIsILUT = std::is_same<InverseType, ILUTInverse>::value;
868  TEUCHOS_TEST_FOR_EXCEPTION(!inverseTypeIsILUT, std::logic_error,
869  "InverseType for SparseContainer must be Ifpack2::ILUT<ROW>");
870  return "SparseILUT"; // the only supported sparse container specialization if no Amesos2
871 #endif
872 }
873 
874 } // namespace Ifpack2
875 
876 // For ETI
877 #include "Ifpack2_ILUT.hpp"
878 #ifdef HAVE_IFPACK2_AMESOS2
879 #include "Ifpack2_Details_Amesos2Wrapper.hpp"
880 #endif
881 
882 // There's no need to instantiate for CrsMatrix too. All Ifpack2
883 // preconditioners can and should do dynamic casts if they need a type
884 // more specific than RowMatrix.
885 
886 #ifdef HAVE_IFPACK2_AMESOS2
887 #define IFPACK2_SPARSECONTAINER_INSTANT(S, LO, GO, N) \
888  template class Ifpack2::SparseContainer<Tpetra::RowMatrix<S, LO, GO, N>, \
889  Ifpack2::ILUT<Tpetra::RowMatrix<S, LO, GO, N>>>; \
890  template class Ifpack2::SparseContainer<Tpetra::RowMatrix<S, LO, GO, N>, \
891  Ifpack2::Details::Amesos2Wrapper<Tpetra::RowMatrix<S, LO, GO, N>>>;
892 #else
893 #define IFPACK2_SPARSECONTAINER_INSTANT(S, LO, GO, N) \
894  template class Ifpack2::SparseContainer<Tpetra::RowMatrix<S, LO, GO, N>, \
895  Ifpack2::ILUT<Tpetra::RowMatrix<S, LO, GO, N>>>;
896 #endif
897 #endif // IFPACK2_SPARSECONTAINER_DEF_HPP
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_SparseContainer_def.hpp:853
Store and solve a local sparse linear problem.
Definition: Ifpack2_SparseContainer_decl.hpp:101
static RCP< Time > getNewCounter(const std::string &name)
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_SparseContainer_def.hpp:458
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS). This class allows a custom scalar type (LocalScalarType) to be used for storing blocks and solving the block systems. Hiding this template parameter from the Container interface simplifies the BlockRelaxation and ContainerFactory classes.
Definition: Ifpack2_Container_decl.hpp:307
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_SparseContainer_def.hpp:54
SparseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO >> &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_SparseContainer_def.hpp:27
ILUT (incomplete LU factorization with threshold) of a Tpetra sparse matrix.
Definition: Ifpack2_ILUT_decl.hpp:60
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_SparseContainer_def.hpp:426
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_SparseContainer_def.hpp:435
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Wrapper class for direct solvers in Amesos2.
Definition: Ifpack2_Details_Amesos2Wrapper_decl.hpp:68
typename mv_type::dual_view_type::t_host HostView
Definition: Ifpack2_Container_decl.hpp:105
virtual void apply(ConstHostView X, HostView Y, int blockIndex, Teuchos::ETransp mode=Teuchos::NO_TRANS, SC alpha=Teuchos::ScalarTraits< SC >::one(), SC beta=Teuchos::ScalarTraits< SC >::zero()) const
Compute Y := alpha * M^{-1} X + beta*Y.
Definition: Ifpack2_SparseContainer_def.hpp:145
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
virtual void weightedApply(ConstHostView X, HostView Y, ConstHostView W, int blockIndex, Teuchos::ETransp mode=Teuchos::NO_TRANS, SC alpha=Teuchos::ScalarTraits< SC >::one(), SC beta=Teuchos::ScalarTraits< SC >::zero()) const
Compute Y := alpha * diag(D) * M^{-1} (diag(D) * X) + beta*Y.
Definition: Ifpack2_SparseContainer_def.hpp:271
void push_back(const value_type &x)
virtual ~SparseContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_SparseContainer_def.hpp:44
size_type size() const
TypeTo as(const TypeFrom &t)
virtual void setParameters(const Teuchos::ParameterList &List)
Set all necessary parameters.
Definition: Ifpack2_SparseContainer_def.hpp:48
virtual void compute()
Initialize and compute all blocks.
Definition: Ifpack2_SparseContainer_def.hpp:84
Ifpack2::SparseContainer class declaration.
void clearBlocks()
Definition: Ifpack2_SparseContainer_def.hpp:107
Implementation detail of Ifpack2::Container subclasses.
Definition: Ifpack2_Details_MultiVectorLocalGatherScatter.hpp:52