10 #ifndef TPETRA_CRSMATRIXMULTIPLYOP_HPP
11 #define TPETRA_CRSMATRIXMULTIPLYOP_HPP
19 #include "Tpetra_CrsMatrix.hpp"
23 #include "Tpetra_LocalCrsMatrixOperator.hpp"
63 template <
class Scalar,
77 using local_matrix_device_type =
103 Teuchos::ETransp mode = Teuchos::NO_TRANS,
104 Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(),
105 Scalar beta = Teuchos::ScalarTraits<Scalar>::zero())
const override {
106 TEUCHOS_TEST_FOR_EXCEPTION(!
matrix_->isFillComplete(), std::runtime_error,
107 Teuchos::typeName(*
this) <<
"::apply(): underlying matrix is not fill-complete.");
109 Teuchos::typeName(*
this) <<
"::apply(X,Y): X and Y must have the same number of vectors.");
110 TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<Scalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
111 Teuchos::typeName(*
this) <<
"::apply() does not currently support transposed multiplications for complex scalar types.");
112 if (mode == Teuchos::NO_TRANS) {
130 return matrix_->getDomainMap();
143 using local_matrix_op_t =
148 const Teuchos::RCP<const crs_matrix_type>
matrix_;
162 mutable Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
importMV_;
176 mutable Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
exportMV_;
183 Teuchos::ETransp mode,
191 using STS = Teuchos::ScalarTraits<Scalar>;
198 RCP<const import_type> importer =
matrix_->getGraph()->getImporter();
199 RCP<const export_type> exporter =
matrix_->getGraph()->getExporter();
203 const bool Y_is_overwritten = (beta == STS::zero());
204 const int myRank =
matrix_->getComm()->getRank();
205 if (Y_is_replicated && myRank != 0) {
214 X = Teuchos::rcp(
new MV(X_in, Teuchos::Copy));
217 X = Teuchos::rcpFromRef(X_in);
221 if (importer != null) {
229 if (exporter != null) {
239 if (exporter != null) {
244 auto X_lcl = X->getLocalViewDevice(Access::ReadOnly);
246 auto localMultiply =
local_matrix_op_t(std::make_shared<local_matrix_device_type>(
matrix_->getLocalMatrixDevice()));
250 if (importer != null) {
252 auto Y_lcl =
importMV_->getLocalViewDevice(Access::OverwriteAll);
254 localMultiply.apply(X_lcl, Y_lcl, mode, alpha, STS::zero());
255 if (Y_is_overwritten) {
267 MV Y(Y_in, Teuchos::Copy);
268 nonconst_view_type Y_lcl;
269 if (Y_is_overwritten)
274 localMultiply.apply(X_lcl, Y_lcl, mode, alpha, beta);
277 nonconst_view_type Y_lcl;
278 if (Y_is_overwritten)
283 localMultiply.apply(X_lcl, Y_lcl, mode, alpha, beta);
287 if (Y_is_replicated) {
298 using Teuchos::NO_TRANS;
301 using Teuchos::rcp_const_cast;
302 using Teuchos::rcpFromRef;
306 typedef Teuchos::ScalarTraits<Scalar> STS;
309 if (alpha == STS::zero()) {
310 if (beta == STS::zero()) {
312 }
else if (beta != STS::one()) {
327 RCP<const import_type> importer =
matrix_->getGraph()->getImporter();
328 RCP<const export_type> exporter =
matrix_->getGraph()->getExporter();
334 const bool Y_is_overwritten = (beta == STS::zero());
337 const bool Y_is_replicated =
347 if (Y_is_replicated &&
matrix_->getComm()->getRank() > 0) {
354 RCP<const MV> X_colMap;
355 if (importer.is_null()) {
363 RCP<MV> X_colMapNonConst = getColumnMapMultiVector(X_in,
true);
365 X_colMap = rcp_const_cast<
const MV>(X_colMapNonConst);
369 X_colMap = rcpFromRef(X_in);
372 ProfilingRegion regionImport(
"Tpetra::CrsMatrixMultiplyOp::apply: Import");
378 RCP<MV> X_colMapNonConst = getColumnMapMultiVector(X_in);
381 X_colMapNonConst->doImport(X_in, *importer,
INSERT);
382 X_colMap = rcp_const_cast<
const MV>(X_colMapNonConst);
389 RCP<MV> Y_rowMap = getRowMapMultiVector(Y_in);
391 auto X_lcl = X_colMap->getLocalViewDevice(Access::ReadOnly);
392 auto localMultiply =
local_matrix_op_t(std::make_shared<local_matrix_device_type>(
matrix_->getLocalMatrixDevice()));
399 if (!exporter.is_null()) {
400 auto Y_lcl = Y_rowMap->getLocalViewDevice(Access::OverwriteAll);
402 localMultiply.apply(X_lcl, Y_lcl, NO_TRANS,
405 ProfilingRegion regionExport(
"Tpetra::CrsMatrixMultiplyOp::apply: Export");
411 if (Y_is_overwritten) {
436 Y_rowMap = getRowMapMultiVector(Y_in,
true);
440 if (beta != STS::zero()) {
443 nonconst_view_type Y_lcl;
444 if (Y_is_overwritten)
445 Y_lcl = Y_rowMap->getLocalViewDevice(Access::OverwriteAll);
447 Y_lcl = Y_rowMap->getLocalViewDevice(Access::ReadWrite);
449 localMultiply.apply(X_lcl, Y_lcl, NO_TRANS, alpha, beta);
452 nonconst_view_type Y_lcl;
453 if (Y_is_overwritten)
458 localMultiply.apply(X_lcl, Y_lcl, NO_TRANS, alpha, beta);
466 if (Y_is_replicated) {
467 ProfilingRegion regionReduce(
"Tpetra::CrsMatrixMultiplyOp::apply: Reduce Y");
494 const bool force =
false)
const {
501 RCP<const import_type> importer =
matrix_->getGraph()->getImporter();
502 RCP<const map_type> colMap =
matrix_->getColMap();
515 if (!importer.is_null() || force) {
517 X_colMap = rcp(
new MV(colMap, numVecs));
555 getRowMapMultiVector(
const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Y_rangeMap,
556 const bool force =
false)
const {
560 typedef Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
562 const size_t numVecs = Y_rangeMap.getNumVectors();
563 RCP<const export_type> exporter =
matrix_->getGraph()->getExporter();
564 RCP<const map_type> rowMap =
matrix_->getRowMap();
576 if (!exporter.is_null() || force) {
578 Y_rowMap = rcp(
new MV(rowMap, numVecs));
595 template <
class OpScalar,
601 CrsMatrixMultiplyOp<OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node> >
607 return Teuchos::rcp(
new op_type(A));
612 #endif // TPETRA_CRSMATRIXMULTIPLYOP_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Abstract interface for local operators (e.g., matrices and preconditioners).
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this Operator.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > importMV_
Column Map MultiVector used in apply().
bool isDistributed() const
Whether this is a globally distributed object.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this Operator.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute Y = beta*Y + alpha*Op(A)*X, where Op(A) is either A, , or .
bool hasTransposeApply() const override
Whether this Operator's apply() method can apply the transpose or conjugate transpose.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
~CrsMatrixMultiplyOp() override=default
Destructor (virtual for memory safety of derived classes).
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
CrsMatrixMultiplyOp(const Teuchos::RCP< const crs_matrix_type > &A)
Constructor.
Insert new values that don't currently exist.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Abstract interface for operators (e.g., matrices and preconditioners).
const Teuchos::RCP< const crs_matrix_type > matrix_
The underlying CrsMatrix object.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void applyTranspose(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X_in, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y_in, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Apply the transpose or conjugate transpose of the matrix to X_in, producing Y_in. ...
typename Node::device_type device_type
The Kokkos device type.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > exportMV_
Row Map MultiVector used in apply().
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Forward declaration of Tpetra::CrsMatrixMultiplyOp.
void applyNonTranspose(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X_in, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y_in, Scalar alpha, Scalar beta) const
Apply the matrix (not its transpose) to X_in, producing Y_in.
A parallel distribution of indices over processes.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
A class for wrapping a CrsMatrix multiply in a Operator.
Stand-alone utility functions and macros.
void reduce()
Sum values of a locally replicated multivector across all processes.
Teuchos::RCP< CrsMatrixMultiplyOp< OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node > > createCrsMatrixMultiplyOp(const Teuchos::RCP< const CrsMatrix< MatScalar, LocalOrdinal, GlobalOrdinal, Node > > &A)
Non-member function to create a CrsMatrixMultiplyOp.
Accumulate new values into existing values (may not be supported in all classes)
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.