10 #ifndef IFPACK2_GAUSS_SEIDEL_HPP
11 #define IFPACK2_GAUSS_SEIDEL_HPP
15 template <
typename Scalar,
typename LO,
typename GO,
typename NT>
17 using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, NT>;
18 using bcrs_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, NT>;
19 using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, NT>;
20 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
21 using vector_type = Tpetra::Vector<Scalar, LO, GO, NT>;
22 using multivector_type = Tpetra::MultiVector<Scalar, LO, GO, NT>;
23 using block_multivector_type = Tpetra::BlockMultiVector<Scalar, LO, GO, NT>;
24 using mem_space_t =
typename local_matrix_device_type::memory_space;
25 using rowmap_t =
typename local_matrix_device_type::row_map_type::HostMirror;
26 using entries_t =
typename local_matrix_device_type::index_type::HostMirror;
27 using values_t =
typename local_matrix_device_type::values_type::HostMirror;
28 using const_rowmap_t =
typename rowmap_t::const_type;
29 using const_entries_t =
typename entries_t::const_type;
30 using const_values_t =
typename values_t::const_type;
31 using Offset =
typename rowmap_t::non_const_value_type;
32 using IST =
typename crs_matrix_type::impl_scalar_type;
33 using KAT = Kokkos::ArithTraits<IST>;
35 using InverseBlocks = Kokkos::View<IST***, typename bcrs_matrix_type::device_type>;
36 using InverseBlocksHost =
typename InverseBlocks::HostMirror;
38 typedef typename crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
39 typedef typename crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
40 typedef typename crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;
45 numRows = A_->getLocalNumRows();
46 haveRowMatrix =
false;
47 inverseDiagVec = inverseDiagVec_;
48 applyRows = applyRows_;
55 numRows = A_->getLocalNumRows();
57 inverseDiagVec = inverseDiagVec_;
58 applyRows = applyRows_;
62 rowMatrixRowmap = rowmap_t(Kokkos::ViewAllocateWithoutInitializing(
"Arowmap"), numRows + 1);
63 rowMatrixEntries = entries_t(Kokkos::ViewAllocateWithoutInitializing(
"Aentries"), A_->getLocalNumEntries());
64 rowMatrixValues = values_t(Kokkos::ViewAllocateWithoutInitializing(
"Avalues"), A_->getLocalNumEntries());
65 size_t maxDegree = A_->getLocalMaxNumRowEntries();
66 nonconst_values_host_view_type rowValues(
"rowValues", maxDegree);
67 nonconst_local_inds_host_view_type rowEntries(
"rowEntries", maxDegree);
69 for (LO i = 0; i <= numRows; i++) {
70 rowMatrixRowmap(i) = accum;
74 A_->getLocalRowCopy(i, rowEntries, rowValues, degree);
76 size_t rowBegin = rowMatrixRowmap(i);
77 for (
size_t j = 0; j < degree; j++) {
78 rowMatrixEntries(rowBegin + j) = rowEntries[j];
79 rowMatrixValues(rowBegin + j) = rowValues[j];
86 numRows = A_->getLocalNumRows();
87 haveRowMatrix =
false;
89 inverseBlockDiag = Kokkos::create_mirror_view(inverseBlockDiag_);
90 Kokkos::deep_copy(inverseBlockDiag, inverseBlockDiag_);
91 applyRows = applyRows_;
93 blockSize = A_->getBlockSize();
96 template <
bool useApplyRows,
bool multipleRHS,
bool omegaNotOne>
97 void applyImpl(
const const_values_t& Avalues,
const const_rowmap_t& Arowmap,
const const_entries_t& Aentries, multivector_type& x,
const multivector_type& b, Tpetra::ESweepDirection direction) {
99 LO numApplyRows = useApplyRows ? (LO)applyRows.size() : numRows;
101 auto inverseDiag = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
102 bool forward = direction == Tpetra::Forward;
104 LO numVecs = x.getNumVectors();
106 auto xlcl = x.get2dViewNonConst();
107 auto blcl = b.get2dView();
108 for (LO i = 0; i < numApplyRows; i++) {
111 row = useApplyRows ? applyRows[i] : i;
113 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
114 for (LO k = 0; k < numVecs; k++) {
115 accum[k] = KAT::zero();
117 Offset rowBegin = Arowmap(row);
118 Offset rowEnd = Arowmap(row + 1);
119 for (Offset j = rowBegin; j < rowEnd; j++) {
120 LO col = Aentries(j);
121 IST val = Avalues(j);
122 for (LO k = 0; k < numVecs; k++) {
123 accum[k] += val * IST(xlcl[k][col]);
127 IST dinv = inverseDiag(row);
128 for (LO k = 0; k < numVecs; k++) {
130 xlcl[k][row] += Scalar(omega * dinv * (IST(blcl[k][row]) - accum[k]));
132 xlcl[k][row] += Scalar(dinv * (IST(blcl[k][row]) - accum[k]));
136 auto xlcl = Kokkos::subview(x.getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
137 auto blcl = Kokkos::subview(b.getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
138 auto dlcl = Kokkos::subview(inverseDiagVec->getLocalViewHost(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
139 for (LO i = 0; i < numApplyRows; i++) {
142 row = useApplyRows ? applyRows[i] : i;
144 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
145 IST accum = KAT::zero();
146 Offset rowBegin = Arowmap(row);
147 Offset rowEnd = Arowmap(row + 1);
148 for (Offset j = rowBegin; j < rowEnd; j++) {
149 accum += Avalues(j) * xlcl(Aentries(j));
152 IST dinv = dlcl(row);
154 xlcl(row) += omega * dinv * (blcl(row) - accum);
156 xlcl(row) += dinv * (blcl(row) - accum);
161 void applyBlock(block_multivector_type& x,
const block_multivector_type& b, Tpetra::ESweepDirection direction) {
162 if (direction == Tpetra::Symmetric) {
163 applyBlock(x, b, Tpetra::Forward);
164 applyBlock(x, b, Tpetra::Backward);
167 auto Abcrs = Teuchos::rcp_dynamic_cast<
const bcrs_matrix_type>(A);
169 throw std::runtime_error(
"Ifpack2::Details::GaussSeidel::applyBlock: A must be a BlockCrsMatrix");
170 auto Avalues = Abcrs->getValuesHost();
171 auto AlclGraph = Abcrs->getCrsGraph().getLocalGraphHost();
172 auto Arowmap = AlclGraph.row_map;
173 auto Aentries = AlclGraph.entries;
175 Offset bs2 = blockSize * blockSize;
176 LO numVecs = x.getNumVectors();
177 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> accum(
178 Kokkos::ViewAllocateWithoutInitializing(
"BlockGaussSeidel Accumulator"), blockSize, numVecs);
179 Kokkos::View<IST**, Kokkos::LayoutLeft, Kokkos::HostSpace> dinv_accum(
180 Kokkos::ViewAllocateWithoutInitializing(
"BlockGaussSeidel A_ii^-1*Accumulator"), blockSize, numVecs);
181 bool useApplyRows = !applyRows.is_null();
182 bool forward = direction == Tpetra::Forward;
183 LO numApplyRows = useApplyRows ? applyRows.size() : numRows;
184 for (LO i = 0; i < numApplyRows; i++) {
187 row = useApplyRows ? applyRows[i] : i;
189 row = useApplyRows ? applyRows[numApplyRows - 1 - i] : numApplyRows - 1 - i;
190 for (LO v = 0; v < numVecs; v++) {
191 auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
192 for (LO k = 0; k < blockSize; k++) {
193 accum(k, v) = KAT::zero();
196 Offset rowBegin = Arowmap(row);
197 Offset rowEnd = Arowmap(row + 1);
198 for (Offset j = rowBegin; j < rowEnd; j++) {
199 LO col = Aentries(j);
200 const IST* blk = &Avalues(j * bs2);
201 for (LO v = 0; v < numVecs; v++) {
202 auto xCol = x.getLocalBlockHost(col, v, Tpetra::Access::ReadOnly);
203 for (LO br = 0; br < blockSize; br++) {
204 for (LO bc = 0; bc < blockSize; bc++) {
205 IST Aval = blk[br * blockSize + bc];
206 accum(br, v) += Aval * xCol(bc);
212 auto invBlock = Kokkos::subview(inverseBlockDiag, row, Kokkos::ALL(), Kokkos::ALL());
213 Kokkos::deep_copy(dinv_accum, KAT::zero());
214 for (LO v = 0; v < numVecs; v++) {
215 auto bRow = b.getLocalBlockHost(row, v, Tpetra::Access::ReadOnly);
216 for (LO br = 0; br < blockSize; br++) {
217 accum(br, v) = bRow(br) - accum(br, v);
220 for (LO v = 0; v < numVecs; v++) {
221 for (LO br = 0; br < blockSize; br++) {
222 for (LO bc = 0; bc < blockSize; bc++) {
223 dinv_accum(br, v) += invBlock(br, bc) * accum(bc, v);
228 for (LO v = 0; v < numVecs; v++) {
229 auto xRow = x.getLocalBlockHost(row, v, Tpetra::Access::ReadWrite);
230 for (LO k = 0; k < blockSize; k++) {
231 xRow(k) += omega * dinv_accum(k, v);
238 void apply(multivector_type& x,
const multivector_type& b, Tpetra::ESweepDirection direction) {
239 if (direction == Tpetra::Symmetric) {
240 apply(x, b, Tpetra::Forward);
241 apply(x, b, Tpetra::Backward);
244 const_values_t Avalues;
245 const_rowmap_t Arowmap;
246 const_entries_t Aentries;
248 Avalues = rowMatrixValues;
249 Arowmap = rowMatrixRowmap;
250 Aentries = rowMatrixEntries;
252 auto Acrs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A);
254 throw std::runtime_error(
"Ifpack2::Details::GaussSeidel::apply: either haveRowMatrix, or A is CrsMatrix");
255 auto Alcl = Acrs->getLocalMatrixHost();
256 Avalues = Alcl.values;
257 Arowmap = Alcl.graph.row_map;
258 Aentries = Alcl.graph.entries;
260 if (applyRows.is_null()) {
261 if (x.getNumVectors() > 1)
262 this->
template applyImpl<false, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
265 if (omega == KAT::one())
266 this->
template applyImpl<false, false, false>(Avalues, Arowmap, Aentries, x, b, direction);
268 this->
template applyImpl<false, false, true>(Avalues, Arowmap, Aentries, x, b, direction);
271 this->
template applyImpl<true, true, true>(Avalues, Arowmap, Aentries, x, b, direction);
278 values_t rowMatrixValues;
279 rowmap_t rowMatrixRowmap;
280 entries_t rowMatrixEntries;
288 InverseBlocksHost inverseBlockDiag;