Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Tacho_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef AMESOS2_TACHO_DEF_HPP
11 #define AMESOS2_TACHO_DEF_HPP
12 
13 #include <Teuchos_Tuple.hpp>
14 #include <Teuchos_ParameterList.hpp>
15 #include <Teuchos_StandardParameterEntryValidators.hpp>
16 
18 #include "Amesos2_Tacho_decl.hpp"
19 #include "Amesos2_Util.hpp"
20 
21 namespace Amesos2 {
22 
23 template <class Matrix, class Vector>
25  Teuchos::RCP<const Matrix> A,
26  Teuchos::RCP<Vector> X,
27  Teuchos::RCP<const Vector> B )
28  : SolverCore<Amesos2::TachoSolver,Matrix,Vector>(A, X, B)
29 {
30  data_.method = 1; // Cholesky
31  data_.variant = 2; // solver variant
32  data_.streams = 1; // # of streams
33  data_.dofs_per_node = 1; // DoFs / node
34  data_.pivot_pert = false; // Diagonal pertubation
35  data_.verbose = false; // verbose
36 }
37 
38 
39 template <class Matrix, class Vector>
41 {
42  if ( this->root_ ) {
43  data_.solver.release();
44  }
45 }
46 
47 template <class Matrix, class Vector>
48 std::string
50 {
51  std::ostringstream oss;
52  oss << "Tacho solver interface";
53  return oss.str();
54 }
55 
56 template<class Matrix, class Vector>
57 int
59 {
60  return(0);
61 }
62 
63 template <class Matrix, class Vector>
64 int
66 {
67 #ifdef HAVE_AMESOS2_TIMERS
68  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
69 #endif
70 
71  int status = 0;
72  if ( this->root_ ) {
73  if(do_optimization()) {
74  this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view_);
75  this->matrixA_->returnColInd_kokkos_view(host_cols_view_);
76  }
77 
78  data_.solver.setSolutionMethod(data_.method);
79  data_.solver.setLevelSetOptionAlgorithmVariant(data_.variant);
80  data_.solver.setSmallProblemThresholdsize(data_.small_problem_threshold_size);
81  data_.solver.setVerbose(data_.verbose);
82  data_.solver.setLevelSetOptionNumStreams(data_.streams);
83  // TODO: Confirm param options
84  // data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks);
85 
86  // Symbolic factorization currently must be done on host
87  if (data_.dofs_per_node > 1) {
88  data_.solver.analyze(this->globalNumCols_, data_.dofs_per_node, host_row_ptr_view_, host_cols_view_);
89  } else {
90  data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_);
91  }
92  data_.solver.initialize();
93  }
94  return status;
95 }
96 
97 
98 template <class Matrix, class Vector>
99 int
101 {
102 #ifdef HAVE_AMESOS2_TIMERS
103  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
104 #endif
105 
106  int status = 0;
107  if ( this->root_ ) {
108  if(do_optimization()) {
109  // instead of holding onto the device poinster
110  // this->matrixA_->returnValues_kokkos_view(device_nzvals_view_);
111  // make an explicit copy
112  device_value_type_array device_nzvals_temp;
113  this->matrixA_->returnValues_kokkos_view(device_nzvals_temp);
114  Kokkos::deep_copy(device_nzvals_view_, device_nzvals_temp);
115  }
116  if (data_.pivot_pert) {
117  data_.solver.useDefaultPivotTolerance();
118  } else {
119  data_.solver.useNoPivotTolerance();
120  }
121  data_.solver.factorize(device_nzvals_view_);
122  }
123  return status;
124 }
125 
126 template <class Matrix, class Vector>
127 int
129  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
130 {
131  using Teuchos::as;
132 
133  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
134  const size_t nrhs = X->getGlobalNumVectors();
135 
136  // don't allocate b since it's handled by the copy manager and might just be
137  // be assigned, not copied anyways.
138  // also don't allocate x since we will also use do_get to allocate this if
139  // necessary. When a copy is not necessary we'll solve directly to the x
140  // values in the MV.
141  bool bDidAssignX;
142  { // Get values from RHS B
143 #ifdef HAVE_AMESOS2_TIMERS
144  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
145 #endif
146  const bool initialize_data = true;
147  const bool do_not_initialize_data = false;
148  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
149  device_solve_array_t>::do_get(initialize_data, B, this->bValues_,
150  as<size_t>(ld_rhs),
151  ROOTED, this->rowIndexBase_);
152  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
153  device_solve_array_t>::do_get(do_not_initialize_data, X, this->xValues_,
154  as<size_t>(ld_rhs),
155  ROOTED, this->rowIndexBase_);
156  }
157 
158  int ierr = 0; // returned error code
159 
160  if ( this->root_ ) { // Do solve!
161  // Bump up the workspace size if needed
162 #ifdef HAVE_AMESOS2_TIMERS
163  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
164 #endif
165  if (workspace_.extent(0) < this->globalNumRows_ || workspace_.extent(1) < nrhs) {
166  workspace_ = device_solve_array_t(
167  Kokkos::ViewAllocateWithoutInitializing("t"), this->globalNumRows_, nrhs);
168  }
169 
170  data_.solver.solve(xValues_, bValues_, workspace_);
171 
172  int status = 0; // TODO: determine what error handling will be
173  if(status != 0) {
174  ierr = status;
175  }
176  }
177 
178  /* All processes should have the same error code */
179  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
180 
181  TEUCHOS_TEST_FOR_EXCEPTION( ierr != 0, std::runtime_error,
182  "tacho_solve has error code: " << ierr );
183 
184  /* Update X's global values */
185 
186  // if bDidAssignX, then we solved straight to the adapter's X memory space without
187  // requiring additional memory allocation, so the x data is already in place.
188  if(!bDidAssignX) {
189 #ifdef HAVE_AMESOS2_TIMERS
190  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
191 #endif
192 
193  // This will do nothing is if the target view matches the src view, which
194  // can be the case if the memory spaces match. See comments above for do_get.
195  Util::template put_1d_data_helper_kokkos_view<
196  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, xValues_,
197  as<size_t>(ld_rhs),
198  ROOTED, this->rowIndexBase_);
199  }
200 
201  return(ierr);
202 }
203 
204 
205 template <class Matrix, class Vector>
206 bool
208 {
209  // Tacho can only apply the solve routines to square matrices
210  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
211 }
212 
213 
214 template <class Matrix, class Vector>
215 void
216 TachoSolver<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
217 {
218  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
219 
220  // TODO: Confirm param options
221 
222  // factorization type
223  auto method_name = parameterList->get<std::string> ("method", "chol");
224  if (method_name == "chol")
225  data_.method = 1;
226  else if (method_name == "ldl")
227  data_.method = 2;
228  else if (method_name == "lu")
229  data_.method = 3;
230  else {
231  std::cout << "Error: not supported solution method\n";
232  }
233  // solver type
234  data_.variant = parameterList->get<int> ("variant", 2);
235  // small problem threshold
236  data_.small_problem_threshold_size = parameterList->get<int> ("small problem threshold size", 1024);
237  // verbosity
238  data_.verbose = parameterList->get<bool> ("verbose", false);
239  // # of streams
240  data_.streams = parameterList->get<int> ("num-streams", 1);
241  // DoFs / node
242  data_.dofs_per_node = parameterList->get<int> ("dofs-per-node", 1);
243  // Perturb tiny pivots
244  data_.pivot_pert = parameterList->get<bool> ("perturb-pivot", false);
245  // TODO: Confirm param options
246  // data_.num_kokkos_threads = parameterList->get<int>("kokkos-threads", 1);
247  // data_.max_num_superblocks = parameterList->get<int>("max-num-superblocks", 4);
248 }
249 
250 
251 template <class Matrix, class Vector>
252 Teuchos::RCP<const Teuchos::ParameterList>
254 {
255  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
256 
257  if( is_null(valid_params) ){
258  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
259 
260  pl->set("method", "chol", "Type of factorization, chol, ldl, or lu");
261  pl->set("variant", 2, "Type of solver variant, 0, 1, or 2");
262  pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK.");
263  pl->set("verbose", false, "Verbosity");
264  pl->set("num-streams", 1, "Number of GPU streams");
265  pl->set("dofs-per-node", 1, "DoFs per node");
266  pl->set("perturb-pivot", false, "Perturb tiny pivots");
267 
268  // TODO: Confirm param options
269  // pl->set("kokkos-threads", 1, "Number of threads");
270  // pl->set("max-num-superblocks", 4, "Max number of superblocks");
271 
272  valid_params = pl;
273  }
274 
275  return valid_params;
276 }
277 
278 template <class Matrix, class Vector>
279 bool
281  return (this->root_ && (this->matrixA_->getComm()->getSize() == 1));
282 }
283 
// Loads the matrix into the solver's internal views.
//
// Returns false without touching anything when current_phase is SOLVE;
// returns true in every other case.  Throws std::runtime_error when the
// non-optimized path finds that the row and column index bases differ.
//
// NOTE(review): listing line 286 (the qualified function name and parameter
// list) is absent from this listing; the member index on this page gives it
// as `bool loadA_impl(EPhase current_phase)` -- confirm against the real file.
284 template <class Matrix, class Vector>
285 bool
287 {
288 
289  if(current_phase == SOLVE) {
290  return(false);
291  }
292 
293  if(!do_optimization()) {
294 #ifdef HAVE_AMESOS2_TIMERS
295  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
296 #endif
297 
298  // Note views are allocated but eventually we should remove this.
299  // The internal copy manager will decide if we can assign or deep_copy
300  // and then allocate if necessary. However the GPU solvers are serial right
301  // now so I didn't complete refactoring the matrix code for the parallel
302  // case. If we added that later, we should have it hooked up to the copy
303  // manager and then these allocations can go away.
304  if( this->root_ ) {
305  device_nzvals_view_ = device_value_type_array(
306  Kokkos::ViewAllocateWithoutInitializing("nzvals"), this->globalNumNonZeros_);
307  host_cols_view_ = host_ordinal_type_array(
308  Kokkos::ViewAllocateWithoutInitializing("colind"), this->globalNumNonZeros_);
309  host_row_ptr_view_ = host_size_type_array(
310  Kokkos::ViewAllocateWithoutInitializing("rowptr"), this->globalNumRows_ + 1);
311  }
312 
313  typename host_size_type_array::value_type nnz_ret = 0;
314  {
315  #ifdef HAVE_AMESOS2_TIMERS
316  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
317  #endif
318 
319  TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
320  std::runtime_error,
321  "Row and column maps have different indexbase ");
322 
// NOTE(review): listing line 323 is absent here.  It should name the helper
// class template whose do_get() is called below; restore it from the real
// file rather than guessing.
324  device_value_type_array, host_ordinal_type_array, host_size_type_array>::do_get(
325  this->matrixA_.ptr(),
326  device_nzvals_view_,
327  host_cols_view_,
328  host_row_ptr_view_,
329  nnz_ret,
330  ROOTED, ARBITRARY,
331  this->columnIndexBase_);
332  }
333  }
334  else {
335  if( this->root_ ) {
336  // instead of holding onto the device pointer (which could cause issue)
337  // make an explicit copy
// Only the values view is allocated on this path; numericFactorization_impl()
// later fills it with Kokkos::deep_copy.
338  device_nzvals_view_ = device_value_type_array(
339  Kokkos::ViewAllocateWithoutInitializing("nzvals"), this->globalNumNonZeros_);
340  }
341  }
342 
343  return true;
344 }
345 
346 
347 template<class Matrix, class Vector>
348 const char* TachoSolver<Matrix,Vector>::name = "Tacho";
349 
350 
351 } // end namespace Amesos2
352 
353 #endif // AMESOS2_TACHO_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Amesos2 interface to the Tacho package.
Definition: Amesos2_Tacho_decl.hpp:33
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Tacho.
Definition: Amesos2_Tacho_def.hpp:65
Utility functions for Amesos2.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Tacho_def.hpp:207
int numericFactorization_impl()
Tacho specific numeric factorization.
Definition: Amesos2_Tacho_def.hpp:100
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Tacho_def.hpp:58
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Tacho_def.hpp:253
Similar to get_ccs_helper, but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:629
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Tacho_def.hpp:49
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Tacho specific solve.
Definition: Amesos2_Tacho_def.hpp:128
TachoSolver(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Tacho_def.hpp:24
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Tacho_def.hpp:286
bool do_optimization() const
Can we optimize size_type and ordinal_type for straight pass-through?
Definition: Amesos2_Tacho_def.hpp:280
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
~TachoSolver()
Destructor.
Definition: Amesos2_Tacho_def.hpp:40