42 #ifndef STOKHOS_TPETRA_UTILITIES_HPP 
   43 #define STOKHOS_TPETRA_UTILITIES_HPP 
   47 #include "Tpetra_CrsMatrix.hpp" 
   55   template <
class ViewType>
 
   63       mean_vals = ViewType(
"mean-values", vals.extent(0));
 
   76   template <
class Storage, 
class ... P>
 
   88       typename Scalar::cijk_type mean_cijk =
 
   89         Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
 
   90       mean_vals = Kokkos::make_view<ViewType>(
"mean-values", mean_cijk, nnz, 1);
 
   91       Kokkos::parallel_for( nnz, *
this );
 
   94     KOKKOS_INLINE_FUNCTION
 
   96       mean_vals(i) = vals(i).fastAccessCoeff(0);
 
  109   template <
class Storage, 
class ... P>
 
  124       Kokkos::parallel_for( nnz, *
this );
 
  127     KOKKOS_INLINE_FUNCTION
 
  132         s += vals(i).fastAccessCoeff(
j);
 
  148   template <
class ViewType>
 
  156       mean_vals = ViewType(
"mean-values", vals.extent(0));
 
  169   template <
class Storage, 
class ... P>
 
  183       Kokkos::parallel_for( nnz, *
this );
 
  186     KOKKOS_INLINE_FUNCTION
 
  188       mean_vals(i) = vals(i).fastAccessCoeff(0);
 
  201   template <
class Storage, 
class ... P>
 
  217       Kokkos::parallel_for( nnz, *
this );
 
  220     KOKKOS_INLINE_FUNCTION
 
  225         s += vals(i).fastAccessCoeff(
j);
 
  237   template <
typename Scalar, 
typename LO, 
typename GO, 
typename N>
 
  243     typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
 
  244     typedef Tpetra::Map<LO,GO,N> Map;
 
  246     typedef typename MatrixType::local_matrix_type KokkosMatrixType;
 
  248     typedef typename KokkosMatrixType::StaticCrsGraphType KokkosGraphType;
 
  249     typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
 
  254     KokkosMatrixType kokkos_matrix = A.getLocalMatrix();
 
  255     KokkosGraphType kokkos_graph = kokkos_matrix.graph;
 
  256     KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
 
  257     const size_t ncols = kokkos_matrix.numCols();
 
  259     typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
 
  260     MeanFunc meanfunc(matrix_values);
 
  261     KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
 
  266     KokkosMatrixType mean_kokkos_matrix(
 
  267       "mean-matrix", ncols, mean_matrix_values, kokkos_graph);
 
  269       rcp( 
new MatrixType(rmap, cmap, mean_kokkos_matrix) );
 
  273   template <
typename Scalar, 
typename LO, 
typename GO, 
typename N>
 
  280     typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
 
  281     typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
 
  282     typedef Tpetra::Map<LO,GO,N> Map;
 
  284     typedef typename MatrixType::local_matrix_type KokkosMatrixType;
 
  285     typedef typename ScalarMatrixType::local_matrix_type ScalarKokkosMatrixType;
 
  287     typedef typename KokkosMatrixType::StaticCrsGraphType KokkosGraphType;
 
  288     typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
 
  293     KokkosMatrixType kokkos_matrix = A.getLocalMatrix();
 
  294     KokkosGraphType kokkos_graph = kokkos_matrix.graph;
 
  295     KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
 
  296     const size_t ncols = kokkos_matrix.numCols();
 
  298     typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
 
  299     MeanFunc meanfunc(matrix_values);
 
  300     KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
 
  305     ScalarKokkosMatrixType mean_kokkos_matrix(
 
  306       "mean-matrix", ncols, mean_matrix_values, kokkos_graph);
 
  308       rcp( 
new ScalarMatrixType(rmap, cmap, mean_kokkos_matrix) );
 
  316   template <
typename ExecSpace>
 
  319     template <
typename DstView, 
typename SrcView>
 
  324     template <
typename DstView, 
typename SrcView>
 
  325     void impl(
const DstView& dst, 
const SrcView& src) {
 
  326       typedef typename SrcView::non_const_value_type 
Scalar;
 
  327       const size_t m = src.extent(0);
 
  328       const size_t n = src.extent(1);
 
  330       Kokkos::RangePolicy<exec_space> policy(0,m);
 
  331       Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
 
  333         for (
size_t j=0; 
j<n; ++
j) {
 
  334           Scalar& s = src(i,
j);
 
  335           for (
size_t k=0; k<p; ++k)
 
  336             dst(i,
j*p+k) = s.fastAccessCoeff(k);
 
  344   template <
typename ExecSpace>
 
  348     template <
typename DstView, 
typename SrcView>
 
  353     template <
typename DstView, 
typename SrcView>
 
  354     void impl(
const DstView& dst, 
const SrcView& src) {
 
  355       typedef typename DstView::non_const_value_type 
Scalar;
 
  356       const size_t m = dst.extent(0);
 
  357       const size_t n = dst.extent(1);
 
  360       Kokkos::RangePolicy<exec_space> policy(0,m);
 
  361       Kokkos::parallel_for( policy, KOKKOS_LAMBDA(
const size_t i)
 
  363         for (
size_t j=0; 
j<n; ++
j) {
 
  364           Scalar& d = dst(i,
j);
 
  365           for (
size_t k=0; k<p; ++k)
 
  366             d.fastAccessCoeff(k) = src(i,
j*p+k);
 
  372 #ifdef KOKKOS_ENABLE_CUDA 
  376   struct CopyPCE2Scalar<Kokkos::Cuda> {
 
  379     template <
typename DstView, 
typename SrcView>
 
  384     template <
typename DstView, 
typename SrcView>
 
  385     void impl(
const DstView& dst, 
const SrcView& src) {
 
  386       typedef typename DstView::non_const_value_type 
Scalar;
 
  387       typedef Kokkos::TeamPolicy<exec_space> Policy;
 
  388       typedef typename Policy::member_type Member;
 
  390       const size_t m = src.extent(0);
 
  391       const size_t n = src.extent(1);
 
  394       const size_t ChunkSize = 16;
 
  395       const size_t M = (m+ChunkSize-1)/ChunkSize;
 
  397       Policy policy(M,ChunkSize,ChunkSize);
 
  398       Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
 
  400         __shared__ Scalar tmp[ChunkSize][ChunkSize];
 
  401         const size_t i_block = blockIdx.x*ChunkSize;
 
  403         for (
size_t j=0; 
j<n; ++
j) {
 
  404           for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
 
  410             size_t i = i_block + threadIdx.y;
 
  411             size_t k = k_block + threadIdx.x;
 
  413               tmp[threadIdx.y][threadIdx.x] = src(i,
j).fastAccessCoeff(k);
 
  419             i = i_block + threadIdx.x;
 
  420             k = k_block + threadIdx.y;
 
  422                 dst(i,
j*p+k) = tmp[threadIdx.x][threadIdx.y];
 
  433   struct CopyScalar2PCE<Kokkos::Cuda> {
 
  436     template <
typename DstView, 
typename SrcView>
 
  441     template <
typename DstView, 
typename SrcView>
 
  442     void impl(
const DstView& dst, 
const SrcView& src) {
 
  443       typedef typename SrcView::non_const_value_type 
Scalar;
 
  444       typedef Kokkos::TeamPolicy<exec_space> Policy;
 
  445       typedef typename Policy::member_type Member;
 
  447       const size_t m = dst.extent(0);
 
  448       const size_t n = dst.extent(1);
 
  451       const size_t ChunkSize = 16;
 
  452       const size_t M = (m+ChunkSize-1)/ChunkSize;
 
  454       Policy policy(M,ChunkSize,ChunkSize);
 
  455       Kokkos::parallel_for( policy, [=] __device__(
const Member& team)
 
  457         __shared__ Scalar tmp[ChunkSize][ChunkSize];
 
  458         const size_t i_block = blockIdx.x*ChunkSize;
 
  460         for (
size_t j=0; 
j<n; ++
j) {
 
  461           for (
size_t k_block=0; k_block<p; k_block+=ChunkSize) {
 
  467             size_t i = i_block + threadIdx.x;
 
  468             size_t k = k_block + threadIdx.y;
 
  470               tmp[threadIdx.x][threadIdx.y] = src(i,
j*p+k);
 
  476             i = i_block + threadIdx.y;
 
  477             k = k_block + threadIdx.x;
 
  479                 dst(i,
j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];
 
  490   template <
typename DstView, 
typename SrcView>
 
  496   template <
typename DstView, 
typename SrcView>
 
  504   template <
typename Scalar,
 
  509     virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
 
  516     typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> 
scalar_op_type;
 
  525       return mb_op->getDomainMap();
 
  530       return mb_op->getRangeMap();
 
  534     apply (
const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
 
  535            Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
 
  540       typedef typename scalar_mv_type::device_type device_type;
 
  542       auto xv = X.template getLocalView<device_type>();
 
  543       auto yv = Y.template getLocalView<device_type>();
 
  546           X_s->getNumVectors() != X.getNumVectors()*pce_size)
 
  548                                               X.getNumVectors()*pce_size));
 
  550           Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
 
  552                                               Y.getNumVectors()*pce_size));
 
  553       auto xv_s = 
X_s->template getLocalView<device_type>();
 
  554       auto yv_s = 
Y_s->template getLocalView<device_type>();
 
  568       return mb_op->hasTransposeApply();
 
  573     typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> 
scalar_mv_type;
 
  581 #endif // STOKHOS_TPETRA_UTILITIES_HPP 
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, N > > build_mean_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
MeanViewType getMeanValues() const 
void impl(const DstView &dst, const SrcView &src)
Stokhos::StandardStorage< int, double > Storage
ViewType::execution_space execution_space
CopyPCE2Scalar(const DstView &dst, const SrcView &src)
Sacado::MP::Vector< Storage > Scalar
ViewType::size_type size_type
MeanViewType getMeanValues() const 
GetMeanValsFunc(const ViewType &vals_)
Scalar::value_type MeanScalar
scalar_type::value_type base_scalar_type
Kokkos::DefaultExecutionSpace execution_space
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const 
ViewType::execution_space execution_space
ViewType::execution_space execution_space
ViewType::execution_space execution_space
void copy_pce_to_scalar(const DstView &dst, const SrcView &src)
void impl(const DstView &dst, const SrcView &src)
Sacado::UQ::PCE< Storage > Scalar
Kokkos::View< Scalar *, P... > ViewType
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P...> >::value, unsigned >::type dimension_scalar(const View< T, P...> &view)
Teuchos::RCP< scalar_mv_type > X_s
MeanViewType getMeanValues() const 
ViewType::size_type size_type
virtual bool hasTransposeApply() const 
Teuchos::RCP< const scalar_op_type > mb_op
Teuchos::RCP< Tpetra::CrsMatrix< typename Scalar::value_type, LO, GO, N > > build_mean_scalar_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
virtual ~MeanBasedTpetraOperator()
Sacado::MP::Vector< Storage > Scalar
Tpetra::MultiVector< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_mv_type
ViewType::size_type size_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Kokkos::View< MeanScalar *, P... > MeanViewType
MeanViewType getMeanValues() const 
Kokkos::View< Scalar *, P... > ViewType
GetMeanValsFunc(const ViewType &vals)
KokkosClassic::DefaultNode::DefaultNodeType Node
virtual void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const 
ViewType::execution_space execution_space
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
MeanViewType getMeanValues() const 
ViewType::size_type size_type
MeanViewType getMeanValues() const 
Get mean values matrix for mean-based preconditioning. 
MeanBasedTpetraOperator(const Teuchos::RCP< const scalar_op_type > &mb_op_)
ViewType::size_type size_type
GetScalarMeanValsFunc(const ViewType &vals_)
Kokkos::View< Scalar *, P... > ViewType
GetMeanValsFunc(const ViewType &vals_)
Get mean values matrix for mean-based preconditioning. 
GlobalOrdinal global_ordinal_type
GetScalarMeanValsFunc(const ViewType &vals)
void copy_scalar_to_pce(const DstView &dst, const SrcView &src)
Kokkos::View< MeanScalar *, P... > MeanViewType
Sacado::UQ::PCE< Storage > Scalar
CopyScalar2PCE(const DstView &dst, const SrcView &src)
LocalOrdinal local_ordinal_type
GetScalarMeanValsFunc(const ViewType &vals_)
ViewType::execution_space execution_space
Scalar::value_type MeanScalar
ViewType::size_type size_type
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const 
Tpetra::Operator< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_op_type
Kokkos::View< Scalar *, P... > ViewType
Teuchos::RCP< scalar_mv_type > Y_s