#include <Tpetra_Core.hpp>
#include <Tpetra_CrsMatrix.hpp>
#include <Tpetra_MultiVector.hpp>
#include <Galeri_XpetraParameters.hpp>
#include <Galeri_XpetraMaps.hpp>
#include <Galeri_XpetraMatrixTypes.hpp>
#include <Galeri_XpetraProblemFactory.hpp>
#include <Teuchos_Comm.hpp>
#include <Teuchos_CommHelpers.hpp>
#include <Teuchos_DefaultComm.hpp>
#include "Teuchos_FancyOStream.hpp"
#include <Teuchos_oblackholestream.hpp>
#include "Teuchos_StandardCatchMacros.hpp"
#include "Teuchos_StackedTimer.hpp"
#include "BelosTpetraAdapter.hpp"
// Test driver templated on the scalar type used for the Tpetra objects.
//
// As far as the visible code shows, it: registers command-line options,
// picks a Galeri map/matrix according to the Galeri matrix type, builds
// B from a random Xexact via OPT::Apply(A, Xexact, B), configures a Belos
// parameter list, creates a solver through `factory`, and finally prints
// the normalized residuals of X against B.
//
// Parameters:
//   argc, argv - not referenced anywhere in the visible body.
// Returns:
//   EXIT_SUCCESS if `success` is still true at the end, EXIT_FAILURE
//   otherwise; the two early-exit paths return -1.
//
// NOTE(review): this function does not look self-contained as shown. Names
// such as MT, cmdp, RCP/rcp, GaleriList, MVT, OPT, ParameterList, verbosity,
// set, factory, problem, options, stacked_timer and xmlOut are used without a
// visible declaration, several closing braces have no visible opener, and the
// `try` has no visible `catch`. Presumably statements were lost; confirm
// against the complete file before changing any logic.
template <typename ScalarType>
int run(int argc, char *argv[]) {
  
  
  
  
  
  
  
  
  // Type aliases. ST comes from MultiVector<ScalarType>; LO/GO/NT are taken
  // from the default-parameter MultiVector<> instantiation.
  using ST = typename Tpetra::MultiVector<ScalarType>::scalar_type;
  using LO = typename Tpetra::MultiVector<>::local_ordinal_type;
  using GO = typename Tpetra::MultiVector<>::global_ordinal_type;
  using NT = typename Tpetra::MultiVector<>::node_type;
  using OP  = typename Tpetra::Operator<ST,LO,GO,NT>;
  using MV  = typename Tpetra::MultiVector<ST,LO,GO,NT>;
  using tmap_t       = Tpetra::Map<LO,GO,NT>;
  using tcrsmatrix_t = Tpetra::CrsMatrix<ST,LO,GO,NT>;
  // Default communicator and this process's rank; rank 0 is the only rank
  // that prints (see procVerbose below).
  const auto comm = Tpetra::getDefaultComm();
  const int myPID = comm->getRank();
  bool verbose = false;
  bool success = true;
  // Output stream wrapping std::cout.
  auto out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  try {
    bool procVerbose = false;
    bool debug = false;
    int frequency = -1;        // "frequency" option: residual print frequency in iterations; forced back to -1 below when not verbose
    int blockSize = 1;         // "block-size" option
    int numrhs = 1;            // "num-rhs" option: number of right-hand sides
    int maxIters = -1;         // "max-iters" option; -1 means it is derived from problem/block size further down
    int maxSubspace = 50;      // "max-subspace" option: max number of blocks for the subspace
    int maxRestarts = 15;      // "max-restarts" option
    int nx = 10;               // passed to the Galeri parameters together with ny and nz
    int ny = nx;
    int nz = nx;
    // NOTE(review): MT is not declared in the visible code.
    MT tol = 1.0e-5;           // "tol" option: relative residual tolerance
    std::string solverName = "Block GMRES"; // default for the "solverName" option
    // NOTE(review): cmdp is used here and below without a visible declaration.
    // "Laplace2D" is the last argument; presumably the default matrix type - confirm.
    Galeri::Xpetra::Parameters<GO> galeriParameters(cmdp, nx, ny, nz, "Laplace2D");
    // Register the command-line options; each one writes into the local
    // variable whose address is passed.
    cmdp.
setOption(
"verbose",
"quiet",&verbose,
"Print messages and results.");
 
    cmdp.
setOption(
"debug",
"nondebug",&debug,
"Print debugging information from solver.");
 
    cmdp.
setOption(
"frequency",&frequency,
"Solvers frequency for printing residuals (#iters).");
 
    cmdp.
setOption(
"tol",&tol,
"Relative residual tolerance used by GMRES solver.");
 
    cmdp.
setOption(
"num-rhs",&numrhs,
"Number of right-hand sides to be solved for.");
 
    cmdp.
setOption(
"block-size",&blockSize,
"Block size used by GMRES.");
 
    cmdp.
setOption(
"max-iters",&maxIters,
"Maximum number of iterations per linear system (-1 = adapted to problem/block size).");
 
    cmdp.
setOption(
"max-subspace",&maxSubspace,
"Maximum number of blocks the solver can use for the subspace.");
 
    cmdp.
setOption(
"max-restarts",&maxRestarts,
"Maximum number of restarts allowed for GMRES solver.");
 
    cmdp.
setOption(
"solverName", &solverName, 
"The type of solver to use.");
 
    // NOTE(review): the return and closing brace below have no visible opening
    // statement; presumably they are the failure branch of a command-line
    // parse check whose `if` line is missing - confirm.
      return -1;
    }
    // Residual printing is disabled unless verbose output was requested.
    if (!verbose)
      frequency = -1;  
    // Label of the form "Belos <solverName> <N> ranks"; it is not used again
    // in the visible code.
    std::string watchrProblemName = std::string("Belos ") + solverName + " " + std::to_string(comm->getSize()) + " ranks";
    // Only rank 0 prints, and only when verbose is set.
    procVerbose = ( verbose && (myPID==0) ); 
    // Empty body as shown.
    if (procVerbose) {
    }
    
    
    
    // Choose the Cartesian map (1D/2D/3D) matching the requested matrix type.
    // If matrixType matches none of the branches, Map keeps its
    // default-constructed value.
    // NOTE(review): GaleriList is not declared in the visible code.
    std::string matrixType = galeriParameters.GetMatrixType();
    RCP<tmap_t> Map;
    if (matrixType == "Laplace1D" || matrixType == "Identity") {
      Map = RCP{Galeri::Xpetra::CreateMap<LO,GO,tmap_t>("Cartesian1D", comm, GaleriList)};
    } else if (matrixType == "Laplace2D" || matrixType == "Star2D" ||
               matrixType == "BigStar2D" || matrixType == "AnisotropicDiffusion" || matrixType == "Elasticity2D" || matrixType == "Recirc2D") {
      Map = RCP{Galeri::Xpetra::CreateMap<LO,GO,tmap_t>("Cartesian2D", comm, GaleriList)};
    } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") {
      Map = RCP{Galeri::Xpetra::CreateMap<LO,GO,tmap_t>("Cartesian3D", comm, GaleriList)};
    }
    // Build the Galeri problem for this matrix type and obtain the matrix A.
    auto GaleriProblem = Galeri::Xpetra::BuildProblem<ST,LO,GO,tmap_t,tcrsmatrix_t,MV>(matrixType, Map, GaleriList);
    
    auto A = GaleriProblem->BuildMatrix();
    
    // Right-hand side B, solution X and reference solution Xexact, each
    // constructed from Map and numrhs.
    RCP<MV> B = 
rcp (
new MV (Map, numrhs));
 
    RCP<MV> X = 
rcp (
new MV (Map, numrhs));
 
    RCP<MV> Xexact = 
rcp (
new MV (Map, numrhs));
 
    // Randomize Xexact and apply A to it, writing the result into B.
    // NOTE(review): MVT and OPT are not declared in the visible code;
    // presumably Belos multivector/operator traits - confirm.
    MVT::MvRandom(*Xexact);
    OPT::Apply(*A, *Xexact, *B );
    
    
    // When max-iters was left at -1, derive it from the global length of B
    // and the block size.
    const int numGlobalElements = B->getGlobalLength();
    if (maxIters == -1)
      maxIters = numGlobalElements/blockSize - 1; 
    // Solver parameters handed to the solver factory below.
    ParameterList belosList;
    belosList.set( "Num Blocks", maxSubspace);             
    belosList.set( "Block Size", blockSize );              
    belosList.set( "Maximum Iterations", maxIters );       
    belosList.set( "Maximum Restarts", maxRestarts );      
    belosList.set( "Convergence Tolerance", tol );         
    // "Output Frequency" is only set for verbose runs with a positive frequency.
    if (verbose) {
      if (frequency > 0)
        belosList.set( "Output Frequency", frequency );
    }
    // Empty body as shown.
    if (debug) {
    }
    // NOTE(review): verbosity is not declared in the visible code.
    belosList.set( "Verbosity", verbosity );
    
    // NOTE(review): `set` is not declared in the visible code; judging by the
    // error message it presumably holds the result of setting up a
    // Belos::LinearProblem - confirm.
    if (set == false) {
      if (procVerbose)
        *out << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
      return -1;
    }
    
    
    
    
    // Create the solver named by solverName from `factory`.
    // NOTE(review): `factory` and `problem` are not declared in the visible
    // code. The second argument `false` to rcp() is presumably the ownership
    // flag (non-owning wrapper around a stack object) - confirm.
    // NOTE(review): the scalar type here is hard-coded to double while MV and
    // OP are built on ST; the two agree only when ScalarType is double.
    RCP< Belos::SolverManager<double,MV,OP> > newSolver = factory.create (solverName, 
rcp(&belosList,
false));
 
    
    newSolver->setProblem( 
rcp(&problem,
false) );
 
    
    // Run summary, printed on rank 0 in verbose mode.
    if (procVerbose) {
      *out << std::endl << std::endl;
      *out << "Solver: " << solverName << std::endl;
      *out << "Dimension of matrix: " << numGlobalElements << std::endl;
      *out << "Number of right-hand sides: " << numrhs << std::endl;
      *out << "Block size used by solver: " << blockSize << std::endl;
      *out << "Max number of restarts allowed: " << maxRestarts << std::endl;
      *out << "Max number of iterations per linear system: " << maxIters << std::endl;
      *out << "Relative residual tolerance: " << tol << std::endl;
      *out << std::endl;
    }
    
    // Timer report written to `out`.
    // NOTE(review): options, stacked_timer and xmlOut are not declared in the
    // visible code, and no call that actually runs the solve is visible
    // before this point - confirm against the full file.
    options.output_fraction = options.output_histogram = options.output_minmax = true;
    stacked_timer->
report(*out, comm, options);
 
    if (xmlOut.length())
      *out << "\nAlso created Watchr performance report " << xmlOut << '\n';
    
    int numIters = newSolver->getNumIters();
    if (procVerbose)
      *out << "Number of iterations performed for this solve: " << numIters << std::endl;
    
    // Residual check: resid receives A applied to X, is then combined with B
    // (presumably resid = B - A*X, assuming MvAddMv(a, U, b, V, W) computes
    // W = a*U + b*V - confirm), and its norms are divided by the norms of B.
    bool badRes = false;
    std::vector<ST> actualResids( numrhs );
    std::vector<ST> rhsNorm( numrhs );
    MV resid(Map, numrhs);
    OPT::Apply( *A, *X, resid );
    MVT::MvAddMv( -1.0, resid, 1.0, *B, resid );
    MVT::MvNorm( resid, actualResids );
    MVT::MvNorm( *B, rhsNorm );
    // NOTE(review): badRes is only updated inside this procVerbose branch, so
    // it stays false on non-printing ranks and in quiet runs; no use of badRes
    // is visible after this loop.
    if (procVerbose) {
      *out<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
      for ( int i=0; i<numrhs; i++) {
        ST actRes = actualResids[i]/rhsNorm[i];
        *out<<"Problem "<<i<<" : \t"<< actRes <<std::endl;
        if (actRes > tol) badRes = true;
      }
    }
    // NOTE(review): the `if` that should open this failure branch is not
    // visible; as shown, the `} else {` below has no matching opener.
      success = false;
      if (procVerbose)
        *out << "End Result: TEST FAILED" << std::endl;
    } else {
      if (procVerbose)
        *out << "End Result: TEST PASSED" << std::endl;
    }
  }
  // Map the success flag to the process exit code.
  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
// Program entry point: runs the driver with double as the scalar type and
// hands its result back as the process exit status.
int main(int argc, char *argv[]) {
  const int exitCode = run<double>(argc, argv);
  return exitCode;
}