86 #include "Intrepid_HGRAD_HEX_C1_FEM.hpp" 
   92 #include "Epetra_Time.h" 
   93 #include "Epetra_Map.h" 
   94 #include "Epetra_FECrsMatrix.h" 
   95 #include "Epetra_FEVector.h" 
   96 #include "Epetra_SerialComm.h" 
   99 #include "Teuchos_oblackholestream.hpp" 
  100 #include "Teuchos_RCP.hpp" 
  101 #include "Teuchos_BLAS.hpp" 
  102 #include "Teuchos_Time.hpp" 
  105 #include "Shards_CellTopology.hpp" 
  108 #include "EpetraExt_RowMatrixOut.h" 
  109 #include "EpetraExt_MultiVectorOut.h" 
  110 #include "EpetraExt_MatrixMatrix.h" 
  113 #include "Sacado.hpp" 
  114 #include "Sacado_Fad_DVFad.hpp" 
  115 #include "Sacado_Fad_SimpleFad.hpp" 
  116 #include "Sacado_CacheFad_DFad.hpp" 
  117 #include "Sacado_CacheFad_SFad.hpp" 
  118 #include "Sacado_CacheFad_SLFad.hpp" 
  122 using namespace Intrepid;
 
  124 #define INTREPID_INTEGRATE_COMP_ENGINE COMP_BLAS 
  126 #define BATCH_SIZE 10 
  132 typedef Sacado::CacheFad::SFad<double,8> FadType;
 
  141 double evalu(
double & x, 
double & y, 
double & z);
 
  142 int evalGradu(
double & x, 
double & y, 
double & z, 
double & gradu1, 
double & gradu2, 
double & gradu3);
 
  143 double evalDivGradu(
double & x, 
double & y, 
double & z);
 
  145 int main(
int argc, 
char *argv[]) {
 
  149       std::cout <<
"\n>>> ERROR: Invalid number of arguments.\n\n";
 
  150       std::cout <<
"Usage:\n\n";
 
  151       std::cout <<
"  ./Intrepid_example_Drivers_Example_03AD.exe NX NY NZ verbose\n\n";
 
  152       std::cout <<
" where \n";
 
  153       std::cout <<
"   int NX              - num intervals in x direction (assumed box domain, 0,1) \n";
 
  154       std::cout <<
"   int NY              - num intervals in y direction (assumed box domain, 0,1) \n";
 
  155       std::cout <<
"   int NZ              - num intervals in z direction (assumed box domain, 0,1) \n";
 
  156       std::cout <<
"   verbose (optional)  - any character, indicates verbose output \n\n";
 
  162     int iprint     = argc - 1;
 
  163     Teuchos::RCP<std::ostream> outStream;
 
  164     Teuchos::oblackholestream bhs; 
 
  166       outStream = Teuchos::rcp(&std::cout, 
false);
 
  168       outStream = Teuchos::rcp(&bhs, 
false);
 
  171     Teuchos::oblackholestream oldFormatState;
 
  172     oldFormatState.copyfmt(std::cout);
 
  175     << 
"===============================================================================\n" \
 
  177     << 
"|  Example: Generate Stiffness Matrix and Right Hand Side Vector for          |\n" \
 
  178     << 
"|                   Poisson Equation on Hexahedral Mesh                       |\n" \
 
  180     << 
"|  Questions? Contact  Pavel Bochev  (pbboche@sandia.gov),                    |\n" \
 
  181     << 
"|                      Denis Ridzal  (dridzal@sandia.gov),                    |\n" \
 
  182     << 
"|                      Kara Peterson (kjpeter@sandia.gov).                    |\n" \
 
  184     << 
"|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
 
  185     << 
"|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
 
  187     << 
"===============================================================================\n";
 
  192     int NX = atoi(argv[1]);  
 
  193     int NY = atoi(argv[2]);  
 
  194     int NZ = atoi(argv[3]);  
 
  199     typedef shards::CellTopology CellTopology;
 
  200     CellTopology hex_8(shards::getCellTopologyData<shards::Hexahedron<8> >() );
 
  203     int numNodesPerElem = hex_8.getNodeCount();
 
  204     int spaceDim = hex_8.getDimension();
 
  208     *outStream << 
"Generating mesh ... \n\n";
 
  210     *outStream << 
"   NX" << 
"   NY" << 
"   NZ\n";
 
  211     *outStream << std::setw(5) << NX <<
 
  212                   std::setw(5) << NY <<
 
  213                   std::setw(5) << NZ << 
"\n\n";
 
  216     int numElems = NX*NY*NZ;
 
  217     int numNodes = (NX+1)*(NY+1)*(NZ+1);
 
  218     *outStream << 
" Number of Elements: " << numElems << 
" \n";
 
  219     *outStream << 
"    Number of Nodes: " << numNodes << 
" \n\n";
 
  222     double leftX = 0.0, rightX = 1.0;
 
  223     double leftY = 0.0, rightY = 1.0;
 
  224     double leftZ = 0.0, rightZ = 1.0;
 
  227     double hx = (rightX-leftX)/((
double)NX);
 
  228     double hy = (rightY-leftY)/((
double)NY);
 
  229     double hz = (rightZ-leftZ)/((
double)NZ);
 
  235     for (
int k=0; k<NZ+1; k++) {
 
  236       for (
int j=0; j<NY+1; j++) {
 
  237         for (
int i=0; i<NX+1; i++) {
 
  238           nodeCoord(inode,0) = leftX + (double)i*hx;
 
  239           nodeCoord(inode,1) = leftY + (double)j*hy;
 
  240           nodeCoord(inode,2) = leftZ + (double)k*hz;
 
  241           if (k==0 || j==0 || i==0 || k==NZ || j==NY || i==NX){
 
  242              nodeOnBoundary(inode)=1;
 
  245              nodeOnBoundary(inode)=0;
 
  254     ofstream fcoordout(
"coords.dat");
 
  255     for (
int i=0; i<numNodes; i++) {
 
  256        fcoordout << nodeCoord(i,0) <<
" ";
 
  257        fcoordout << nodeCoord(i,1) <<
" ";
 
  258        fcoordout << nodeCoord(i,2) <<
"\n";
 
  267     for (
int k=0; k<NZ; k++) {
 
  268       for (
int j=0; j<NY; j++) {
 
  269         for (
int i=0; i<NX; i++) {
 
  270           elemToNode(ielem,0) = (NY + 1)*(NX + 1)*k + (NX + 1)*j + i;
 
  271           elemToNode(ielem,1) = (NY + 1)*(NX + 1)*k + (NX + 1)*j + i + 1;
 
  272           elemToNode(ielem,2) = (NY + 1)*(NX + 1)*k + (NX + 1)*(j + 1) + i + 1;
 
  273           elemToNode(ielem,3) = (NY + 1)*(NX + 1)*k + (NX + 1)*(j + 1) + i;
 
  274           elemToNode(ielem,4) = (NY + 1)*(NX + 1)*(k + 1) + (NX + 1)*j + i;
 
  275           elemToNode(ielem,5) = (NY + 1)*(NX + 1)*(k + 1) + (NX + 1)*j + i + 1;
 
  276           elemToNode(ielem,6) = (NY + 1)*(NX + 1)*(k + 1) + (NX + 1)*(j + 1) + i + 1;
 
  277           elemToNode(ielem,7) = (NY + 1)*(NX + 1)*(k + 1) + (NX + 1)*(j + 1) + i;
 
  284     ofstream fe2nout(
"elem2node.dat");
 
  285     for (
int k=0; k<NZ; k++) {
 
  286       for (
int j=0; j<NY; j++) {
 
  287         for (
int i=0; i<NX; i++) {
 
  288           int ielem = i + j * NX + k * NX * NY;
 
  289           for (
int m=0; m<numNodesPerElem; m++){
 
  290               fe2nout << elemToNode(ielem,m) <<
"  ";
 
  302     *outStream << 
"Getting cubature ... \n\n";
 
  307     Teuchos::RCP<Cubature<double> > hexCub = cubFactory.
create(hex_8, cubDegree); 
 
  309     int cubDim       = hexCub->getDimension();
 
  310     int numCubPoints = hexCub->getNumPoints();
 
  315     hexCub->getCubature(cubPoints, cubWeights);
 
  320     *outStream << 
"Getting basis ... \n\n";
 
  329     hexHGradBasis.
getValues(hexGVals, cubPoints, OPERATOR_VALUE);
 
  330     hexHGradBasis.
getValues(hexGrads, cubPoints, OPERATOR_GRAD);
 
  335     *outStream << 
"Building stiffness matrix and right hand side ... \n\n";
 
  340     int numCells = BATCH_SIZE; 
 
  341     int numBatches = numElems/numCells; 
 
  363     Epetra_SerialComm Comm;
 
  364     Epetra_Map globalMapG(numNodes, 0, Comm);
 
  365     Epetra_FECrsMatrix StiffMatrix(Copy, globalMapG, 64);
 
  366     Epetra_FEVector rhs(globalMapG);
 
  367     Epetra_FEVector rhsViaAD(globalMapG);
 
  373     for (
int ci=0; ci<numCells; ci++) {
 
  374       for(
int j=0; j<numFieldsG; j++) {
 
  375           x_fad(ci,j) = FadType(numFieldsG, j, 0.0);
 
  379     Teuchos::Time timer_jac_analytic(
"Time to compute element PDE Jacobians analytically: ");
 
  380     Teuchos::Time timer_jac_fad     (
"Time to compute element PDE Jacobians using AD:     ");
 
  381     Teuchos::Time timer_jac_insert  (
"Time for global insert,  w/o graph: ");
 
  382     Teuchos::Time timer_jac_insert_g(
"Time for global insert,  w/  graph: ");
 
  383     Teuchos::Time timer_jac_ga      (
"Time for GlobalAssemble, w/o graph: ");
 
  384     Teuchos::Time timer_jac_ga_g    (
"Time for GlobalAssemble, w/  graph: ");
 
  385     Teuchos::Time timer_jac_fc      (
"Time for FillComplete,   w/o graph: ");
 
  386     Teuchos::Time timer_jac_fc_g    (
"Time for FillComplete,   w/  graph: ");
 
  392     for (
int bi=0; bi<numBatches; bi++) {
 
  395       for (
int ci=0; ci<numCells; ci++) {
 
  396         int k = bi*numCells+ci;
 
  397         for (
int i=0; i<numNodesPerElem; i++) {
 
  398             hexNodes(ci,i,0) = nodeCoord(elemToNode(k,i),0);
 
  399             hexNodes(ci,i,1) = nodeCoord(elemToNode(k,i),1);
 
  400             hexNodes(ci,i,2) = nodeCoord(elemToNode(k,i),2);
 
  405       CellTools::setJacobian(hexJacobian, cubPoints, hexNodes, hex_8);
 
  406       CellTools::setJacobianInv(hexJacobInv, hexJacobian );
 
  407       CellTools::setJacobianDet(hexJacobDet, hexJacobian );
 
  412       fst::HGRADtransformGRAD<double>(hexGradsTransformed, hexJacobInv, hexGrads);
 
  415       fst::computeCellMeasure<double>(weightedMeasure, hexJacobDet, cubWeights);
 
  418       fst::multiplyMeasure<double>(hexGradsTransformedWeighted,
 
  419                                    weightedMeasure, hexGradsTransformed);
 
  422       timer_jac_analytic.start();
 
  423       fst::integrate<double>(localStiffMatrix,
 
  424                              hexGradsTransformed, hexGradsTransformedWeighted, INTREPID_INTEGRATE_COMP_ENGINE);
 
  425       timer_jac_analytic.stop();
 
  428       for (
int ci=0; ci<numCells; ci++) {
 
  429         int k = bi*numCells+ci;
 
  430         std::vector<int> rowIndex(numFieldsG);
 
  431         std::vector<int> colIndex(numFieldsG);
 
  432         for (
int row = 0; row < numFieldsG; row++){
 
  433           rowIndex[row] = elemToNode(k,row);
 
  435         for (
int col = 0; col < numFieldsG; col++){
 
  436           colIndex[col] = elemToNode(k,col);
 
  442         for (
int row = 0; row < numFieldsG; row++){
 
  443           timer_jac_insert.start();
 
  444           StiffMatrix.InsertGlobalValues(1, &rowIndex[row], numFieldsG, &colIndex[0], &localStiffMatrix(ci,row,0));
 
  445           timer_jac_insert.stop();
 
  452       CellTools::mapToPhysicalFrame(physCubPoints, cubPoints, hexNodes, hex_8);
 
  455       for (
int ci=0; ci<numCells; ci++) {
 
  456         for (
int nPt = 0; nPt < numCubPoints; nPt++){
 
  457             double x = physCubPoints(ci,nPt,0);
 
  458             double y = physCubPoints(ci,nPt,1);
 
  459             double z = physCubPoints(ci,nPt,2);
 
  460             rhsData(ci,nPt) = evalDivGradu(x, y, z);
 
  465       fst::HGRADtransformVALUE<double>(hexGValsTransformed, hexGVals);
 
  468       fst::multiplyMeasure<double>(hexGValsTransformedWeighted,
 
  469                                    weightedMeasure, hexGValsTransformed);
 
  472       fst::integrate<double>(localRHS, rhsData, hexGValsTransformedWeighted, INTREPID_INTEGRATE_COMP_ENGINE);
 
  475       for (
int ci=0; ci<numCells; ci++) {
 
  476         int k = bi*numCells+ci;
 
  477         for (
int row = 0; row < numFieldsG; row++){
 
  478             int rowIndex = elemToNode(k,row);
 
  479             double val = -localRHS(ci,row);
 
  480             rhs.SumIntoGlobalValues(1, &rowIndex, &val);
 
  487     timer_jac_ga.start(); StiffMatrix.GlobalAssemble(); timer_jac_ga.stop();
 
  488     timer_jac_fc.start(); StiffMatrix.FillComplete(); timer_jac_fc.stop();
 
  489     rhs.GlobalAssemble();
 
  496     Epetra_CrsGraph mgraph = StiffMatrix.Graph();
 
  497     Epetra_FECrsMatrix StiffMatrixViaAD(Copy, mgraph);
 
  500     for (
int bi=0; bi<numBatches; bi++) {
 
  505       for (
int ci=0; ci<numCells; ci++) {
 
  506         int k = bi*numCells+ci;
 
  507         for (
int i=0; i<numNodesPerElem; i++) {
 
  508             hexNodes(ci,i,0) = nodeCoord(elemToNode(k,i),0);
 
  509             hexNodes(ci,i,1) = nodeCoord(elemToNode(k,i),1);
 
  510             hexNodes(ci,i,2) = nodeCoord(elemToNode(k,i),2);
 
  515       CellTools::setJacobian(hexJacobian, cubPoints, hexNodes, hex_8);
 
  516       CellTools::setJacobianInv(hexJacobInv, hexJacobian );
 
  517       CellTools::setJacobianDet(hexJacobDet, hexJacobian );
 
  520       fst::HGRADtransformGRAD<double>(hexGradsTransformed, hexJacobInv, hexGrads);
 
  523       fst::computeCellMeasure<double>(weightedMeasure, hexJacobDet, cubWeights);
 
  526       fst::multiplyMeasure<double>(hexGradsTransformedWeighted, weightedMeasure, hexGradsTransformed);
 
  529       CellTools::mapToPhysicalFrame(physCubPoints, cubPoints, hexNodes, hex_8);
 
  532       for (
int ci=0; ci<numCells; ci++) {
 
  533         for (
int nPt = 0; nPt < numCubPoints; nPt++){
 
  534             double x = physCubPoints(ci,nPt,0);
 
  535             double y = physCubPoints(ci,nPt,1);
 
  536             double z = physCubPoints(ci,nPt,2);
 
  537             rhsData(ci,nPt) = evalDivGradu(x, y, z);
 
  542       fst::HGRADtransformVALUE<double>(hexGValsTransformed, hexGVals);
 
  545       fst::multiplyMeasure<double>(hexGValsTransformedWeighted,
 
  546                                    weightedMeasure, hexGValsTransformed);
 
  548       timer_jac_fad.start();
 
  554       fst::evaluate<FadType>(FEFunc,x_fad,hexGradsTransformed);
 
  557       fst::integrate<FadType>(cellResidualAD, FEFunc,  hexGradsTransformedWeighted, INTREPID_INTEGRATE_COMP_ENGINE);
 
  558       timer_jac_fad.stop();
 
  559       fst::integrate<FadType>(cellResidualAD, rhsData, hexGValsTransformedWeighted, INTREPID_INTEGRATE_COMP_ENGINE, 
true);
 
  562       for (
int ci=0; ci<numCells; ci++) {
 
  563         int k = bi*numCells+ci;
 
  564         std::vector<int> rowIndex(numFieldsG);
 
  565         std::vector<int> colIndex(numFieldsG);
 
  566         for (
int row = 0; row < numFieldsG; row++){
 
  567           rowIndex[row] = elemToNode(k,row);
 
  569         for (
int col = 0; col < numFieldsG; col++){
 
  570           colIndex[col] = elemToNode(k,col);
 
  572         for (
int row = 0; row < numFieldsG; row++){
 
  573           timer_jac_insert_g.start();
 
  574           StiffMatrixViaAD.SumIntoGlobalValues(1, &rowIndex[row], numFieldsG, &colIndex[0], cellResidualAD(ci,row).dx());
 
  575           timer_jac_insert_g.stop();
 
  580       for (
int ci=0; ci<numCells; ci++) {
 
  581         int k = bi*numCells+ci;
 
  582         for (
int row = 0; row < numFieldsG; row++){
 
  583             int rowIndex = elemToNode(k,row);
 
  584             double val = -cellResidualAD(ci,row).val();
 
  585             rhsViaAD.SumIntoGlobalValues(1, &rowIndex, &val);
 
  592     timer_jac_ga_g.start(); StiffMatrixViaAD.GlobalAssemble(); timer_jac_ga_g.stop();
 
  593     timer_jac_fc_g.start(); StiffMatrixViaAD.FillComplete(); timer_jac_fc_g.stop();
 
  594     rhsViaAD.GlobalAssemble();
 
  602     EpetraExt::RowMatrixToMatlabFile(
"stiff_matrix.dat",StiffMatrix);
 
  603     EpetraExt::RowMatrixToMatlabFile(
"stiff_matrixAD.dat",StiffMatrixViaAD);
 
  604     EpetraExt::MultiVectorToMatrixMarketFile(
"rhs_vector.dat",rhs,0,0,
false);
 
  605     EpetraExt::MultiVectorToMatrixMarketFile(
"rhs_vectorAD.dat",rhsViaAD,0,0,
false);
 
  610     EpetraExt::MatrixMatrix::Add(StiffMatrix, 
false, 1.0, StiffMatrixViaAD, -1.0);
 
  611     double normMat = StiffMatrixViaAD.NormInf();
 
  612     *outStream << 
"Infinity norm of difference between stiffness matrices = " << normMat << 
"\n";
 
  617     rhsViaAD.Update(-1.0, rhs, 1.0);
 
  618     rhsViaAD.NormInf(&normVec);
 
  619     *outStream << 
"Infinity norm of difference between right-hand side vectors = " << normVec << 
"\n";
 
  621     *outStream << 
"\n\nNumber of global nonzeros: " << StiffMatrix.NumGlobalNonzeros() << 
"\n\n";
 
  623     *outStream << timer_jac_analytic.name() << 
" " << timer_jac_analytic.totalElapsedTime() << 
" sec\n";
 
  624     *outStream << timer_jac_fad.name()      << 
" " << timer_jac_fad.totalElapsedTime()      << 
" sec\n\n";
 
  625     *outStream << timer_jac_insert.name()   << 
" " << timer_jac_insert.totalElapsedTime()   << 
" sec\n";
 
  626     *outStream << timer_jac_insert_g.name() << 
" " << timer_jac_insert_g.totalElapsedTime() << 
" sec\n\n";
 
  627     *outStream << timer_jac_ga.name()       << 
" " << timer_jac_ga.totalElapsedTime()       << 
" sec\n";
 
  628     *outStream << timer_jac_ga_g.name()     << 
" " << timer_jac_ga_g.totalElapsedTime()     << 
" sec\n\n";
 
  629     *outStream << timer_jac_fc.name()       << 
" " << timer_jac_fc.totalElapsedTime()       << 
" sec\n";
 
  630     *outStream << timer_jac_fc_g.name()     << 
" " << timer_jac_fc_g.totalElapsedTime()     << 
" sec\n\n";
 
  650    if ((normMat < 1.0e4*INTREPID_TOL) && (normVec < 1.0e4*INTREPID_TOL)) {
 
  651      std::cout << 
"End Result: TEST PASSED\n";
 
  654      std::cout << 
"End Result: TEST FAILED\n";
 
  658    std::cout.copyfmt(oldFormatState);
 
  665  double evalDivGradu(
double & x, 
double & y, 
double & z)
 
  673    double divGradu = -3.0*M_PI*M_PI*sin(M_PI*x)*sin(M_PI*y)*sin(M_PI*z)*exp(x+y+z)
 
  674                     + 2.0*M_PI*cos(M_PI*x)*sin(M_PI*y)*sin(M_PI*z)*exp(x+y+z)
 
  675                     + 2.0*M_PI*cos(M_PI*y)*sin(M_PI*x)*sin(M_PI*z)*exp(x+y+z)
 
  676                     + 2.0*M_PI*cos(M_PI*z)*sin(M_PI*x)*sin(M_PI*y)*exp(x+y+z)
 
  677                     + 3.0*sin(M_PI*x)*sin(M_PI*y)*sin(M_PI*z)*exp(x+y+z);
 
virtual int getCardinality() const 
Returns cardinality of the basis. 
Header file for utility class to provide multidimensional containers. 
Header file for the abstract base class Intrepid::DefaultCubatureFactory. 
Implementation of the default H(grad)-compatible FEM basis of degree 1 on Hexahedron cell...
A factory class that generates specific instances of cubatures. 
Teuchos::RCP< Cubature< Scalar, ArrayPoint, ArrayWeight > > create(const shards::CellTopology &cellTopology, const std::vector< int > °ree)
Factory method. 
void getValues(ArrayScalar &outputValues, const ArrayScalar &inputPoints, const EOperator operatorType) const 
Evaluation of a FEM basis on a reference Hexahedron cell.