79 #include "Intrepid_HGRAD_HEX_C1_FEM.hpp" 
   85 #include "Epetra_Time.h" 
   86 #include "Epetra_Map.h" 
   87 #include "Epetra_FECrsMatrix.h" 
   88 #include "Epetra_FEVector.h" 
   89 #include "Epetra_SerialComm.h" 
   92 #include "Teuchos_oblackholestream.hpp" 
   93 #include "Teuchos_RCP.hpp" 
   94 #include "Teuchos_BLAS.hpp" 
   95 #include "Teuchos_Time.hpp" 
   98 #include "Shards_CellTopology.hpp" 
  101 #include "EpetraExt_RowMatrixOut.h" 
  102 #include "EpetraExt_MultiVectorOut.h" 
  103 #include "EpetraExt_MatrixMatrix.h" 
  106 #include "Sacado.hpp" 
  107 #include "Sacado_Fad_DVFad.hpp" 
  108 #include "Sacado_Fad_SimpleFad.hpp" 
  109 #include "Sacado_CacheFad_DFad.hpp" 
  110 #include "Sacado_CacheFad_SFad.hpp" 
  111 #include "Sacado_CacheFad_SLFad.hpp" 
  115 using namespace Intrepid;
 
  117 #define INTREPID_INTEGRATE_COMP_ENGINE COMP_BLAS 
  119 #define BATCH_SIZE 10 
  125 typedef Sacado::CacheFad::SFad<double,8> FadType;
 
  136 template<
class ScalarT>
 
  140 int main(
int argc, 
char *argv[]) {
 
  144       std::cout <<
"\n>>> ERROR: Invalid number of arguments.\n\n";
 
  145       std::cout <<
"Usage:\n\n";
 
  146       std::cout <<
"  ./Intrepid_example_Drivers_Example_03NL.exe NX NY NZ verbose\n\n";
 
  147       std::cout <<
" where \n";
 
  148       std::cout <<
"   int NX              - num intervals in x direction (assumed box domain, 0,1) \n";
 
  149       std::cout <<
"   int NY              - num intervals in y direction (assumed box domain, 0,1) \n";
 
  150       std::cout <<
"   int NZ              - num intervals in z direction (assumed box domain, 0,1) \n";
 
  151       std::cout <<
"   verbose (optional)  - any character, indicates verbose output \n\n";
 
  157     int iprint     = argc - 1;
 
  158     Teuchos::RCP<std::ostream> outStream;
 
  159     Teuchos::oblackholestream bhs; 
 
  161       outStream = Teuchos::rcp(&std::cout, 
false);
 
  163       outStream = Teuchos::rcp(&bhs, 
false);
 
  166     Teuchos::oblackholestream oldFormatState;
 
  167     oldFormatState.copyfmt(std::cout);
 
  170     << 
"===============================================================================\n" \
 
  172     << 
"|  Example: Generate PDE Jacobian for a Nonlinear Reaction-Diffusion          |\n" \
 
  173     << 
"|                   Equation on Hexahedral Mesh                               |\n" \
 
  175     << 
"|  Questions? Contact  Pavel Bochev  (pbboche@sandia.gov),                    |\n" \
 
  176     << 
"|                      Denis Ridzal  (dridzal@sandia.gov),                    |\n" \
 
  177     << 
"|                      Kara Peterson (kjpeter@sandia.gov).                    |\n" \
 
  179     << 
"|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
 
  180     << 
"|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
 
  182     << 
"===============================================================================\n";
 
  187     int NX = atoi(argv[1]);  
 
  188     int NY = atoi(argv[2]);  
 
  189     int NZ = atoi(argv[3]);  
 
  194     typedef shards::CellTopology CellTopology;
 
  195     CellTopology hex_8(shards::getCellTopologyData<shards::Hexahedron<8> >() );
 
  198     int numNodesPerElem = hex_8.getNodeCount();
 
  199     int spaceDim = hex_8.getDimension();
 
  203     *outStream << 
"Generating mesh ... \n\n";
 
  205     *outStream << 
"   NX" << 
"   NY" << 
"   NZ\n";
 
  206     *outStream << std::setw(5) << NX <<
 
  207                   std::setw(5) << NY <<
 
  208                   std::setw(5) << NZ << 
"\n\n";
 
  211     int numElems = NX*NY*NZ;
 
  212     int numNodes = (NX+1)*(NY+1)*(NZ+1);
 
  213     *outStream << 
" Number of Elements: " << numElems << 
" \n";
 
  214     *outStream << 
"    Number of Nodes: " << numNodes << 
" \n\n";
 
  217     double leftX = 0.0, rightX = 1.0;
 
  218     double leftY = 0.0, rightY = 1.0;
 
  219     double leftZ = 0.0, rightZ = 1.0;
 
  222     double hx = (rightX-leftX)/((
double)NX);
 
  223     double hy = (rightY-leftY)/((
double)NY);
 
  224     double hz = (rightZ-leftZ)/((
double)NZ);
 
  230     for (
int k=0; k<NZ+1; k++) {
 
  231       for (
int j=0; j<NY+1; j++) {
 
  232         for (
int i=0; i<NX+1; i++) {
 
  233           nodeCoord(inode,0) = leftX + (double)i*hx;
 
  234           nodeCoord(inode,1) = leftY + (double)j*hy;
 
  235           nodeCoord(inode,2) = leftZ + (double)k*hz;
 
  236           if (k==0 || j==0 || i==0 || k==NZ || j==NY || i==NX){
 
  237              nodeOnBoundary(inode)=1;
 
  240              nodeOnBoundary(inode)=0;
 
  249     ofstream fcoordout(
"coords.dat");
 
  250     for (
int i=0; i<numNodes; i++) {
 
  251        fcoordout << nodeCoord(i,0) <<
" ";
 
  252        fcoordout << nodeCoord(i,1) <<
" ";
 
  253        fcoordout << nodeCoord(i,2) <<
"\n";
 
  262     for (
int k=0; k<NZ; k++) {
 
  263       for (
int j=0; j<NY; j++) {
 
  264         for (
int i=0; i<NX; i++) {
 
  265           elemToNode(ielem,0) = (NY + 1)*(NX + 1)*k + (NX + 1)*j + i;
 
  266           elemToNode(ielem,1) = (NY + 1)*(NX + 1)*k + (NX + 1)*j + i + 1;
 
  267           elemToNode(ielem,2) = (NY + 1)*(NX + 1)*k + (NX + 1)*(j + 1) + i + 1;
 
  268           elemToNode(ielem,3) = (NY + 1)*(NX + 1)*k + (NX + 1)*(j + 1) + i;
 
  269           elemToNode(ielem,4) = (NY + 1)*(NX + 1)*(k + 1) + (NX + 1)*j + i;
 
  270           elemToNode(ielem,5) = (NY + 1)*(NX + 1)*(k + 1) + (NX + 1)*j + i + 1;
 
  271           elemToNode(ielem,6) = (NY + 1)*(NX + 1)*(k + 1) + (NX + 1)*(j + 1) + i + 1;
 
  272           elemToNode(ielem,7) = (NY + 1)*(NX + 1)*(k + 1) + (NX + 1)*(j + 1) + i;
 
  279     ofstream fe2nout(
"elem2node.dat");
 
  280     for (
int k=0; k<NZ; k++) {
 
  281       for (
int j=0; j<NY; j++) {
 
  282         for (
int i=0; i<NX; i++) {
 
  283           int ielem = i + j * NX + k * NX * NY;
 
  284           for (
int m=0; m<numNodesPerElem; m++){
 
  285               fe2nout << elemToNode(ielem,m) <<
"  ";
 
  297     *outStream << 
"Getting cubature ... \n\n";
 
  302     Teuchos::RCP<Cubature<double> > hexCub = cubFactory.
create(hex_8, cubDegree); 
 
  304     int cubDim       = hexCub->getDimension();
 
  305     int numCubPoints = hexCub->getNumPoints();
 
  310     hexCub->getCubature(cubPoints, cubWeights);
 
  315     *outStream << 
"Getting basis ... \n\n";
 
  324     hexHGradBasis.
getValues(hexGVals, cubPoints, OPERATOR_VALUE);
 
  325     hexHGradBasis.
getValues(hexGrads, cubPoints, OPERATOR_GRAD);
 
  330     *outStream << 
"Building PDE Jacobian ... \n\n";
 
  335     int numCells = BATCH_SIZE; 
 
  336     int numBatches = numElems/numCells; 
 
  353     Epetra_SerialComm Comm;
 
  354     Epetra_Map globalMapG(numNodes, 0, Comm);
 
  355     Epetra_FECrsMatrix StiffMatrix(Copy, globalMapG, 64);
 
  370     for (
int c=0; c<numCells; c++) {
 
  371       for(
int f=0; f<numFieldsG; f++) {
 
  372           u_coeffsAD(c,f) = FadType(numFieldsG, f, 1.3);
 
  376     Teuchos::Time timer_jac_analytic(
"Time to compute element PDE Jacobians analytically: ");
 
  377     Teuchos::Time timer_jac_fad     (
"Time to compute element PDE Jacobians using AD:     ");
 
  378     Teuchos::Time timer_jac_insert  (
"Time for global insert,  w/o graph: ");
 
  379     Teuchos::Time timer_jac_insert_g(
"Time for global insert,  w/  graph: ");
 
  380     Teuchos::Time timer_jac_ga      (
"Time for GlobalAssemble, w/o graph: ");
 
  381     Teuchos::Time timer_jac_ga_g    (
"Time for GlobalAssemble, w/  graph: ");
 
  382     Teuchos::Time timer_jac_fc      (
"Time for FillComplete,   w/o graph: ");
 
  383     Teuchos::Time timer_jac_fc_g    (
"Time for FillComplete,   w/  graph: ");
 
  389     for (
int bi=0; bi<numBatches; bi++) {
 
  392       for (
int ci=0; ci<numCells; ci++) {
 
  393         int k = bi*numCells+ci;
 
  394         for (
int i=0; i<numNodesPerElem; i++) {
 
  395             hexNodes(ci,i,0) = nodeCoord(elemToNode(k,i),0);
 
  396             hexNodes(ci,i,1) = nodeCoord(elemToNode(k,i),1);
 
  397             hexNodes(ci,i,2) = nodeCoord(elemToNode(k,i),2);
 
  402       CellTools::setJacobian(hexJacobian, cubPoints, hexNodes, hex_8);
 
  403       CellTools::setJacobianInv(hexJacobInv, hexJacobian );
 
  404       CellTools::setJacobianDet(hexJacobDet, hexJacobian );
 
  409       fst::HGRADtransformGRAD<double>(hexGradsTransformed, hexJacobInv, hexGrads);
 
  412       fst::computeCellMeasure<double>(weightedMeasure, hexJacobDet, cubWeights);
 
  415       fst::multiplyMeasure<double>(hexGradsTransformedWeighted,
 
  416                                    weightedMeasure, hexGradsTransformed);
 
  419       for(
int i=0; i<numFieldsG; i++){
 
  420         u_coeffs(0,i) = u_coeffsAD(0,i).val();
 
  423       timer_jac_analytic.start(); 
 
  425       fst::integrate<double>(localPDEjacobian, hexGradsTransformed, hexGradsTransformedWeighted, INTREPID_INTEGRATE_COMP_ENGINE);
 
  428       u_FE_val.initialize();
 
  429       fst::evaluate<double>(u_FE_val, u_coeffs, hexGValsTransformed);
 
  432       dfunc_u(df_of_u, u_FE_val);
 
  433       fst::scalarMultiplyDataField<double>(df_of_u_times_basis, df_of_u, hexGValsTransformed);
 
  436       fst::integrate<double>(localPDEjacobian, df_of_u_times_basis, hexGValsTransformedWeighted, INTREPID_INTEGRATE_COMP_ENGINE, 
true);
 
  437       timer_jac_analytic.stop(); 
 
  440       for (
int ci=0; ci<numCells; ci++) {
 
  441         int k = bi*numCells+ci;
 
  442         std::vector<int> rowIndex(numFieldsG);
 
  443         std::vector<int> colIndex(numFieldsG);
 
  444         for (
int row = 0; row < numFieldsG; row++){
 
  445           rowIndex[row] = elemToNode(k,row);
 
  447         for (
int col = 0; col < numFieldsG; col++){
 
  448           colIndex[col] = elemToNode(k,col);
 
  454         for (
int row = 0; row < numFieldsG; row++){
 
  455           timer_jac_insert.start();
 
  456           StiffMatrix.InsertGlobalValues(1, &rowIndex[row], numFieldsG, &colIndex[0], &localPDEjacobian(ci,row,0));
 
  457           timer_jac_insert.stop();
 
  464     timer_jac_ga.start(); StiffMatrix.GlobalAssemble(); timer_jac_ga.stop();
 
  465     timer_jac_fc.start(); StiffMatrix.FillComplete(); timer_jac_fc.stop();
 
  472     Epetra_CrsGraph mgraph = StiffMatrix.Graph();
 
  473     Epetra_FECrsMatrix StiffMatrixViaAD(Copy, mgraph);
 
  475     for (
int bi=0; bi<numBatches; bi++) {
 
  480       for (
int ci=0; ci<numCells; ci++) {
 
  481         int k = bi*numCells+ci;
 
  482         for (
int i=0; i<numNodesPerElem; i++) {
 
  483             hexNodes(ci,i,0) = nodeCoord(elemToNode(k,i),0);
 
  484             hexNodes(ci,i,1) = nodeCoord(elemToNode(k,i),1);
 
  485             hexNodes(ci,i,2) = nodeCoord(elemToNode(k,i),2);
 
  490       CellTools::setJacobian(hexJacobian, cubPoints, hexNodes, hex_8);
 
  491       CellTools::setJacobianInv(hexJacobInv, hexJacobian );
 
  492       CellTools::setJacobianDet(hexJacobDet, hexJacobian );
 
  495       fst::HGRADtransformGRAD<double>(hexGradsTransformed, hexJacobInv, hexGrads);
 
  498       fst::computeCellMeasure<double>(weightedMeasure, hexJacobDet, cubWeights);
 
  501       fst::multiplyMeasure<double>(hexGradsTransformedWeighted, weightedMeasure, hexGradsTransformed);
 
  504       fst::HGRADtransformVALUE<double>(hexGValsTransformed, hexGVals);
 
  507       fst::multiplyMeasure<double>(hexGValsTransformedWeighted,
 
  508                                    weightedMeasure, hexGValsTransformed);
 
  510       timer_jac_fad.start(); 
 
  513       u_FE_gradAD.initialize();
 
  514       fst::evaluate<FadType>(u_FE_gradAD, u_coeffsAD, hexGradsTransformed);
 
  518       u_FE_valAD.initialize();
 
  519       fst::evaluate<FadType>(u_FE_valAD, u_coeffsAD, hexGValsTransformed);
 
  521       func_u(f_of_u_AD, u_FE_valAD);
 
  524       fst::integrate<FadType>(cellResidualAD, u_FE_gradAD,  hexGradsTransformedWeighted, INTREPID_INTEGRATE_COMP_ENGINE);
 
  525       fst::integrate<FadType>(cellResidualAD, f_of_u_AD, hexGValsTransformedWeighted, INTREPID_INTEGRATE_COMP_ENGINE, 
true);
 
  526       timer_jac_fad.stop(); 
 
  529       for (
int ci=0; ci<numCells; ci++) {
 
  530         int k = bi*numCells+ci;
 
  531         std::vector<int> rowIndex(numFieldsG);
 
  532         std::vector<int> colIndex(numFieldsG);
 
  533         for (
int row = 0; row < numFieldsG; row++){
 
  534           rowIndex[row] = elemToNode(k,row);
 
  536         for (
int col = 0; col < numFieldsG; col++){
 
  537           colIndex[col] = elemToNode(k,col);
 
  539         for (
int row = 0; row < numFieldsG; row++){
 
  540           timer_jac_insert_g.start();
 
  541           StiffMatrixViaAD.SumIntoGlobalValues(1, &rowIndex[row], numFieldsG, &colIndex[0], cellResidualAD(ci,row).dx());
 
  542           timer_jac_insert_g.stop();
 
  549     timer_jac_ga_g.start(); StiffMatrixViaAD.GlobalAssemble(); timer_jac_ga_g.stop();
 
  550     timer_jac_fc_g.start(); StiffMatrixViaAD.FillComplete(); timer_jac_fc_g.stop();
 
  558     EpetraExt::RowMatrixToMatlabFile(
"stiff_matrix.dat",StiffMatrix);
 
  559     EpetraExt::RowMatrixToMatlabFile(
"stiff_matrixAD.dat",StiffMatrixViaAD);
 
  564     EpetraExt::MatrixMatrix::Add(StiffMatrix, 
false, 1.0, StiffMatrixViaAD, -1.0);
 
  565     double normMat = StiffMatrixViaAD.NormInf();
 
  566     *outStream << 
"Infinity norm of difference between stiffness matrices = " << normMat << 
"\n";
 
  569     *outStream << 
"\n\nNumber of global nonzeros: " << StiffMatrix.NumGlobalNonzeros() << 
"\n\n";
 
  571     *outStream << timer_jac_analytic.name() << 
" " << timer_jac_analytic.totalElapsedTime() << 
" sec\n";
 
  572     *outStream << timer_jac_fad.name()      << 
" " << timer_jac_fad.totalElapsedTime()      << 
" sec\n\n";
 
  573     *outStream << timer_jac_insert.name()   << 
" " << timer_jac_insert.totalElapsedTime()   << 
" sec\n";
 
  574     *outStream << timer_jac_insert_g.name() << 
" " << timer_jac_insert_g.totalElapsedTime() << 
" sec\n\n";
 
  575     *outStream << timer_jac_ga.name()       << 
" " << timer_jac_ga.totalElapsedTime()       << 
" sec\n";
 
  576     *outStream << timer_jac_ga_g.name()     << 
" " << timer_jac_ga_g.totalElapsedTime()     << 
" sec\n\n";
 
  577     *outStream << timer_jac_fc.name()       << 
" " << timer_jac_fc.totalElapsedTime()       << 
" sec\n";
 
  578     *outStream << timer_jac_fc_g.name()     << 
" " << timer_jac_fc_g.totalElapsedTime()     << 
" sec\n\n";
 
  580     if ((normMat < 1.0e4*INTREPID_TOL)) {
 
  581       std::cout << 
"End Result: TEST PASSED\n";
 
  584       std::cout << 
"End Result: TEST FAILED\n";
 
  588     std::cout.copyfmt(oldFormatState);
 
  594 template<
class ScalarT>
 
  598   for(
int c=0; c<num_cells; c++){
 
  599     for(
int p=0; p<num_cub_p; p++){
 
  600       fu(c,p) = std::pow(u(c,p),3) + std::exp(u(c,p));
 
  609   for(
int c=0; c<num_cells; c++) {
 
  610     for(
int p=0; p<num_cub_p; p++) {
 
  611       dfu(c,p) = 3*u(c,p)*u(c,p) + std::exp(u(c,p));
 
virtual int getCardinality() const 
Returns cardinality of the basis. 
int dimension(const int whichDim) const 
Returns the specified dimension. 
Header file for utility class to provide multidimensional containers. 
Header file for the abstract base class Intrepid::DefaultCubatureFactory. 
Implementation of the default H(grad)-compatible FEM basis of degree 1 on Hexahedron cell...
A factory class that generates specific instances of cubatures. 
Teuchos::RCP< Cubature< Scalar, ArrayPoint, ArrayWeight > > create(const shards::CellTopology &cellTopology, const std::vector< int > °ree)
Factory method. 
void getValues(ArrayScalar &outputValues, const ArrayScalar &inputPoints, const EOperator operatorType) const 
Evaluation of a FEM basis on a reference Hexahedron cell.