46 #ifdef PACKAGE_BUGREPORT 
   47 #undef PACKAGE_BUGREPORT 
   52 #ifdef PACKAGE_TARNAME 
   53 #undef PACKAGE_TARNAME 
   55 #ifdef PACKAGE_VERSION 
   56 #undef PACKAGE_VERSION 
   61 #include "adolc/adouble.h" 
   62 #include "adolc/drivers/drivers.h" 
   63 #include "adolc/interfaces.h" 
   73   static const unsigned int nqp = 2;
 
   74   static const unsigned int nnode = 2;
 
   84     for (
unsigned int i=0; i<
nqp; i++) {
 
   89       jac[i] = 0.5*mesh_spacing;
 
   92       phi[i][0] = 0.5*(1.0 - xi[i]);
 
   93       phi[i][1] = 0.5*(1.0 + xi[i]);
 
  100 template <
class FadType>
 
  103        const std::vector<double>& x,
 
  104        const std::vector< std::vector<double> >& w,
 
  105        std::vector<FadType>& x_fad,
 
  106        std::vector< std::vector<double> >& w_local) {
 
  107   for (
unsigned int node=0; node<e.
nnode; node++)
 
  108     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  109       x_fad[node*neqn+eqn] = 
FadType(e.
nnode*neqn, node*neqn+eqn, 
 
  110              x[e.
gid[node]*neqn+eqn]);
 
  112   for (
unsigned int col=0; col<w.size(); col++)
 
  113     for (
unsigned int node=0; node<e.
nnode; node++)
 
  114       for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  115   w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
 
  120        const std::vector<double>& x,
 
  121        const std::vector< std::vector<double> >& w,
 
  122        std::vector<RadType>& x_rad,
 
  123        std::vector< std::vector<double> >& w_local) {
 
  124   for (
unsigned int node=0; node<e.
nnode; node++)
 
  125     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  126       x_rad[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
 
  128   for (
unsigned int col=0; col<w.size(); col++)
 
  129     for (
unsigned int node=0; node<e.
nnode; node++)
 
  130       for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  131   w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
 
  136        const std::vector<double>& x,
 
  137        const std::vector< std::vector<double> >& w,
 
  138        std::vector<VRadType>& x_rad,
 
  140   for (
unsigned int node=0; node<e.
nnode; node++)
 
  141     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  142       x_rad[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
 
  144   for (
unsigned int col=0; col<w.size(); col++)
 
  145     for (
unsigned int node=0; node<e.
nnode; node++)
 
  146       for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  147   w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
 
  151 void adolc_init_fill(
bool retape,
 
  155          const std::vector<double>& x,
 
  156          const std::vector< std::vector<double> >& w,
 
  157          std::vector<double>& x_local,
 
  158          std::vector<adouble>& x_ad,
 
  162   for (
unsigned int node=0; node<e.
nnode; node++)
 
  163     for (
unsigned int eqn=0; eqn<neqn; eqn++) {
 
  164       x_local[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
 
  166   x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
 
  169   for (
unsigned int col=0; col<w.size(); col++)
 
  170     for (
unsigned int node=0; node<e.
nnode; node++)
 
  171       for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  172   w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
 
  178       const std::vector<double>& x,
 
  179       const std::vector< std::vector<double> >& w,
 
  180       std::vector<double>& x_local,
 
  181       std::vector< std::vector<double> >& w_local) {
 
  182   for (
unsigned int node=0; node<e.
nnode; node++) 
 
  183     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  184       x_local[node*neqn+eqn] = x[e.
gid[node]*neqn+eqn];
 
  186   for (
unsigned int node=0; node<e.
nnode; node++) 
 
  187     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  188       for (
unsigned int col=0; col<w.size(); col++)
 
  189   w_local[col][node*neqn+eqn] = w[col][e.
gid[node]*neqn+eqn];
 
  195          const std::vector<T>& x, 
 
  200   for (
unsigned int qp=0; qp<e.
nqp; qp++) {
 
  201     for (
unsigned int eqn=0; eqn<neqn; eqn++) {
 
  202       u[qp*neqn+eqn] = 0.0;
 
  203       du[qp*neqn+eqn] = 0.0;
 
  204       for (
unsigned int node=0; node<e.
nnode; node++) {
 
  205   u[qp*neqn+eqn] += x[node*neqn+eqn] * e.
phi[qp][node];
 
  206   du[qp*neqn+eqn] += x[node*neqn+eqn] * e.
dphi[qp][node];
 
  212   std::vector<T> s(e.
nqp*neqn);
 
  213   for (
unsigned int qp=0; qp<e.
nqp; qp++) {
 
  214     for (
unsigned int eqn=0; eqn<neqn; eqn++) {
 
  215       s[qp*neqn+eqn] = 0.0;
 
  216       for (
unsigned int j=0; j<neqn; j++) {
 
  218           s[qp*neqn+eqn] += u[qp*neqn+j]; 
 
  224   for (
unsigned int node=0; node<e.
nnode; node++) {
 
  225     for (
unsigned int eqn=0; eqn<neqn; eqn++) {
 
  226       unsigned int row = node*neqn+eqn;
 
  228       for (
unsigned int qp=0; qp<e.
nqp; qp++) {
 
  230     e.
w[qp]*e.
jac[qp]*(-(1.0/(e.
jac[qp]*e.
jac[qp]))*
 
  231            du[qp*neqn+eqn]*e.
dphi[qp][node] + 
 
  232            e.
phi[qp][node]*(
exp(u[qp*neqn+eqn]) + 
 
  233                 u[qp*neqn+eqn]*s[qp*neqn+eqn]));
 
  241          const std::vector<double>& x,
 
  242          std::vector< std::vector<double> >& w,
 
  243          std::vector<double>& u, 
 
  244          std::vector<double>& du, 
 
  245          std::vector<double>& f,
 
  246          std::vector< std::vector<double> >& adj) {
 
  248   for (
unsigned int qp=0; qp<e.
nqp; qp++) {
 
  249     for (
unsigned int eqn=0; eqn<neqn; eqn++) {
 
  250       u[qp*neqn+eqn] = 0.0;
 
  251       du[qp*neqn+eqn] = 0.0;
 
  252       for (
unsigned int node=0; node<e.
nnode; node++) {
 
  253   u[qp*neqn+eqn] += x[node*neqn+eqn] * e.
phi[qp][node];
 
  254   du[qp*neqn+eqn] += x[node*neqn+eqn] * e.
dphi[qp][node];
 
  260   std::vector<double> s(e.
nqp*neqn);
 
  261   for (
unsigned int qp=0; qp<e.
nqp; qp++) {
 
  262     for (
unsigned int eqn=0; eqn<neqn; eqn++) {
 
  263       s[qp*neqn+eqn] = 0.0;
 
  264       for (
unsigned int j=0; j<neqn; j++) {
 
  266     s[qp*neqn+eqn] += u[qp*neqn+j]; 
 
  271   for (
unsigned int col=0; col<w.size(); col++)
 
  272     for (
unsigned int node=0; node<e.
nnode; node++)
 
  273       for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  274   adj[col][node*neqn+eqn] = 0.0;
 
  277   for (
unsigned int node=0; node<e.
nnode; node++) {
 
  278     for (
unsigned int eqn=0; eqn<neqn; eqn++) {
 
  279       unsigned int row = node*neqn+eqn;
 
  281       for (
unsigned int qp=0; qp<e.
nqp; qp++) {
 
  282   double c1 = e.
w[qp]*e.
jac[qp];
 
  283   double c2 = -(1.0/(e.
jac[qp]*e.
jac[qp]))*e.
dphi[qp][node];
 
  284   double c3 = e.
phi[qp][node]*(
exp(u[qp*neqn+eqn]));
 
  285   double c4 = e.
phi[qp][node]*u[qp*neqn+eqn];
 
  286   double c5 = e.
phi[qp][node]*s[qp*neqn+eqn];
 
  289   f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]);
 
  290   for (
unsigned int col=0; col<w.size(); col++) {
 
  291     for (
unsigned int node_col=0; node_col<e.
nnode; node_col++) {
 
  292       adj[col][node_col*neqn+eqn] += 
 
  293         c1*(c2*e.
dphi[qp][node_col] + c35*e.
phi[qp][node_col])*w[col][row];
 
  294       for (
unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
 
  296     adj[col][node_col*neqn+eqn_col] += c14*e.
phi[qp][node_col]*w[col][row];
 
  305 template <
class FadType>
 
  308           const std::vector< std::vector<double> >& w_local,
 
  309           const std::vector<FadType>& f_fad, 
 
  310           std::vector<double>& f,
 
  311           std::vector< std::vector<double> >& adj) {
 
  313   for (
unsigned int node=0; node<e.
nnode; node++)
 
  314     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  315       f[e.
gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val();
 
  318   for (
unsigned int col=0; col<w_local.size(); col++) {
 
  320     for (
unsigned int node=0; node<e.
nnode; node++)
 
  321       for (
unsigned int eqn=0; eqn<neqn; eqn++) 
 
  322   z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn];
 
  324     for (
unsigned int node=0; node<e.
nnode; node++)
 
  325       for (
unsigned int eqn=0; eqn<neqn; eqn++) 
 
  326   adj[col][e.
gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn);
 
  332           std::vector< std::vector<double> >& w_local,
 
  333           std::vector<RadType>& x_rad,
 
  334           std::vector<RadType>& f_rad, 
 
  335           std::vector<RadType*>& ff_rad,
 
  336           std::vector<double>& f,
 
  337           std::vector< std::vector<double> >& adj) {
 
  339   for (
unsigned int node=0; node<e.
nnode; node++)
 
  340     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  341       f[e.
gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
 
  344   for (
unsigned int col=0; col<w_local.size(); col++) {
 
  348     for (
unsigned int node=0; node<e.
nnode; node++)
 
  349       for (
unsigned int eqn=0; eqn<neqn; eqn++) 
 
  350   adj[col][e.
gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj();
 
  357            std::vector<std::size_t>& vn,
 
  359            std::vector<VRadType>& x_rad,
 
  360            std::vector<VRadType>& f_rad, 
 
  362            std::vector<double>& f,
 
  363            std::vector< std::vector<double> >& adj) {
 
  365   for (
unsigned int node=0; node<e.
nnode; node++)
 
  366     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  367       f[e.
gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
 
  374   for (
unsigned int col=0; col<ncol; col++)
 
  375     for (
unsigned int node=0; node<e.
nnode; node++)
 
  376       for (
unsigned int eqn=0; eqn<neqn; eqn++) 
 
  377   adj[col][e.
gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col);
 
  381 void adolc_process_fill(
bool retape,
 
  386       std::vector<double>& x_local,
 
  388       std::vector<adouble>& f_ad,
 
  389       std::vector<double>& f_local,
 
  390       std::vector<double>& f,
 
  392       std::vector< std::vector<double> >& adj) {
 
  394     for (
unsigned int node=0; node<e.
nnode; node++)
 
  395       for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  396   f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn];
 
  400     zos_forward(tag, neqn*e.
nnode, neqn*e.
nnode, 1, &x_local[0], &f_local[0]);
 
  402   fov_reverse(tag, neqn*e.
nnode, neqn*e.
nnode, ncol, w_local, adj_local);
 
  404   for (
unsigned int node=0; node<e.
nnode; node++)
 
  405     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  406       f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
 
  408   for (
unsigned int col=0; col<ncol; col++)
 
  409     for (
unsigned int node=0; node<e.
nnode; node++)
 
  410       for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  411   adj[col][e.
gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
 
  417          const std::vector<double>& f_local, 
 
  418          const std::vector< std::vector<double> >& adj_local, 
 
  419          std::vector<double>& f,
 
  420          std::vector< std::vector<double> >& adj) {
 
  421   for (
unsigned int node=0; node<e.
nnode; node++)
 
  422     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  423       f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
 
  425   for (
unsigned int col=0; col<adj.size(); col++)
 
  426     for (
unsigned int node=0; node<e.
nnode; node++)
 
  427       for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  428   adj[col][e.
gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
 
  433          const std::vector<double>& f_local, 
 
  434          std::vector<double>& f) {
 
  435   for (
unsigned int node=0; node<e.
nnode; node++)
 
  436     for (
unsigned int eqn=0; eqn<neqn; eqn++)
 
  437       f[e.
gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
 
  440 template <
typename FadType>
 
  442         unsigned int num_cols, 
double mesh_spacing) {
 
  446   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
 
  447   std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
 
  448   std::vector< std::vector<double> > adj(num_cols);
 
  449   for (
unsigned int node=0; node<num_nodes; node++)
 
  450     for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  451       x[node*num_eqns+eqn] = 
 
  452   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
 
  453       f[node*num_eqns+eqn] = 0.0;
 
  456   for (
unsigned int col=0; col<num_cols; col++) {
 
  457     w[col] = std::vector<double>(num_nodes*num_eqns);
 
  458     w_local[col] = std::vector<double>(e.
nnode*num_eqns);
 
  459     adj[col] = std::vector<double>(num_nodes*num_eqns);
 
  460     for (
unsigned int node=0; node<num_nodes; node++)
 
  461       for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  462   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
 
  463   adj[col][node*num_eqns+eqn] = 0.0;
 
  469   std::vector<FadType> x_fad(e.
nnode*num_eqns), f_fad(e.
nnode*num_eqns);
 
  470   std::vector<FadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
 
  471   for (
unsigned int i=0; i<num_nodes-1; i++) {
 
  498         unsigned int num_cols, 
double mesh_spacing) {
 
  502   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
 
  503   std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
 
  504   std::vector< std::vector<double> > adj(num_cols);
 
  505   for (
unsigned int node=0; node<num_nodes; node++)
 
  506     for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  507       x[node*num_eqns+eqn] = 
 
  508   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
 
  509       f[node*num_eqns+eqn] = 0.0;
 
  512   for (
unsigned int col=0; col<num_cols; col++) {
 
  513     w[col] = std::vector<double>(num_nodes*num_eqns);
 
  514     w_local[col] = std::vector<double>(e.
nnode*num_eqns);
 
  515     adj[col] = std::vector<double>(num_nodes*num_eqns);
 
  516     for (
unsigned int node=0; node<num_nodes; node++)
 
  517       for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  518   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
 
  519   adj[col][node*num_eqns+eqn] = 0.0;
 
  525   std::vector<RadType> x_rad(e.
nnode*num_eqns), f_rad(e.
nnode*num_eqns);
 
  526   std::vector<RadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
 
  527   std::vector<RadType*> ff_rad(f_rad.size());
 
  528   for (
unsigned int i=0; i<f_rad.size(); i++)
 
  529     ff_rad[i] = &f_rad[i];
 
  530   for (
unsigned int i=0; i<num_nodes-1; i++) {
 
  557          unsigned int num_cols, 
double mesh_spacing) {
 
  561   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
 
  562   std::vector< std::vector<double> > w(num_cols);
 
  563   double **w_local = 
new double*[num_cols];
 
  564   std::vector< std::vector<double> > adj(num_cols);
 
  565   for (
unsigned int node=0; node<num_nodes; node++)
 
  566     for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  567       x[node*num_eqns+eqn] = 
 
  568   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
 
  569       f[node*num_eqns+eqn] = 0.0;
 
  572   for (
unsigned int col=0; col<num_cols; col++) {
 
  573     w[col] = std::vector<double>(num_nodes*num_eqns);
 
  574     w_local[col] = 
new double[e.
nnode*num_eqns];
 
  575     adj[col] = std::vector<double>(num_nodes*num_eqns);
 
  576     for (
unsigned int node=0; node<num_nodes; node++)
 
  577       for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  578   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
 
  579   adj[col][node*num_eqns+eqn] = 0.0;
 
  585   std::vector<VRadType> x_rad(e.
nnode*num_eqns), f_rad(e.
nnode*num_eqns);
 
  586   std::vector<VRadType> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
 
  588   std::vector<std::size_t> vn(num_cols);
 
  589   for (
unsigned int i=0; i<num_cols; i++) {
 
  591     vn[i] = num_eqns*e.
nnode;
 
  592     for (
unsigned int j=0; j<num_eqns*e.
nnode; j++)
 
  593       vf_rad[i][j] = &f_rad[j];
 
  595   for (
unsigned int i=0; i<num_nodes-1; i++) {
 
  604   for (
unsigned int col=0; col<num_cols; col++) {
 
  605     delete [] w_local[col];
 
  606     delete [] vf_rad[col];
 
  629 double adolc_adj_fill(
unsigned int num_nodes, 
unsigned int num_eqns,
 
  630           unsigned int num_cols, 
double mesh_spacing) {
 
  634   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
 
  635   std::vector< std::vector<double> > w(num_cols);
 
  636   std::vector< std::vector<double> > adj(num_cols);
 
  637   for (
unsigned int node=0; node<num_nodes; node++)
 
  638     for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  639       x[node*num_eqns+eqn] = 
 
  640   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
 
  641       f[node*num_eqns+eqn] = 0.0;
 
  644   for (
unsigned int col=0; col<num_cols; col++) {
 
  645     w[col] = std::vector<double>(num_nodes*num_eqns);
 
  646     adj[col] = std::vector<double>(num_nodes*num_eqns);
 
  647     for (
unsigned int node=0; node<num_nodes; node++)
 
  648       for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  649   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
 
  650   adj[col][node*num_eqns+eqn] = 0.0;
 
  656   std::vector<adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
 
  657   std::vector<adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
 
  658   std::vector<double> x_local(e.
nnode*num_eqns);
 
  659   std::vector<double> f_local(e.
nnode*num_eqns);
 
  660   double **adj_local = 
new double*[num_cols];
 
  661   double **w_local = 
new double*[num_cols];
 
  662   for (
unsigned int i=0; i<num_cols; i++) {
 
  663     adj_local[i] = 
new double[e.
nnode*num_eqns];
 
  664     w_local[i] = 
new double[e.
nnode*num_eqns];
 
  670   adolc_init_fill(
true, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
 
  672   adolc_process_fill(
true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad, 
 
  673          f_local, f, adj_local, adj);
 
  676   for (
unsigned int i=1; i<num_nodes-1; i++) {
 
  680     adolc_init_fill(
false, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
 
  681     adolc_process_fill(
false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad, 
 
  682            f_local, f, adj_local, adj);
 
  684   for (
unsigned int i=0; i<num_cols; i++) {
 
  685     delete [] adj_local[i];
 
  686     delete [] w_local[i];
 
  705   return timer.totalElapsedTime();
 
  708 double adolc_retape_adj_fill(
unsigned int num_nodes, 
unsigned int num_eqns,
 
  709            unsigned int num_cols, 
double mesh_spacing) {
 
  713   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
 
  714   std::vector< std::vector<double> > w(num_cols);
 
  715   std::vector< std::vector<double> > adj(num_cols);
 
  716   for (
unsigned int node=0; node<num_nodes; node++)
 
  717     for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  718       x[node*num_eqns+eqn] = 
 
  719   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
 
  720       f[node*num_eqns+eqn] = 0.0;
 
  723   for (
unsigned int col=0; col<num_cols; col++) {
 
  724     w[col] = std::vector<double>(num_nodes*num_eqns);
 
  725     adj[col] = std::vector<double>(num_nodes*num_eqns);
 
  726     for (
unsigned int node=0; node<num_nodes; node++)
 
  727       for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  728   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
 
  729   adj[col][node*num_eqns+eqn] = 0.0;
 
  735   std::vector<adouble> x_ad(e.
nnode*num_eqns), f_ad(e.
nnode*num_eqns);
 
  736   std::vector<adouble> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
 
  737   std::vector<double> x_local(e.
nnode*num_eqns);
 
  738   std::vector<double> f_local(e.
nnode*num_eqns);
 
  739   double **adj_local = 
new double*[num_cols];
 
  740   double **w_local = 
new double*[num_cols];
 
  741   for (
unsigned int i=0; i<num_cols; i++) {
 
  742     adj_local[i] = 
new double[e.
nnode*num_eqns];
 
  743     w_local[i] = 
new double[e.
nnode*num_eqns];
 
  746   for (
unsigned int i=0; i<num_nodes-1; i++) {
 
  750     adolc_init_fill(
true, 1, e, num_eqns, x, w, x_local, x_ad, w_local);
 
  752     adolc_process_fill(
true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad, 
 
  753            f_local, f, adj_local, adj);
 
  755   for (
unsigned int i=0; i<num_cols; i++) {
 
  756     delete [] adj_local[i];
 
  757     delete [] w_local[i];
 
  776   return timer.totalElapsedTime();
 
  781        unsigned int num_cols, 
double mesh_spacing) {
 
  785   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
 
  786   std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
 
  787   std::vector< std::vector<double> > adj(num_cols);
 
  788   for (
unsigned int node=0; node<num_nodes; node++)
 
  789     for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  790       x[node*num_eqns+eqn] = 
 
  791   (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
 
  792       f[node*num_eqns+eqn] = 0.0;
 
  795   for (
unsigned int col=0; col<num_cols; col++) {
 
  796     w[col] = std::vector<double>(num_nodes*num_eqns);
 
  797     w_local[col] = std::vector<double>(e.
nnode*num_eqns);
 
  798     adj[col] = std::vector<double>(num_nodes*num_eqns);
 
  799     for (
unsigned int node=0; node<num_nodes; node++)
 
  800       for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  801   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
 
  802   adj[col][node*num_eqns+eqn] = 0.0;
 
  808   std::vector<double> x_local(e.
nnode*num_eqns), f_local(e.
nnode*num_eqns);
 
  809   std::vector< std::vector<double> > adj_local(num_cols);
 
  810   std::vector<double> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
 
  811   for (
unsigned int i=0; i<num_cols; i++)
 
  812     adj_local[i] = std::vector<double>(e.
nnode*num_eqns);
 
  813   for (
unsigned int i=0; i<num_nodes-1; i++) {
 
  842          unsigned int num_cols, 
double mesh_spacing) {
 
  846   std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
 
  847   std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
 
  848   for (
unsigned int node_row=0; node_row<num_nodes; node_row++) {
 
  849     for (
unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) { 
 
  850       unsigned int row = node_row*num_eqns + eqn_row;
 
  851       x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
 
  856   for (
unsigned int col=0; col<num_cols; col++) {
 
  857     w[col] = std::vector<double>(num_nodes*num_eqns);
 
  858     w_local[col] = std::vector<double>(e.
nnode*num_eqns);
 
  859     for (
unsigned int node=0; node<num_nodes; node++)
 
  860       for (
unsigned int eqn=0; eqn<num_eqns; eqn++) {
 
  861   w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
 
  867   std::vector<double> x_local(e.
nnode*num_eqns), f_local(e.
nnode*num_eqns);
 
  868   std::vector<double> u(e.
nqp*num_eqns), du(e.
nqp*num_eqns);
 
  869   for (
unsigned int i=0; i<num_nodes-1; i++) {
 
  887 int main(
int argc, 
char* argv[]) {
 
  897     clp.
setDocString(
"This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
 
  898     int num_nodes = 100000;
 
  902     clp.
setOption(
"n", &num_nodes, 
"Number of nodes");
 
  903     clp.
setOption(
"p", &num_eqns, 
"Number of equations");
 
  904     clp.
setOption(
"q", &num_cols, 
"Number of adjoint directions");
 
  905     clp.
setOption(
"rt", &rt, 
"Include ADOL-C retaping test");
 
  909       parseReturn= clp.
parse(argc, argv);
 
  913     double mesh_spacing = 1.0 / 
static_cast<double>(num_nodes - 1);
 
  915     std::cout.setf(std::ios::scientific);
 
  916     std::cout.precision(p);
 
  917     std::cout << 
"num_nodes = " << num_nodes 
 
  918         << 
", num_eqns = " << num_eqns 
 
  919         << 
", num_cols = " << num_cols << 
":  " << std::endl
 
  920         << 
"           " << 
"   Time" << 
"\t"<< 
"Time/Analytic" << 
"\t" 
  921         << 
"Time/Residual" << std::endl;
 
  926     tr = 
residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
 
  929     std::cout << 
"Analytic:  " << std::setw(w) << ta << 
"\t" << std::setw(w) << ta/ta << 
"\t" << std::setw(w) << ta/tr << std::endl;
 
  932     t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
 
  933     std::cout << 
"ADOL-C:    " << std::setw(w) << t << 
"\t" << std::setw(w) << t/ta << 
"\t" << std::setw(w) << t/tr << std::endl;
 
  936       t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
 
  937       std::cout << 
"ADOL-C(rt):" << std::setw(w) << t << 
"\t" << std::setw(w) << t/ta << 
"\t" << std::setw(w) << t/tr << std::endl;
 
  941     t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
 
  942     std::cout << 
"DFad:      " << std::setw(w) << t << 
"\t" << std::setw(w) << t/ta << 
"\t" << std::setw(w) << t/tr << std::endl;
 
  944     t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
 
  945     std::cout << 
"ELRDFad:   " << std::setw(w) << t << 
"\t" << std::setw(w) << t/ta << 
"\t" << std::setw(w) << t/tr << std::endl;
 
  947     t = 
rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
 
  948     std::cout << 
"Rad:       " << std::setw(w) << t << 
"\t" << std::setw(w) << t/ta << 
"\t" << std::setw(w) << t/tr << std::endl;
 
  950     t = 
vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
 
  951     std::cout << 
"Vector Rad:" << std::setw(w) << t << 
"\t" << std::setw(w) << t/ta << 
"\t" << std::setw(w) << t/tr << std::endl;
 
  954   catch (std::exception& e) {
 
  955     std::cout << e.what() << std::endl;
 
  958   catch (
const char *s) {
 
  959     std::cout << s << std::endl;
 
  963     std::cout << 
"Caught unknown exception!" << std::endl;
 
void rad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< RadType > &x_rad, std::vector< std::vector< double > > &w_local)
double residual_fill(unsigned int num_nodes, unsigned int num_eqns, double mesh_spacing)
void template_element_fill(const ElemData &e, unsigned int neqn, const std::vector< T > &x, std::vector< T > &u, std::vector< T > &du, std::vector< T > &f)
void analytic_element_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &u, std::vector< double > &du, std::vector< double > &f, std::vector< std::vector< double > > &jac)
Sacado::Fad::DFad< double > FadType
void analytic_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, const std::vector< std::vector< double > > &jac_local, std::vector< double > &f, std::vector< std::vector< double > > &jac)
ElemData(double mesh_spacing)
double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void fad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< FadType > &x_fad)
static void Weighted_Gradcomp(size_t, ADVar **, Double *)
void start(bool reset=false)
Sacado::RadVec::ADvar< double > VRadType
void rad_process_fill(const ElemData &e, unsigned int neqn, std::vector< std::vector< double > > &w_local, std::vector< RadType > &x_rad, std::vector< RadType > &f_rad, std::vector< RadType * > &ff_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
void analytic_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< double > &x_local)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
static void Weighted_GradcompVec(size_t, size_t *, ADVar ***, Double **)
void vrad_process_fill(const ElemData &e, unsigned int neqn, unsigned int ncol, std::vector< std::size_t > &vn, double **w_local, std::vector< VRadType > &x_rad, std::vector< VRadType > &f_rad, VRadType ***vf_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const 
void residual_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, std::vector< double > &f)
double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void setDocString(const char doc_string[])
void fad_process_fill(const ElemData &e, unsigned int neqn, const std::vector< FadType > &f_fad, std::vector< double > &f, std::vector< std::vector< double > > &jac)
double totalElapsedTime(bool readCurrentTime=false) const 
Sacado::Rad::ADvar< double > RadType
double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void vrad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< VRadType > &x_rad, double **w_local)