Intrepid2
Intrepid2_HierarchicalBasis_HDIV_PYR.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Intrepid2 Package
4 //
5 // Copyright 2007 NTESS and the Intrepid2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
19 #ifndef Intrepid2_HierarchicalBasis_HDIV_PYR_h
20 #define Intrepid2_HierarchicalBasis_HDIV_PYR_h
21 
22 #include <Kokkos_DynRankView.hpp>
23 
24 #include <Intrepid2_config.h>
25 
26 #include "Intrepid2_Basis.hpp"
32 #include "Intrepid2_Utils.hpp"
33 
34 #include <math.h>
35 
36 #include "Teuchos_RCP.hpp"
37 
38 namespace Intrepid2
39 {
45  template<class DeviceType, class OutputScalar, class PointScalar,
46  class OutputFieldType, class InputPointsType>
48  {
49  using ExecutionSpace = typename DeviceType::execution_space;
50  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
51  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
52  using OutputScratchView2D = Kokkos::View<OutputScalar**,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
53  using PointScratchView = Kokkos::View<PointScalar*, ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
54 
55  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
56  using TeamMember = typename TeamPolicy::member_type;
57 
58  EOperator opType_;
59 
60  OutputFieldType output_; // F,P
61  InputPointsType inputPoints_; // P,D
62 
63  int polyOrder_;
64  bool useCGBasis_;
65  int numFields_, numPoints_;
66 
67  size_t fad_size_output_;
68 
69  static const int numVertices = 5;
70  static const int numMixedEdges = 4;
71  static const int numTriEdges = 4;
72  static const int numEdges = 8;
73  // the following ordering of the edges matches that used by ESEAS
74  // (it *looks* like this is what ESEAS uses; the basis comparison tests will clarify whether I've read their code correctly)
75  // see also PyramidEdgeNodeMap in Shards_BasicTopologies.hpp
76  const int edge_start_[numEdges] = {0,1,2,3,0,1,2,3}; // edge i is from edge_start_[i] to edge_end_[i]
77  const int edge_end_[numEdges] = {1,2,3,0,4,4,4,4}; // edge i is from edge_start_[i] to edge_end_[i]
78 
79  // quadrilateral face comes first
80  static const int numQuadFaces = 1;
81  static const int numTriFaces = 4;
82 
83  // face ordering matches ESEAS. (See BlendProjectPyraTF in ESEAS.)
84  const int tri_face_vertex_0[numTriFaces] = {0,1,3,0}; // faces are abc where 0 ≤ a < b < c ≤ 3
85  const int tri_face_vertex_1[numTriFaces] = {1,2,2,3};
86  const int tri_face_vertex_2[numTriFaces] = {4,4,4,4};
87 
88  Hierarchical_HDIV_PYR_Functor(EOperator opType, OutputFieldType output, InputPointsType inputPoints,
89  int polyOrder)
90  : opType_(opType), output_(output), inputPoints_(inputPoints),
91  polyOrder_(polyOrder),
92  fad_size_output_(getScalarDimensionForView(output))
93  {
94  numFields_ = output.extent_int(0);
95  numPoints_ = output.extent_int(1);
96  const auto & p = polyOrder;
97  const auto basisCardinality = p * p + 2 * p * (p+1) + 3 * p * p * (p-1);
98 
99  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != inputPoints.extent_int(0), std::invalid_argument, "point counts need to match!");
100  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != basisCardinality, std::invalid_argument, "output field size does not match basis cardinality");
101  }
102 
104  KOKKOS_INLINE_FUNCTION
105  void cross(Kokkos::Array<OutputScalar,3> &c,
106  const Kokkos::Array<OutputScalar,3> &a,
107  const Kokkos::Array<OutputScalar,3> &b) const
108  {
109  c[0] = a[1] * b[2] - a[2] * b[1];
110  c[1] = a[2] * b[0] - a[0] * b[2];
111  c[2] = a[0] * b[1] - a[1] * b[0];
112  }
113 
115  KOKKOS_INLINE_FUNCTION
116  void dot(OutputScalar &c,
117  const Kokkos::Array<OutputScalar,3> &a,
118  const Kokkos::Array<OutputScalar,3> &b) const
119  {
120  c = 0;
121  for (ordinal_type d=0; d<3; d++)
122  {
123  c += a[d] * b[d];
124  }
125  }
126 
127  KOKKOS_INLINE_FUNCTION
128  OutputScalar dot(const Kokkos::Array<OutputScalar,3> &a,
129  const Kokkos::Array<OutputScalar,3> &b) const
130  {
131  OutputScalar c = 0;
132  for (ordinal_type d=0; d<3; d++)
133  {
134  c += a[d] * b[d];
135  }
136  return c;
137  }
138 
139  KOKKOS_INLINE_FUNCTION
140  void E_E(Kokkos::Array<OutputScalar,3> &EE,
141  const ordinal_type &i,
142  const OutputScratchView &PHom,
143  const PointScalar &s0, const PointScalar &s1,
144  const Kokkos::Array<PointScalar,3> &s0_grad,
145  const Kokkos::Array<PointScalar,3> &s1_grad) const
146  {
147  const auto & P_i = PHom(i);
148  for (ordinal_type d=0; d<3; d++)
149  {
150  EE[d] = P_i * (s0 * s1_grad[d] - s1 * s0_grad[d]);
151  }
152  }
153 
154  KOKKOS_INLINE_FUNCTION
155  void E_E_CURL(Kokkos::Array<OutputScalar,3> &curl_EE,
156  const ordinal_type &i,
157  const OutputScratchView &PHom,
158  const PointScalar &s0, const PointScalar &s1,
159  const Kokkos::Array<PointScalar,3> &s0_grad,
160  const Kokkos::Array<PointScalar,3> &s1_grad) const
161  {
162  // curl (E_i^E)(s0,s1) = (i+2) [P_i](s0,s1) (grad s0 x grad s1)
163  OutputScalar ip2_Pi = (i+2) * PHom(i);
164  cross(curl_EE, s0_grad, s1_grad);
165  for (ordinal_type d=0; d<3; d++)
166  {
167  curl_EE[d] *= ip2_Pi;
168  }
169  }
170 
173  KOKKOS_INLINE_FUNCTION
174  void V_QUAD(Kokkos::Array<OutputScalar,3> &VQUAD,
175  const ordinal_type &i, const ordinal_type &j,
176  const OutputScratchView &PHom_s,
177  const PointScalar &s0, const PointScalar &s1,
178  const Kokkos::Array<PointScalar,3> &s0_grad,
179  const Kokkos::Array<PointScalar,3> &s1_grad,
180  const OutputScratchView &PHom_t,
181  const PointScalar &t0, const PointScalar &t1,
182  const Kokkos::Array<PointScalar,3> &t0_grad,
183  const Kokkos::Array<PointScalar,3> &t1_grad) const
184  {
185  Kokkos::Array<OutputScalar,3> EE_i, EE_j;
186 
187  E_E(EE_i, i, PHom_s, s0, s1, s0_grad, s1_grad);
188  E_E(EE_j, j, PHom_t, t0, t1, t0_grad, t1_grad);
189 
190  // VQUAD = EE_i x EE_j:
191  cross(VQUAD, EE_i, EE_j);
192  }
193 
196  KOKKOS_INLINE_FUNCTION
197  void E_QUAD(Kokkos::Array<OutputScalar,3> &EQUAD,
198  const ordinal_type &i, const ordinal_type &j,
199  const OutputScratchView &HomPi_s01,
200  const PointScalar &s0, const PointScalar &s1,
201  const Kokkos::Array<PointScalar,3> &s0_grad,
202  const Kokkos::Array<PointScalar,3> &s1_grad,
203  const OutputScratchView &HomLi_t01) const
204  {
205  const OutputScalar &phiE_j = HomLi_t01(j);
206 
207  Kokkos::Array<OutputScalar,3> EE_i;
208  E_E(EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
209 
210  for (ordinal_type d=0; d<3; d++)
211  {
212  EQUAD[d] = phiE_j * EE_i[d];
213  }
214  }
215 
216  KOKKOS_INLINE_FUNCTION
217  void E_QUAD_CURL(Kokkos::Array<OutputScalar,3> &EQUAD_CURL,
218  const ordinal_type &i, const ordinal_type &j,
219  const OutputScratchView &HomPi_s01,
220  const PointScalar &s0, const PointScalar &s1,
221  const Kokkos::Array<PointScalar,3> &s0_grad,
222  const Kokkos::Array<PointScalar,3> &s1_grad,
223  const OutputScratchView &HomPj_t01,
224  const OutputScratchView &HomLj_t01,
225  const OutputScratchView &HomLj_dt_t01,
226  const Kokkos::Array<PointScalar,3> &t0_grad,
227  const Kokkos::Array<PointScalar,3> &t1_grad) const
228  {
229  const OutputScalar &phiE_j = HomLj_t01(j);
230 
231  Kokkos::Array<OutputScalar,3> curl_EE_i;
232  E_E_CURL(curl_EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
233 
234  Kokkos::Array<OutputScalar,3> EE_i;
235  E_E(EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
236 
237  Kokkos::Array<OutputScalar,3> grad_phiE_j;
238  computeGradHomLi(grad_phiE_j, j, HomPj_t01, HomLj_dt_t01, t0_grad, t1_grad);
239 
240  cross(EQUAD_CURL, grad_phiE_j, EE_i);
241  for (ordinal_type d=0; d<3; d++)
242  {
243  EQUAD_CURL[d] += phiE_j * curl_EE_i[d];
244  }
245  }
246 
248  KOKKOS_INLINE_FUNCTION
249  void V_LEFT_TRI(Kokkos::Array<OutputScalar,3> &VLEFTTRI,
250  const OutputScalar &phi_i, const Kokkos::Array<OutputScalar,3> &phi_i_grad,
251  const OutputScalar &phi_j, const Kokkos::Array<OutputScalar,3> &phi_j_grad,
252  const OutputScalar &t0, const Kokkos::Array<OutputScalar,3> &t0_grad) const {
253  cross(VLEFTTRI, phi_i_grad, phi_j_grad);
254  const OutputScalar t0_2 = t0 * t0;
255  for (ordinal_type d=0; d<3; d++)
256  {
257  VLEFTTRI[d] *= t0_2;
258  }
259 
260  Kokkos::Array<OutputScalar,3> tmp_t0_grad_t0, tmp_diff, tmp_cross;
261  for (ordinal_type d=0; d<3; d++)
262  {
263  tmp_t0_grad_t0[d] = t0 * t0_grad[d];
264  tmp_diff[d] = phi_i * phi_j_grad[d] - phi_j * phi_i_grad[d];
265  }
266  cross(tmp_cross, tmp_t0_grad_t0, tmp_diff);
267 
268  for (ordinal_type d=0; d<3; d++)
269  {
270  VLEFTTRI[d] += tmp_cross[d];
271  }
272  };
273 
274 
276  KOKKOS_INLINE_FUNCTION
277  void V_RIGHT_TRI(Kokkos::Array<OutputScalar,3> &VRIGHTTRI,
278  const OutputScalar &mu1, const Kokkos::Array<OutputScalar,3> &mu1_grad,
279  const OutputScalar &phi_i, const Kokkos::Array<OutputScalar,3> &phi_i_grad,
280  const OutputScalar &t0, const Kokkos::Array<OutputScalar,3> &t0_grad) const {
281  Kokkos::Array<OutputScalar,3> left_vector; // left vector in the cross product we take below.
282 
283  const OutputScalar t0_2 = t0 * t0;
284  for (ordinal_type d=0; d<3; d++)
285  {
286  left_vector[d] = t0_2 * phi_i_grad[d] + 2. * t0 * phi_i * t0_grad[d];
287  }
288  cross(VRIGHTTRI, left_vector, mu1_grad);
289  };
290 
291  // This is the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
292  KOKKOS_INLINE_FUNCTION
293  void V_TRI(Kokkos::Array<OutputScalar,3> &VTRI,
294  const ordinal_type &i, // i >= 0
295  const ordinal_type &j, // j >= 0
296  const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
297  const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
298  const Kokkos::Array<PointScalar,3> &vectorWeight) const // s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1) -- see computeFaceVectorWeight()
299  {
300  const auto &P_i = P(i);
301  const auto &P_2ip1_j = P_2ip1(j);
302 
303  for (ordinal_type d=0; d<3; d++)
304  {
305  VTRI[d] = P_i * P_2ip1_j * vectorWeight[d];
306  }
307  }
308 
309  // Divergence of the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
310  KOKKOS_INLINE_FUNCTION
311  void V_TRI_DIV(OutputScalar &VTRI_DIV,
312  const ordinal_type &i, // i >= 0
313  const ordinal_type &j, // j >= 0
314  const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
315  const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
316  const OutputScalar &divWeight) const // grad s0 \dot (grad s1 x grad s2)
317  {
318  const auto &P_i = P(i);
319  const auto &P_2ip1_j = P_2ip1(j);
320 
321  VTRI_DIV = (i + j + 3.) * P_i * P_2ip1_j * divWeight;
322  }
323 
325  KOKKOS_INLINE_FUNCTION
326  void V_TRI_B42(Kokkos::Array<OutputScalar,3> &VTRI_mus0_mus1_s2_over_mu,
327  const Kokkos::Array<OutputScalar,3> &VTRI_00_s0_s1_s2,
328  const Kokkos::Array<OutputScalar,3> &EE_0_s0_s1,
329  const OutputScalar &s2,
330  const OutputScalar &mu, const Kokkos::Array<OutputScalar,3> &mu_grad,
331  const ordinal_type &i, // i >= 0
332  const ordinal_type &j, // j >= 0
333  const OutputScratchView &P_mus0_mus1, // container in which shiftedScaledLegendreValues have been computed for the appropriate face, with arguments (mu s0, mu s1)
334  const OutputScratchView &P_2ip1_mus0pmus1_s2 // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face, with arguments (mu s0 + mu s1, s2)
335  ) const
336  {
337  const auto &Pi_mus0_mus1 = P_mus0_mus1(i);
338  const auto &Pj_2ip1_mus0pmus1_s2 = P_2ip1_mus0pmus1_s2(j);
339 
340  // place s2 (grad mu) x E^E_0 in result container
341  cross(VTRI_mus0_mus1_s2_over_mu, mu_grad, EE_0_s0_s1);
342  for (ordinal_type d=0; d<3; d++)
343  {
344  VTRI_mus0_mus1_s2_over_mu[d] *= s2;
345  }
346 
347  // add mu V^{tri}_00 to it:
348  for (ordinal_type d=0; d<3; d++)
349  {
350  VTRI_mus0_mus1_s2_over_mu[d] += mu * VTRI_00_s0_s1_s2[d];
351  }
352 
353  // multiply by [P_i, P^{2i+1}_j]
354  for (ordinal_type d=0; d<3; d++)
355  {
356  VTRI_mus0_mus1_s2_over_mu[d] *= Pi_mus0_mus1 * Pj_2ip1_mus0pmus1_s2;
357  }
358  }
359 
361  KOKKOS_INLINE_FUNCTION
362  void V_TRI_B42_DIV(OutputScalar &div_VTRI_mus0_mus1_s2_over_mu,
363  const Kokkos::Array<OutputScalar,3> &VTRI_00_s0_s1_s2,
364  const Kokkos::Array<OutputScalar,3> &EE_0_s0_s1,
365  const OutputScalar &s2, const Kokkos::Array<OutputScalar,3> &s2_grad,
366  const OutputScalar &mu, const Kokkos::Array<OutputScalar,3> &mu_grad,
367  const ordinal_type &i, // i >= 0
368  const ordinal_type &j, // j >= 0
369  const OutputScratchView &P_mus0_mus1, // container in which shiftedScaledLegendreValues have been computed for the appropriate face, with arguments (mu s0, mu s1)
370  const OutputScratchView &P_2ip1_mus0pmus1_s2 // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face, with arguments (mu s0 + mu s1, s2)
371  ) const
372  {
373  const auto &Pi_mus0_mus1 = P_mus0_mus1(i);
374  const auto &Pj_2ip1_mus0pmus1_s2 = P_2ip1_mus0pmus1_s2(j);
375 
376  Kokkos::Array<OutputScalar,3> vector;
377 
378  // place E^E_0 x grad s2 in scratch vector container
379  cross(vector, EE_0_s0_s1, s2_grad);
380  // multiply by (i+j+3)
381  for (ordinal_type d=0; d<3; d++)
382  {
383  vector[d] *= i+j+3.;
384  }
385  // subtract V_00
386  for (ordinal_type d=0; d<3; d++)
387  {
388  vector[d] -= VTRI_00_s0_s1_s2[d];
389  }
390 
391  OutputScalar dot_product;
392  dot(dot_product, mu_grad, vector);
393 
394  div_VTRI_mus0_mus1_s2_over_mu = Pi_mus0_mus1 * Pj_2ip1_mus0pmus1_s2 * dot_product;
395  }
396 
397  KOKKOS_INLINE_FUNCTION
398  void computeFaceVectorWeight(Kokkos::Array<OutputScalar,3> &vectorWeight,
399  const PointScalar &s0, const Kokkos::Array<PointScalar,3> &s0Grad,
400  const PointScalar &s1, const Kokkos::Array<PointScalar,3> &s1Grad,
401  const PointScalar &s2, const Kokkos::Array<PointScalar,3> &s2Grad) const
402  {
403  // compute s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1)
404 
405  Kokkos::Array<Kokkos::Array<PointScalar,3>,3> cross_products;
406 
407  cross(cross_products[0], s1Grad, s2Grad);
408  cross(cross_products[1], s2Grad, s0Grad);
409  cross(cross_products[2], s0Grad, s1Grad);
410 
411  Kokkos::Array<PointScalar,3> s {s0,s1,s2};
412 
413  for (ordinal_type d=0; d<3; d++)
414  {
415  OutputScalar v_d = 0;
416  for (ordinal_type i=0; i<3; i++)
417  {
418  v_d += s[i] * cross_products[i][d];
419  }
420  vectorWeight[d] = v_d;
421  }
422  }
423 
424  KOKKOS_INLINE_FUNCTION
425  void computeFaceDivWeight(OutputScalar &divWeight,
426  const Kokkos::Array<PointScalar,3> &s0Grad,
427  const Kokkos::Array<PointScalar,3> &s1Grad,
428  const Kokkos::Array<PointScalar,3> &s2Grad) const
429  {
430  // grad s0 \dot (grad s1 x grad s2)
431 
432  Kokkos::Array<PointScalar,3> grad_s1_cross_grad_s2;
433  cross(grad_s1_cross_grad_s2, s1Grad, s2Grad);
434 
435  dot(divWeight, s0Grad, grad_s1_cross_grad_s2);
436  }
437 
438  KOKKOS_INLINE_FUNCTION
439  void computeGradHomLi(Kokkos::Array<OutputScalar,3> &HomLi_grad, // grad [L_i](s0,s1)
440  const ordinal_type i,
441  const OutputScratchView &HomPi_s0s1, // [P_i](s0,s1)
442  const OutputScratchView &HomLi_dt_s0s1, // [d/dt L_i](s0,s1)
443  const Kokkos::Array<PointScalar,3> &s0Grad,
444  const Kokkos::Array<PointScalar,3> &s1Grad) const
445  {
446 // grad [L_i](s0,s1) = [P_{i-1}](s0,s1) * grad s1 + [R_{i-1}](s0,s1) * grad (s0 + s1)
447  const auto & R_i_minus_1 = HomLi_dt_s0s1(i); // d/dt L_i = R_{i-1}
448  const auto & P_i_minus_1 = HomPi_s0s1(i-1);
449  for (ordinal_type d=0; d<3; d++)
450  {
451  HomLi_grad[d] = P_i_minus_1 * s1Grad[d] + R_i_minus_1 * (s0Grad[d] + s1Grad[d]);
452  }
453  }
454 
455  KOKKOS_INLINE_FUNCTION
456  void operator()( const TeamMember & teamMember ) const
457  {
458  auto pointOrdinal = teamMember.league_rank();
459  OutputScratchView scratch1D_1, scratch1D_2, scratch1D_3;
460  OutputScratchView scratch1D_4, scratch1D_5, scratch1D_6;
461  OutputScratchView scratch1D_7, scratch1D_8, scratch1D_9;
462  if (fad_size_output_ > 0) {
463  scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
464  scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
465  scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
466  scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
467  scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
468  scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
469  scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
470  scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
471  scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
472  }
473  else {
474  scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
475  scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
476  scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
477  scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
478  scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
479  scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
480  scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
481  scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
482  scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
483  }
484 
485  const auto & x = inputPoints_(pointOrdinal,0);
486  const auto & y = inputPoints_(pointOrdinal,1);
487  const auto & z = inputPoints_(pointOrdinal,2);
488 
489  // Intrepid2 uses (-1,1)^2 for x,y
490  // ESEAS uses (0,1)^2
491 
492  Kokkos::Array<PointScalar,3> coords;
493  transformToESEASPyramid<>(coords[0], coords[1], coords[2], x, y, z); // map x,y coordinates from (-z,z)^2 to (0,z)^2
494 
495  // pyramid "affine" coordinates and gradients get stored in lambda, lambdaGrad:
496  using Kokkos::Array;
497  Array<PointScalar,5> lambda;
498  Array<Kokkos::Array<PointScalar,3>,5> lambdaGrad;
499 
500  Array<Array<PointScalar,3>,2> mu;
501  Array<Array<Array<PointScalar,3>,3>,2> muGrad;
502 
503  Array<Array<PointScalar,2>,3> nu;
504  Array<Array<Array<PointScalar,3>,2>,3> nuGrad;
505 
506  affinePyramid(lambda, lambdaGrad, mu, muGrad, nu, nuGrad, coords);
507 
508  switch (opType_)
509  {
510  case OPERATOR_VALUE:
511  {
512  ordinal_type fieldOrdinalOffset = 0;
513  // quadrilateral face
514  {
515  // rename scratch1, scratch2
516  auto & Pi = scratch1D_1;
517  auto & Pj = scratch1D_2;
518 
519  auto & s0 = mu[0][0], & s1 = mu[1][0];
520  auto & s0_grad = muGrad[0][0], & s1_grad = muGrad[1][0];
521  auto & t0 = mu[0][1], & t1 = mu[1][1];
522  auto & t0_grad = muGrad[0][1], & t1_grad = muGrad[1][1];
523 
524  Polynomials::shiftedScaledLegendreValues(Pi, polyOrder_-1, s1, s0 + s1);
525  Polynomials::shiftedScaledLegendreValues(Pj, polyOrder_-1, t1, t0 + t1);
526 
527  const auto & muZ_0 = mu[0][2];
528  OutputScalar mu0_cubed = muZ_0 * muZ_0 * muZ_0;
529 
530  // following the ESEAS ordering: j increments first
531  for (int j=0; j<polyOrder_; j++)
532  {
533  for (int i=0; i<polyOrder_; i++)
534  {
535  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
536  V_QUAD(VQUAD, i, j,
537  Pi, s0, s1, s0_grad, s1_grad,
538  Pj, t0, t1, t0_grad, t1_grad);
539 
540  for (ordinal_type d=0; d<3; d++)
541  {
542  output_(fieldOrdinalOffset,pointOrdinal,d) = mu0_cubed * VQUAD[d];
543  }
544  fieldOrdinalOffset++;
545  }
546  }
547  }
548 
549  // triangle faces
550  {
551  /*
552  Face functions for triangular face (d,e,f) given by:
553  1/2 * ( mu_c^{zeta,xi_b} * V_TRI_ij(nu_012^{zeta,xi_a})
554  + 1/mu_c^{zeta,xi_b} * V_TRI_ij(nu_012^{zeta,xi_a} * mu_c^{zeta,xi_b}) )
555  but the division by mu_c^{zeta,xi_b} should ideally be avoided in computations; there is
556  an alternate expression, not implemented by ESEAS. We start with the above expression
557  so that we can get agreement with ESEAS. Hopefully after that we can do the alternative,
558  and confirm that we maintain agreement with ESEAS for points where ESEAS does not generate
559  nans.
560 
561 
562  where s0, s1, s2 are nu[0][a-1],nu[1][a-1],nu[2][a-1] (nu_012 above)
563  and (a,b) = (1,2) for faces 0,2; (a,b) = (2,1) for others;
564  c = 0 for faces 0,3; c = 1 for others
565  */
566  const auto & P = scratch1D_1; // for V_TRI( nu_012)
567  const auto & P_2ip1 = scratch1D_2;
568  const auto & Pmu = scratch1D_3; // for V_TRI( mu * nu_012)
569  const auto & Pmu_2ip1 = scratch1D_4;
570  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
571  {
572  // face 0,2 --> a=1, b=2
573  // face 1,3 --> a=2, b=1
574  int a = (faceOrdinal % 2 == 0) ? 1 : 2;
575  int b = 3 - a;
576  // face 0,3 --> c=0
577  // face 1,2 --> c=1
578  int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
579 
580  const auto & s0 = nu[0][a-1]; const auto & s0_grad = nuGrad[0][a-1];
581  const auto & s1 = nu[1][a-1]; const auto & s1_grad = nuGrad[1][a-1];
582  const auto & s2 = nu[2][a-1];
583  const PointScalar jacobiScaling = s0 + s1 + s2;
584 
585  const PointScalar legendreScaling = s0 + s1;
586  Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
587 
588  const auto lambda0_index = tri_face_vertex_0[faceOrdinal];
589  const auto lambda1_index = tri_face_vertex_1[faceOrdinal];
590  const auto lambda2_index = tri_face_vertex_2[faceOrdinal];
591 
592  const auto & mu_c_b = mu [c][b-1];
593  const auto & mu_c_b_grad = muGrad[c][b-1];
594  const auto & mu_s0 = lambda[lambda0_index];
595  const auto & mu_s1 = lambda[lambda1_index];
596  const auto & mu_s2 = lambda[lambda2_index];
597 
598  const PointScalar muJacobiScaling = mu_s0 + mu_s1 + mu_s2;
599 
600  const PointScalar muLegendreScaling = mu_s0 + mu_s1;
601  Polynomials::shiftedScaledLegendreValues(Pmu, polyOrder_-1, mu_s1, muLegendreScaling);
602 
603  Kokkos::Array<PointScalar, 3> vectorWeight;
604  computeFaceVectorWeight(vectorWeight, nu[0][a-1], nuGrad[0][a-1],
605  nu[1][a-1], nuGrad[1][a-1],
606  nu[2][a-1], nuGrad[2][a-1]);
607 
608  Kokkos::Array<OutputScalar,3> VTRI_00;
609  V_TRI(VTRI_00,0,0,P,P_2ip1,vectorWeight);
610 
611  Kokkos::Array<OutputScalar,3> EE_0;
612  E_E(EE_0, 0, P, s0, s1, s0_grad, s1_grad);
613 
614  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
615  {
616  for (int i=0; i<=totalPolyOrder; i++)
617  {
618  const int j = totalPolyOrder - i;
619 
620  const double alpha = i*2.0 + 1;
621  Polynomials::shiftedScaledJacobiValues( P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
622  Polynomials::shiftedScaledJacobiValues(Pmu_2ip1, alpha, polyOrder_-1, mu_s2, muJacobiScaling);
623 
624  Kokkos::Array<OutputScalar,3> VTRI; // output from V_TRI
625  V_TRI(VTRI, i, j, P, P_2ip1, vectorWeight);
626 
627  Kokkos::Array<OutputScalar,3> one_over_mu_VTRI_mu; // (B.42) result
628  V_TRI_B42(one_over_mu_VTRI_mu, VTRI_00, EE_0, s2, mu_c_b, mu_c_b_grad, i, j, Pmu, Pmu_2ip1);
629 
630  for (ordinal_type d=0; d<3; d++)
631  {
632  output_(fieldOrdinalOffset,pointOrdinal,d) = 0.5 * (VTRI[d] * mu_c_b + one_over_mu_VTRI_mu[d]);
633  }
634 
635  fieldOrdinalOffset++;
636  }
637  }
638  }
639  } // triangle faces block
640 
641  // interior functions
642  {
643  // label scratch
644  const auto & Li_muZ01 = scratch1D_1; // used for phi_k^E values in Family I, II, IV
645  const auto & Li_muX01 = scratch1D_2; // used for E_QUAD computations
646  const auto & Li_muY01 = scratch1D_3; // used for E_QUAD computations
647  const auto & Pi_muX01 = scratch1D_4; // used for E_QUAD computations where xi_1 comes first
648  const auto & Pi_muY01 = scratch1D_5; // used for E_QUAD computations where xi_2 comes first
649  const auto & Pi_muZ01 = scratch1D_6; // used for E_QUAD computations where xi_2 comes first
650  const auto & Li_dt_muX01 = scratch1D_7; // used for E_QUAD computations
651  const auto & Li_dt_muY01 = scratch1D_8; // used for E_QUAD computations
652  const auto & Li_dt_muZ01 = scratch1D_9; // used for E_QUAD computations
653 
654  const auto & muX_0 = mu[0][0]; const auto & muX_0_grad = muGrad[0][0];
655  const auto & muX_1 = mu[1][0]; const auto & muX_1_grad = muGrad[1][0];
656  const auto & muY_0 = mu[0][1]; const auto & muY_0_grad = muGrad[0][1];
657  const auto & muY_1 = mu[1][1]; const auto & muY_1_grad = muGrad[1][1];
658  const auto & muZ_0 = mu[0][2]; const auto & muZ_0_grad = muGrad[0][2];
659  const auto & muZ_1 = mu[1][2]; const auto & muZ_1_grad = muGrad[1][2];
660 
661  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muX01, polyOrder_, muX_1, muX_0 + muX_1);
662  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muY01, polyOrder_, muY_1, muY_0 + muY_1);
663  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
664 
665  Polynomials::shiftedScaledLegendreValues(Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
666  Polynomials::shiftedScaledLegendreValues(Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
667  Polynomials::shiftedScaledLegendreValues(Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
668 
669  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muX01, Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
670  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muY01, Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
671  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muZ01, Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
672 
673  // FAMILIES I & II -- divergence-free families
674  // following the ESEAS ordering: k increments first
675  for (int f=0; f<2; f++)
676  {
677  const auto &s0 = (f==0) ? muX_0 : muY_0; const auto & s0_grad = (f==0) ? muX_0_grad : muY_0_grad;
678  const auto &s1 = (f==0) ? muX_1 : muY_1; const auto & s1_grad = (f==0) ? muX_1_grad : muY_1_grad;
679  const auto & t0_grad = (f==0) ? muY_0_grad : muX_0_grad;
680  const auto & t1_grad = (f==0) ? muY_1_grad : muX_1_grad;
681  const auto & Pi_s01 = (f==0) ? Pi_muX01 : Pi_muY01;
682  const auto & Pi_t01 = (f==0) ? Pi_muY01 : Pi_muX01;
683  const auto & Li_t01 = (f==0) ? Li_muY01 : Li_muX01;
684  const auto & Li_dt_t01 = (f==0) ? Li_dt_muY01 : Li_dt_muX01;
685 
686  for (int k=2; k<=polyOrder_; k++)
687  {
688  const auto & phi_k = Li_muZ01(k);
689  Kokkos::Array<OutputScalar,3> phi_k_grad;
690  computeGradHomLi(phi_k_grad, k, Pi_muZ01, Li_dt_muZ01, muZ_0_grad, muZ_1_grad);
691 
692  Kokkos::Array<OutputScalar,3> muZ0_grad_phi_k_plus_phi_k_grad_muZ0;
693  for (ordinal_type d=0; d<3; d++)
694  {
695  muZ0_grad_phi_k_plus_phi_k_grad_muZ0[d] = muZ_0 * phi_k_grad[d] + phi_k * muZ_0_grad[d];
696  }
697 
698  // for reasons that I don't entirely understand, ESEAS switches whether i or j are the fastest-moving indices depending on whether it's family I or II. Following their code, I'm calling the outer loop variable jg, inner ig.
699  // (Cross-code comparisons are considerably simpler if we number the dofs in the same way.)
700  ordinal_type jg_min = (f==0) ? 2 : 0;
701  ordinal_type jg_max = (f==0) ? polyOrder_ : polyOrder_-1;
702  ordinal_type ig_min = (f==0) ? 0 : 2;
703  ordinal_type ig_max = (f==0) ? polyOrder_ -1 : polyOrder_;
704  for (ordinal_type jg=jg_min; jg<=jg_max; jg++)
705  {
706  for (ordinal_type ig=ig_min; ig<=ig_max; ig++)
707  {
708  const ordinal_type &i = (f==0) ? ig : jg;
709  const ordinal_type &j = (f==0) ? jg : ig;
710  Kokkos::Array<OutputScalar,3> EQUAD_ij;
711  Kokkos::Array<OutputScalar,3> curl_EQUAD_ij;
712 
713  E_QUAD(EQUAD_ij, i, j, Pi_s01, s0, s1, s0_grad, s1_grad, Li_t01);
714 
715  E_QUAD_CURL(curl_EQUAD_ij, i, j, Pi_s01, s0, s1, s0_grad, s1_grad,
716  Pi_t01, Li_t01, Li_dt_t01, t0_grad, t1_grad);
717 
718  // first term: muZ_0 phi^E_k curl EQUAD
719  // we can reuse the memory for curl_EQUAD_ij; we won't need the values there again
720  Kokkos::Array<OutputScalar,3> & firstTerm = curl_EQUAD_ij;
721  for (ordinal_type d=0; d<3; d++)
722  {
723  firstTerm[d] *= muZ_0 * phi_k;
724  }
725 
726  Kokkos::Array<OutputScalar,3> secondTerm; //(muZ0 grad phi + phi grad muZ0) x EQUAD
727 
728  cross(secondTerm, muZ0_grad_phi_k_plus_phi_k_grad_muZ0, EQUAD_ij);
729 
730  for (ordinal_type d=0; d<3; d++)
731  {
732  output_(fieldOrdinalOffset,pointOrdinal,d) = firstTerm[d] + secondTerm[d];
733  }
734 
735  fieldOrdinalOffset++;
736  }
737  }
738  }
739  } // family I, II loop
740 
741  // FAMILY III -- a divergence-free family
742  for (int j=2; j<=polyOrder_; j++)
743  {
744  // phi_ij_QUAD: phi_i(mu_X01) * phi_j(mu_Y01) // (following the ESEAS *implementation*; Fuentes et al. (p. 454) actually have the arguments reversed, which leads to a different basis ordering)
745  const auto & phi_j = Li_muY01(j);
746  Kokkos::Array<OutputScalar,3> phi_j_grad;
747  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
748  for (int i=2; i<=polyOrder_; i++)
749  {
750  const auto & phi_i = Li_muX01(i);
751  Kokkos::Array<OutputScalar,3> phi_i_grad;
752  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
753 
754  Kokkos::Array<OutputScalar,3> phi_ij_grad;
755  for (ordinal_type d=0; d<3; d++)
756  {
757  phi_ij_grad[d] = phi_i * phi_j_grad[d] + phi_j * phi_i_grad[d];
758  }
759 
760  Kokkos::Array<OutputScalar,3> cross_product; // phi_ij_grad x grad_muZ0
761  cross(cross_product, phi_ij_grad, muZ_0_grad);
762 
763  ordinal_type n = max(i,j);
764  OutputScalar weight = n * pow(muZ_0,n-1);
765  for (ordinal_type d=0; d<3; d++)
766  {
767  output_(fieldOrdinalOffset,pointOrdinal,d) = weight * cross_product[d];
768  }
769  fieldOrdinalOffset++;
770  }
771  }
772 
773  // FAMILY IV (non-trivial divergences)
774  {
775  const auto muZ_0_squared = muZ_0 * muZ_0;
776  const auto &s0 = muX_0; const auto & s0_grad = muX_0_grad;
777  const auto &s1 = muX_1; const auto & s1_grad = muX_1_grad;
778  const auto &t0 = muY_0; const auto & t0_grad = muY_0_grad;
779  const auto &t1 = muY_1; const auto & t1_grad = muY_1_grad;
780  const auto &Pi = Pi_muX01;
781  const auto &Pj = Pi_muY01;
782  for (int k=2; k<=polyOrder_; k++)
783  {
784  const auto & phi_k = Li_muZ01(k);
785  for (int j=0; j<polyOrder_; j++)
786  {
787  for (int i=0; i<polyOrder_; i++)
788  {
789  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
790  V_QUAD(VQUAD, i, j,
791  Pi, s0, s1, s0_grad, s1_grad,
792  Pj, t0, t1, t0_grad, t1_grad);
793 
794  for (int d=0; d<3; d++)
795  {
796  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_0_squared * phi_k * VQUAD[d];
797  }
798 
799  fieldOrdinalOffset++;
800  }
801  }
802  }
803  }
804 
805  // FAMILY V (non-trivial divergences)
806  {
807  for (int j=2; j<=polyOrder_; j++)
808  {
809  const auto & phi_j = Li_muY01(j);
810  Kokkos::Array<OutputScalar,3> phi_j_grad;
811  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
812 
813  for (int i=2; i<=polyOrder_; i++)
814  {
815  const auto & phi_i = Li_muX01(i);
816  Kokkos::Array<OutputScalar,3> phi_i_grad;
817  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
818 
819  const int n = max(i,j);
820  const OutputScalar muZ_1_nm1 = pow(muZ_1,n-1);
821 
822  Kokkos::Array<OutputScalar,3> VLEFTTRI;
823  V_LEFT_TRI(VLEFTTRI, phi_i, phi_i_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
824 
825  for (int d=0; d<3; d++)
826  {
827  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_nm1 * VLEFTTRI[d];
828  }
829 
830  fieldOrdinalOffset++;
831  }
832  }
833  }
834 
835  // FAMILY VI (non-trivial divergences)
836  for (int i=2; i<=polyOrder_; i++)
837  {
838  const auto & phi_i = Li_muX01(i);
839  Kokkos::Array<OutputScalar,3> phi_i_grad;
840  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
841 
842  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
843  V_RIGHT_TRI(VRIGHTTRI, muY_1, muY_1_grad, phi_i, phi_i_grad, muZ_0, muZ_0_grad);
844 
845  const OutputScalar muZ_1_im1 = pow(muZ_1,i-1);
846 
847  for (int d=0; d<3; d++)
848  {
849  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_im1 * VRIGHTTRI[d];
850  }
851 
852  fieldOrdinalOffset++;
853  }
854 
855  // FAMILY VII (non-trivial divergences)
856  for (int j=2; j<=polyOrder_; j++)
857  {
858  const auto & phi_j = Li_muY01(j);
859  Kokkos::Array<OutputScalar,3> phi_j_grad;
860  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
861 
862  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
863  V_RIGHT_TRI(VRIGHTTRI, muX_1, muX_1_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
864 
865  const OutputScalar muZ_1_jm1 = pow(muZ_1,j-1);
866 
867  for (int d=0; d<3; d++)
868  {
869  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_jm1 * VRIGHTTRI[d];
870  }
871 
872  fieldOrdinalOffset++;
873  }
874  }
875 
876  // transform from ESEAS H(div) space to Intrepid2 space
877  // (what's in output_ to this point is in ESEAS space)
878  for (ordinal_type fieldOrdinal=0; fieldOrdinal<numFields_; fieldOrdinal++)
879  {
880  OutputScalar & xcomp = output_(fieldOrdinal,pointOrdinal,0);
881  OutputScalar & ycomp = output_(fieldOrdinal,pointOrdinal,1);
882  OutputScalar & zcomp = output_(fieldOrdinal,pointOrdinal,2);
883 
884  transformHDIVFromESEASPyramidValue(xcomp,ycomp,zcomp,
885  xcomp,ycomp,zcomp);
886  }
887  } // end OPERATOR_VALUE
888  break;
889  case OPERATOR_DIV:
890  {
891  ordinal_type fieldOrdinalOffset = 0;
892  // quadrilateral face
893  {
894  // rename scratch1, scratch2
895  auto & Pi = scratch1D_1;
896  auto & Pj = scratch1D_2;
897 
898  auto & s0 = mu[0][0], s1 = mu[1][0];
899  auto & s0_grad = muGrad[0][0], s1_grad = muGrad[1][0];
900  auto & t0 = mu[0][1], t1 = mu[1][1];
901  auto & t0_grad = muGrad[0][1], t1_grad = muGrad[1][1];
902 
903  Polynomials::shiftedScaledLegendreValues(Pi, polyOrder_-1, s1, s0 + s1);
904  Polynomials::shiftedScaledLegendreValues(Pj, polyOrder_-1, t1, t0 + t1);
905 
906  const auto & muZ0 = mu[0][2];
907  const auto & muZ0_grad = muGrad[0][2];
908  OutputScalar three_mu0_squared = 3.0 * muZ0 * muZ0;
909 
910  // following the ESEAS ordering: j increments first
911  for (int j=0; j<polyOrder_; j++)
912  {
913  for (int i=0; i<polyOrder_; i++)
914  {
915  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
916  V_QUAD(VQUAD, i, j,
917  Pi, s0, s1, s0_grad, s1_grad,
918  Pj, t0, t1, t0_grad, t1_grad);
919 
920  OutputScalar grad_muZ0_dot_VQUAD;
921  dot(grad_muZ0_dot_VQUAD, muZ0_grad, VQUAD);
922 
923  output_(fieldOrdinalOffset,pointOrdinal) = three_mu0_squared * grad_muZ0_dot_VQUAD;
924  fieldOrdinalOffset++;
925  }
926  }
927  } // end quad face block
928 
929  // triangle faces
930  {
931  const auto & P = scratch1D_1; // for V_TRI( nu_012)
932  const auto & P_2ip1 = scratch1D_2;
933  const auto & Pmu = scratch1D_3; // for V_TRI( mu * nu_012)
934  const auto & Pmu_2ip1 = scratch1D_4;
935  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
936  {
937  // face 0,2 --> a=1, b=2
938  // face 1,3 --> a=2, b=1
939  int a = (faceOrdinal % 2 == 0) ? 1 : 2;
940  int b = 3 - a;
941  // face 0,3 --> c=0
942  // face 1,2 --> c=1
943  int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
944 
945  const auto & s0 = nu[0][a-1]; const auto & s0_grad = nuGrad[0][a-1];
946  const auto & s1 = nu[1][a-1]; const auto & s1_grad = nuGrad[1][a-1];
947  const auto & s2 = nu[2][a-1]; const auto & s2_grad = nuGrad[2][a-1];
948  const PointScalar jacobiScaling = s0 + s1 + s2; // we can actually assume that this is 1; see comment at bottom of p. 425 of Fuentes et al.
949 
950  const PointScalar legendreScaling = s0 + s1;
951  Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
952 
953  const auto lambda0_index = tri_face_vertex_0[faceOrdinal];
954  const auto lambda1_index = tri_face_vertex_1[faceOrdinal];
955  const auto lambda2_index = tri_face_vertex_2[faceOrdinal];
956 
957  const auto & mu_c_b = mu[c][b-1];
958  const auto & mu_c_b_grad = muGrad[c][b-1];
959 
960  const auto & mu_s0 = lambda[lambda0_index];
961  const auto & mu_s1 = lambda[lambda1_index];
962  const auto & mu_s2 = lambda[lambda2_index]; // == s2
963 
964  const PointScalar muJacobiScaling = mu_s0 + mu_s1 + mu_s2;
965 
966  const PointScalar muLegendreScaling = mu_s0 + mu_s1;
967  Polynomials::shiftedScaledLegendreValues(Pmu, polyOrder_-1, mu_s1, muLegendreScaling);
968 
969  Kokkos::Array<PointScalar, 3> vectorWeight;
970  computeFaceVectorWeight(vectorWeight, nu[0][a-1], nuGrad[0][a-1],
971  nu[1][a-1], nuGrad[1][a-1],
972  nu[2][a-1], nuGrad[2][a-1]);
973 
974  Kokkos::Array<PointScalar,3> & mu_s0_grad = lambdaGrad[lambda0_index];
975  Kokkos::Array<PointScalar,3> & mu_s1_grad = lambdaGrad[lambda1_index];
976  Kokkos::Array<PointScalar,3> & mu_s2_grad = lambdaGrad[lambda2_index]; // == s2_grad
977 
978  Kokkos::Array<PointScalar, 3> muVectorWeight;
979  computeFaceVectorWeight(muVectorWeight, mu_s0, mu_s0_grad,
980  mu_s1, mu_s1_grad,
981  mu_s2, mu_s2_grad);
982 
983  OutputScalar muDivWeight;
984  computeFaceDivWeight(muDivWeight, mu_s0_grad, mu_s1_grad, mu_s2_grad);
985 
986  Kokkos::Array<OutputScalar,3> VTRI_00;
987  V_TRI(VTRI_00,0,0,P,P_2ip1,vectorWeight);
988 
989  Kokkos::Array<OutputScalar,3> EE_0;
990  E_E(EE_0, 0, P, s0, s1, s0_grad, s1_grad);
991 
992  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
993  {
994  for (int i=0; i<=totalPolyOrder; i++)
995  {
996  const int j = totalPolyOrder - i;
997 
998  const double alpha = i*2.0 + 1;
999  Polynomials::shiftedScaledJacobiValues( P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
1000  Polynomials::shiftedScaledJacobiValues(Pmu_2ip1, alpha, polyOrder_-1, mu_s2, muJacobiScaling);
1001 
1002  Kokkos::Array<OutputScalar,3> VTRI; // output from V_TRI
1003 
1004  V_TRI(VTRI, i, j, P, P_2ip1, vectorWeight);
1005 
1006  OutputScalar div_one_over_mu_VTRI_mu;
1007  V_TRI_B42_DIV(div_one_over_mu_VTRI_mu, VTRI_00, EE_0, s2, s2_grad, mu_c_b, mu_c_b_grad, i, j, Pmu, Pmu_2ip1);
1008 
1009  output_(fieldOrdinalOffset,pointOrdinal) = 0.5 * (dot(mu_c_b_grad, VTRI) + div_one_over_mu_VTRI_mu);
1010 
1011  fieldOrdinalOffset++;
1012  }
1013  }
1014  }
1015  } // end triangle face block
1016 
1017  {
1018  // FAMILY I -- divergence free
1019  // following the ESEAS ordering: k increments first
1020  for (int k=2; k<=polyOrder_; k++)
1021  {
1022  for (int j=2; j<=polyOrder_; j++)
1023  {
1024  for (int i=0; i<polyOrder_; i++)
1025  {
1026  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1027  fieldOrdinalOffset++;
1028  }
1029  }
1030  }
1031 
1032  // FAMILY II -- divergence free
1033  // following the ESEAS ordering: k increments first
1034  for (int k=2; k<=polyOrder_; k++)
1035  {
1036  for (int j=2; j<=polyOrder_; j++)
1037  {
1038  for (int i=0; i<polyOrder_; i++)
1039  {
1040  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1041  fieldOrdinalOffset++;
1042  }
1043  }
1044  }
1045 
1046  // FAMILY III -- divergence free
1047  for (int j=2; j<=polyOrder_; j++)
1048  {
1049  for (int i=2; i<=polyOrder_; i++)
1050  {
1051  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1052  fieldOrdinalOffset++;
1053  }
1054  }
1055 
1056  const auto & Li_muZ01 = scratch1D_1; // used in Family IV
1057  const auto & Li_muX01 = scratch1D_2; // used in Family V
1058  const auto & Li_muY01 = scratch1D_3; // used in Family V
1059  const auto & Pi_muX01 = scratch1D_4; // used in Family IV
1060  const auto & Pi_muY01 = scratch1D_5; // used in Family IV
1061  const auto & Pi_muZ01 = scratch1D_6; // used in Family IV
1062  const auto & Li_dt_muX01 = scratch1D_7; // used in Family V
1063  const auto & Li_dt_muY01 = scratch1D_8; // used in Family V
1064  const auto & Li_dt_muZ01 = scratch1D_9; // used in Family IV
1065 
1066  const auto & muX_0 = mu[0][0]; const auto & muX_0_grad = muGrad[0][0];
1067  const auto & muX_1 = mu[1][0]; const auto & muX_1_grad = muGrad[1][0];
1068  const auto & muY_0 = mu[0][1]; const auto & muY_0_grad = muGrad[0][1];
1069  const auto & muY_1 = mu[1][1]; const auto & muY_1_grad = muGrad[1][1];
1070  const auto & muZ_0 = mu[0][2]; const auto & muZ_0_grad = muGrad[0][2];
1071  const auto & muZ_1 = mu[1][2]; const auto & muZ_1_grad = muGrad[1][2];
1072 
1073  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1074  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1075  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1076 
1077  Polynomials::shiftedScaledLegendreValues(Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1078  Polynomials::shiftedScaledLegendreValues(Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1079  Polynomials::shiftedScaledLegendreValues(Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1080 
1081  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muX01, Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1082  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muY01, Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1083  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muZ01, Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1084 
1085  // FAMILY IV -- non-trivial divergences
1086  {
1087  const auto muZ_0_squared = muZ_0 * muZ_0;
1088  const auto &s0 = muX_0; const auto & s0_grad = muX_0_grad;
1089  const auto &s1 = muX_1; const auto & s1_grad = muX_1_grad;
1090  const auto &t0 = muY_0; const auto & t0_grad = muY_0_grad;
1091  const auto &t1 = muY_1; const auto & t1_grad = muY_1_grad;
1092  const auto &Pi = Pi_muX01;
1093  const auto &Pj = Pi_muY01;
1094 
1095  for (int k=2; k<=polyOrder_; k++)
1096  {
1097  const auto & phi_k = Li_muZ01(k);
1098  Kokkos::Array<OutputScalar,3> phi_k_grad;
1099  computeGradHomLi(phi_k_grad, k, Pi_muZ01, Li_dt_muZ01, muZ_0_grad, muZ_1_grad);
1100  for (int j=0; j<polyOrder_; j++)
1101  {
1102  for (int i=0; i<polyOrder_; i++)
1103  {
1104  Kokkos::Array<OutputScalar,3> firstTerm{0,0,0}; // muZ_0_squared * grad phi_k + 2 muZ_0 * phi_k * grad muZ_0
1105  for (int d=0; d<3; d++)
1106  {
1107  firstTerm[d] += muZ_0_squared * phi_k_grad[d] + 2. * muZ_0 * phi_k * muZ_0_grad[d];
1108  }
1109  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
1110  V_QUAD(VQUAD, i, j,
1111  Pi, s0, s1, s0_grad, s1_grad,
1112  Pj, t0, t1, t0_grad, t1_grad);
1113 
1114  OutputScalar divValue;
1115  dot(divValue, firstTerm, VQUAD);
1116  output_(fieldOrdinalOffset,pointOrdinal) = divValue;
1117 
1118  fieldOrdinalOffset++;
1119  }
1120  }
1121  }
1122  }
1123 
1124  // FAMILY V -- non-trivial divergences
1125  {
1126  for (int j=2; j<=polyOrder_; j++)
1127  {
1128  const auto & phi_j = Li_muX01(j);
1129  Kokkos::Array<OutputScalar,3> phi_j_grad;
1130  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
1131 
1132  for (int i=2; i<=polyOrder_; i++)
1133  {
1134  const auto & phi_i = Li_muY01(i);
1135  Kokkos::Array<OutputScalar,3> phi_i_grad;
1136  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
1137 
1138  const int n = max(i,j);
1139  const OutputScalar muZ_1_nm2 = pow(muZ_1,n-2);
1140 
1141  Kokkos::Array<OutputScalar,3> VLEFTTRI;
1142  V_LEFT_TRI(VLEFTTRI, phi_i, phi_i_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
1143 
1144  OutputScalar dot_product;
1145  dot(dot_product, muZ_1_grad, VLEFTTRI);
1146  output_(fieldOrdinalOffset,pointOrdinal) = (n-1) * muZ_1_nm2 * dot_product;
1147 
1148  fieldOrdinalOffset++;
1149  }
1150  }
1151  }
1152 
1153  // FAMILY VI (non-trivial divergences)
1154  for (int i=2; i<=polyOrder_; i++)
1155  {
1156  const auto & phi_i = Li_muX01(i);
1157  Kokkos::Array<OutputScalar,3> phi_i_grad;
1158  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
1159 
1160  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
1161  V_RIGHT_TRI(VRIGHTTRI, muY_1, muY_1_grad, phi_i, phi_i_grad, muZ_0, muZ_0_grad);
1162 
1163  OutputScalar dot_product;
1164  dot(dot_product, muZ_1_grad, VRIGHTTRI);
1165 
1166  const OutputScalar muZ_1_im2 = pow(muZ_1,i-2);
1167  output_(fieldOrdinalOffset,pointOrdinal) = (i-1) * muZ_1_im2 * dot_product;
1168 
1169  fieldOrdinalOffset++;
1170  }
1171 
1172  // FAMILY VII (non-trivial divergences)
1173  for (int j=2; j<=polyOrder_; j++)
1174  {
1175  const auto & phi_j = Li_muY01(j);
1176  Kokkos::Array<OutputScalar,3> phi_j_grad;
1177  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
1178 
1179  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
1180  V_RIGHT_TRI(VRIGHTTRI, muX_1, muX_1_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
1181 
1182  OutputScalar dot_product;
1183  dot(dot_product, muZ_1_grad, VRIGHTTRI);
1184 
1185  const OutputScalar muZ_1_jm2 = pow(muZ_1,j-2);
1186  output_(fieldOrdinalOffset,pointOrdinal) = (j-1) * muZ_1_jm2 * dot_product;
1187 
1188  fieldOrdinalOffset++;
1189  }
1190 
1191  } // end interior function block
1192 
1193  // transform from ESEAS H(div) space to Intrepid2 space
1194  // (what's in output_ to this point is in ESEAS space)
1195  for (ordinal_type fieldOrdinal=0; fieldOrdinal<numFields_; fieldOrdinal++)
1196  {
1197  OutputScalar & div = output_(fieldOrdinal,pointOrdinal);
1198 
1199  transformHDIVFromESEASPyramidDIV(div,
1200  div);
1201  }
1202  } // end OPERATOR_DIV block
1203  break;
1204  case OPERATOR_GRAD:
1205  case OPERATOR_D1:
1206  case OPERATOR_D2:
1207  case OPERATOR_D3:
1208  case OPERATOR_D4:
1209  case OPERATOR_D5:
1210  case OPERATOR_D6:
1211  case OPERATOR_D7:
1212  case OPERATOR_D8:
1213  case OPERATOR_D9:
1214  case OPERATOR_D10:
1215  INTREPID2_TEST_FOR_ABORT( true,
1216  ">>> ERROR: (Intrepid2::Hierarchical_HDIV_PYR_Functor) Computing of second and higher-order derivatives is not currently supported");
1217  default:
1218  // unsupported operator type
1219  device_assert(false);
1220  }
1221  }
1222 
1223  // Provide the shared memory capacity.
1224  // This function takes the team_size as an argument,
1225  // which allows team_size-dependent allocations.
1226  size_t team_shmem_size (int team_size) const
1227  {
1228  // we use shared memory to create a fast buffer for basis computations
1229  size_t shmem_size = 0;
1230  if (fad_size_output_ > 0)
1231  {
1232  // 1D scratch views (we have up to 9 of them):
1233  shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1, fad_size_output_);
1234  }
1235  else
1236  {
1237  // 1D scratch views (we have up to 9 of them):
1238  shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1);
1239  }
1240 
1241  return shmem_size;
1242  }
1243  };
1244 
1256  template<typename DeviceType,
1257  typename OutputScalar = double,
1258  typename PointScalar = double,
1259  bool useCGBasis = true> // if useCGBasis is true, basis functions are associated with either faces or the interior. If false, basis functions are all associated with interior
1261  : public Basis<DeviceType,OutputScalar,PointScalar>
1262  {
1263  public:
1265 
1266  using typename BasisBase::OrdinalTypeArray1DHost;
1267  using typename BasisBase::OrdinalTypeArray2DHost;
1268 
1269  using typename BasisBase::OutputViewType;
1270  using typename BasisBase::PointViewType;
1271  using typename BasisBase::ScalarViewType;
1272 
1273  using typename BasisBase::ExecutionSpace;
1274 
1275  protected:
1276  int polyOrder_; // the maximum order of the polynomial
1277  EPointType pointType_;
1278  public:
1289  HierarchicalBasis_HDIV_PYR(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
1290  :
1291  polyOrder_(polyOrder),
1292  pointType_(pointType)
1293  {
1294  INTREPID2_TEST_FOR_EXCEPTION(pointType!=POINTTYPE_DEFAULT,std::invalid_argument,"PointType not supported");
1295  const auto & p = polyOrder;
1296  this->basisCardinality_ = p * p + 2 * p * (p+1) + 3 * p * p * (p-1);
1297  this->basisDegree_ = p;
1298  this->basisCellTopologyKey_ = shards::Pyramid<>::key;
1299  this->basisType_ = BASIS_FEM_HIERARCHICAL;
1300  this->basisCoordinates_ = COORDINATES_CARTESIAN;
1301  this->functionSpace_ = FUNCTION_SPACE_HDIV;
1302 
1303  const int degreeLength = 1;
1304  this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("Integrated Legendre H(div) pyramid polynomial degree lookup", this->basisCardinality_, degreeLength);
1305  this->fieldOrdinalH1PolynomialDegree_ = OrdinalTypeArray2DHost("Integrated Legendre H(div) pyramid polynomial H^1 degree lookup", this->basisCardinality_, degreeLength);
1306 
1307  int fieldOrdinalOffset = 0;
1308 
1309  // **** face functions **** //
1310  // one quad face
1311  const int numFunctionsPerQuadFace = p*p;
1312 
1313  // following the ESEAS ordering: j increments first
1314  for (int j=0; j<p; j++)
1315  {
1316  for (int i=0; i<p; i++)
1317  {
1318  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = std::max(i,j);
1319  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = std::max(i,j)+1;
1320  fieldOrdinalOffset++;
1321  }
1322  }
1323 
1324  const int numFunctionsPerTriFace = 2 * p * (p+1) / 4;
1325  const int numTriFaces = 4;
1326  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
1327  {
1328  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
1329  {
1330  const int totalFaceDofs = (totalPolyOrder+1) * (totalPolyOrder+2) / 2; // number of dofs for which i+j <= totalPolyOrder
1331  const int totalFaceDofsPrevious = totalPolyOrder * (totalPolyOrder+1) / 2; // number of dofs for which i+j <= totalPolyOrder-1
1332  const int faceDofsForPolyOrder = totalFaceDofs - totalFaceDofsPrevious; // number of dofs for which i+j == totalPolyOrder
1333  for (int i=0; i<faceDofsForPolyOrder; i++)
1334  {
1335  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = totalPolyOrder;
1336  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = totalPolyOrder+1;
1337  fieldOrdinalOffset++;
1338  }
1339  }
1340  }
1341 
1342  // **** interior functions **** //
1343  const int numFunctionsPerVolume = 3 * p * p * (p-1);
1344 
1345  // FAMILY I
1346  // following the ESEAS ordering: k increments first
1347  for (int k=2; k<=polyOrder_; k++)
1348  {
1349  for (int j=2; j<=polyOrder_; j++)
1350  {
1351  for (int i=0; i<polyOrder_; i++)
1352  {
1353  const int max_jk = std::max(j,k);
1354  const int max_ijk = std::max(max_jk,i);
1355  const int max_ip1jk = std::max(max_jk,i+1);
1356  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1357  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jk;
1358  fieldOrdinalOffset++;
1359  }
1360  }
1361  }
1362 
1363  // FAMILY II
1364  for (int k=2; k<=polyOrder_; k++)
1365  {
1366  // ESEAS reverses i,j loop ordering for family II, relative to family I.
1367  // We follow ESEAS for convenience of cross-code comparison.
1368  for (int i=0; i<polyOrder_; i++)
1369  {
1370  for (int j=2; j<=polyOrder_; j++)
1371  {
1372  const int max_jk = std::max(j,k);
1373  const int max_ijk = std::max(max_jk,i);
1374  const int max_ip1jk = std::max(max_jk,i+1);
1375  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1376  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jk;
1377  fieldOrdinalOffset++;
1378  }
1379  }
1380  }
1381 
1382  // FAMILY III
1383  for (int j=2; j<=polyOrder_; j++)
1384  {
1385  for (int i=2; i<=polyOrder_; i++)
1386  {
1387  const int max_ij = std::max(i,j);
1388  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ij;
1389  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ij;
1390  fieldOrdinalOffset++;
1391  }
1392  }
1393 
1394  // FAMILY IV
1395  for (int k=2; k<=polyOrder_; k++)
1396  {
1397  for (int j=0; j<polyOrder_; j++)
1398  {
1399  for (int i=0; i<polyOrder_; i++)
1400  {
1401  const int max_jk = std::max(j,k);
1402  const int max_ijk = std::max(max_jk,i);
1403  const int max_ip1jp1k = std::max(std::max(j+1,k),i+1);
1404  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1405  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jp1k;
1406  fieldOrdinalOffset++;
1407  }
1408  }
1409  }
1410 
1411  // FAMILY V
1412  for (int j=2; j<=polyOrder_; j++)
1413  {
1414  for (int i=2; i<=polyOrder_; i++)
1415  {
1416  const int max_ij = std::max(i,j);
1417  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ij;
1418  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ij;
1419  fieldOrdinalOffset++;
1420  }
1421  }
1422 
1423  // FAMILY VI
1424  for (int i=2; i<=polyOrder_; i++)
1425  {
1426  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = i;
1427  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = i;
1428  fieldOrdinalOffset++;
1429  }
1430 
1431  // FAMILY VII
1432  for (int j=2; j<=polyOrder_; j++)
1433  {
1434  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = j;
1435  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = j;
1436  fieldOrdinalOffset++;
1437  }
1438 
1439  if (fieldOrdinalOffset != this->basisCardinality_)
1440  {
1441  std::cout << "Internal error: basis enumeration is incorrect; fieldOrdinalOffset = " << fieldOrdinalOffset;
1442  std::cout << "; basisCardinality_ = " << this->basisCardinality_ << std::endl;
1443 
1444  INTREPID2_TEST_FOR_EXCEPTION(fieldOrdinalOffset != this->basisCardinality_, std::invalid_argument, "Internal error: basis enumeration is incorrect");
1445  }
1446 
1447  // initialize tags
1448  {
1449  // Intrepid2 vertices:
1450  // 0: (-1,-1, 0)
1451  // 1: ( 1,-1, 0)
1452  // 2: ( 1, 1, 0)
1453  // 3: (-1, 1, 0)
1454  // 4: ( 0, 0, 1)
1455 
1456  // ESEAS vertices:
1457  // 0: ( 0, 0, 0)
1458  // 1: ( 1, 0, 0)
1459  // 2: ( 1, 1, 0)
1460  // 3: ( 0, 1, 0)
1461  // 4: ( 0, 0, 1)
1462 
1463  // The edge numbering appears to match between ESEAS and Intrepid2
1464 
1465  // ESEAS numbers pyramid faces differently from Intrepid2
1466  // See BlendProjectPyraTF in ESEAS.
1467  // See PyramidFaceNodeMap in Shards_BasicTopologies
1468  // ESEAS: 0123, 014, 124, 324, 034
1469  // Intrepid2: 014, 124, 234, 304, 0321
1470 
1471  const int intrepid2FaceOrdinals[5] {4,0,1,2,3}; // index is the ESEAS face ordinal; value is the intrepid2 ordinal
1472 
1473  const auto & cardinality = this->basisCardinality_;
1474 
1475  // Basis-dependent initializations
1476  const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
1477  const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
1478  const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
1479  const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
1480 
1481  OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
1482  const int faceDim = 2, volumeDim = 3;
1483 
1484  if (useCGBasis) {
1485  {
1486  int tagNumber = 0;
1487  {
1488  // quad face
1489  const int faceOrdinalESEAS = 0;
1490  const int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
1491 
1492  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerQuadFace; functionOrdinal++)
1493  {
1494  tagView(tagNumber*tagSize+0) = faceDim; // face dimension
1495  tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
1496  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1497  tagView(tagNumber*tagSize+3) = numFunctionsPerQuadFace; // total number of dofs on this face
1498  tagNumber++;
1499  }
1500  }
1501  for (int triFaceOrdinalESEAS=0; triFaceOrdinalESEAS<numTriFaces; triFaceOrdinalESEAS++)
1502  {
1503  const int faceOrdinalESEAS = triFaceOrdinalESEAS + 1;
1504  const int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
1505  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerTriFace; functionOrdinal++)
1506  {
1507  tagView(tagNumber*tagSize+0) = faceDim; // face dimension
1508  tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
1509  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1510  tagView(tagNumber*tagSize+3) = numFunctionsPerTriFace; // total number of dofs on this face
1511  tagNumber++;
1512  }
1513  }
1514  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerVolume; functionOrdinal++)
1515  {
1516  tagView(tagNumber*tagSize+0) = volumeDim; // volume dimension
1517  tagView(tagNumber*tagSize+1) = 0; // volume id
1518  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1519  tagView(tagNumber*tagSize+3) = numFunctionsPerVolume; // total number of dofs in this volume
1520  tagNumber++;
1521  }
1522  INTREPID2_TEST_FOR_EXCEPTION(tagNumber != this->basisCardinality_, std::invalid_argument, "Internal error: basis tag enumeration is incorrect");
1523  }
1524  } else {
1525  for (ordinal_type i=0;i<cardinality;++i) {
1526  tagView(i*tagSize+0) = volumeDim; // volume dimension
1527  tagView(i*tagSize+1) = 0; // volume ordinal
1528  tagView(i*tagSize+2) = i; // local dof id
1529  tagView(i*tagSize+3) = cardinality; // total number of dofs on this face
1530  }
1531  }
1532 
1533  // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
1534  // tags are constructed on host
1535  this->setOrdinalTagData(this->tagToOrdinal_,
1536  this->ordinalToTag_,
1537  tagView,
1538  this->basisCardinality_,
1539  tagSize,
1540  posScDim,
1541  posScOrd,
1542  posDfOrd);
1543  }
1544  }
1545 
1550  const char* getName() const override {
1551  return "Intrepid2_HierarchicalBasis_HDIV_PYR";
1552  }
1553 
1556  virtual bool requireOrientation() const override {
1557  return (this->getDegree() > 2);
1558  }
1559 
1560  // since the getValues() below only overrides the FEM variant, we specify that
1561  // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
1562  // (It's an error to use the FVD variant on this basis.)
1563  using BasisBase::getValues;
1564 
1583  virtual void getValues( OutputViewType outputValues, const PointViewType inputPoints,
1584  const EOperator operatorType = OPERATOR_VALUE ) const override
1585  {
1586  auto numPoints = inputPoints.extent_int(0);
1587 
1589 
1590  FunctorType functor(operatorType, outputValues, inputPoints, polyOrder_);
1591 
1592  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputScalar>();
1593  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointScalar>();
1594  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1595  const int teamSize = 1; // because of the way the basis functions are computed, we don't have a second level of parallelism...
1596 
1597  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(numPoints,teamSize,vectorSize);
1598  Kokkos::parallel_for("Hierarchical_HDIV_PYR_Functor", policy, functor);
1599  }
1600 
1609  BasisPtr<DeviceType,OutputScalar,PointScalar>
1610  getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override{
1611  const auto & p = this->basisDegree_;
1612  if (subCellDim == 2)
1613  {
1614  if (subCellOrd == 4) // quad basis
1615  {
1617  using HVOL_QUAD = Basis_Derived_HVOL_QUAD<HVOL_LINE>;
1618  return Teuchos::rcp(new HVOL_QUAD(p-1));
1619  }
1620  else // tri basis
1621  {
1623  return Teuchos::rcp(new HVOL_Tri(p-1));
1624  }
1625  }
1626  INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds");
1627  }
1628 
1633  virtual BasisPtr<typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar>
1634  getHostBasis() const override {
1635  using HostDeviceType = typename Kokkos::HostSpace::device_type;
1637  return Teuchos::rcp( new HostBasisType(polyOrder_, pointType_) );
1638  }
1639  };
1640 } // end namespace Intrepid2
1641 
1642 // do ETI with default (double) type
1644 
1645 #endif /* Intrepid2_HierarchicalBasis_HDIV_PYR_h */
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.
unsigned basisCellTopologyKey_
Identifier of the base topology of the cells for which the basis is defined. See the Shards package f...
Implementation of H(vol) basis on the quadrilateral that is templated on H(vol) on the line...
BasisPtr< DeviceType, OutputScalar, PointScalar > getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override
returns the basis associated to a subCell.
Defines several coordinates and their gradients on the pyramid; maps from Intrepid2 (shards) pyramid ...
An abstract base class that defines interface for concrete basis implementations for Finite Element (...
Functor for computing values for the HierarchicalBasis_HDIV_PYR class.
Kokkos::DynRankView< scalarType, Kokkos::LayoutStride, DeviceType > ScalarViewType
View type for scalars.
Free functions, callable from device code, that implement various polynomials useful in basis definit...
const char * getName() const override
Returns basis name.
KOKKOS_INLINE_FUNCTION void E_QUAD(Kokkos::Array< OutputScalar, 3 > &EQUAD, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &HomPi_s01, const PointScalar &s0, const PointScalar &s1, const Kokkos::Array< PointScalar, 3 > &s0_grad, const Kokkos::Array< PointScalar, 3 > &s1_grad, const OutputScratchView &HomLi_t01) const
EFunctionSpace functionSpace_
The function space in which the basis is defined.
ordinal_type basisDegree_
Degree of the largest complete polynomial space that can be represented by the basis.
virtual KOKKOS_INLINE_FUNCTION void getValues(OutputViewType, const PointViewType, const EOperator, const typename Kokkos::TeamPolicy< ExecutionSpace >::member_type &teamMember, const typename ExecutionSpace::scratch_memory_space &scratchStorage, const ordinal_type subcellDim=-1, const ordinal_type subcellOrdinal=-1) const
Team-level evaluation of basis functions on a reference cell.
Header function for Intrepid2::Util class and other utility functions.
KOKKOS_INLINE_FUNCTION void V_LEFT_TRI(Kokkos::Array< OutputScalar, 3 > &VLEFTTRI, const OutputScalar &phi_i, const Kokkos::Array< OutputScalar, 3 > &phi_i_grad, const OutputScalar &phi_j, const Kokkos::Array< OutputScalar, 3 > &phi_j_grad, const OutputScalar &t0, const Kokkos::Array< OutputScalar, 3 > &t0_grad) const
See Fuentes et al. (p. 455), definition of V_{ij}^{}.
HierarchicalBasis_HDIV_PYR(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
Constructor.
KOKKOS_INLINE_FUNCTION void cross(Kokkos::Array< OutputScalar, 3 > &c, const Kokkos::Array< OutputScalar, 3 > &a, const Kokkos::Array< OutputScalar, 3 > &b) const
cross product: c = a x b
Kokkos::View< ordinal_type **, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray2DHost
View type for 2d host array.
Basis defining integrated Legendre basis on the line, a polynomial subspace of H(grad) on the line...
Basis defining Legendre basis on the line, a polynomial subspace of L^2 (a.k.a. H(vol)) on the line...
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
View type for basis value output.
KOKKOS_INLINE_FUNCTION void V_TRI_B42_DIV(OutputScalar &div_VTRI_mus0_mus1_s2_over_mu, const Kokkos::Array< OutputScalar, 3 > &VTRI_00_s0_s1_s2, const Kokkos::Array< OutputScalar, 3 > &EE_0_s0_s1, const OutputScalar &s2, const Kokkos::Array< OutputScalar, 3 > &s2_grad, const OutputScalar &mu, const Kokkos::Array< OutputScalar, 3 > &mu_grad, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &P_mus0_mus1, const OutputScratchView &P_2ip1_mus0pmus1_s2) const
See Equation (B.42) in Fuentes et al. Computes 1/mu V^{tri}_ij(mu s0, mu s1, s2). ...
H(vol) basis on the line based on Legendre polynomials.
ordinal_type basisCardinality_
Cardinality of the basis, i.e., the number of basis functions/degrees-of-freedom. ...
KOKKOS_INLINE_FUNCTION void V_TRI_B42(Kokkos::Array< OutputScalar, 3 > &VTRI_mus0_mus1_s2_over_mu, const Kokkos::Array< OutputScalar, 3 > &VTRI_00_s0_s1_s2, const Kokkos::Array< OutputScalar, 3 > &EE_0_s0_s1, const OutputScalar &s2, const OutputScalar &mu, const Kokkos::Array< OutputScalar, 3 > &mu_grad, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &P_mus0_mus1, const OutputScratchView &P_2ip1_mus0pmus1_s2) const
See Equation (B.42) in Fuentes et al. Computes 1/mu V^{tri}_ij(mu s0, mu s1, s2). ...
OrdinalTypeArray3DHost tagToOrdinal_
DoF tag to ordinal lookup table.
KOKKOS_INLINE_FUNCTION void dot(OutputScalar &c, const Kokkos::Array< OutputScalar, 3 > &a, const Kokkos::Array< OutputScalar, 3 > &b) const
dot product: c = a b
Implementation of H(vol) basis on the quadrilateral that is templated on H(vol) on the line...
virtual BasisPtr< typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
KOKKOS_INLINE_FUNCTION void V_QUAD(Kokkos::Array< OutputScalar, 3 > &VQUAD, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &PHom_s, const PointScalar &s0, const PointScalar &s1, const Kokkos::Array< PointScalar, 3 > &s0_grad, const Kokkos::Array< PointScalar, 3 > &s1_grad, const OutputScratchView &PHom_t, const PointScalar &t0, const PointScalar &t1, const Kokkos::Array< PointScalar, 3 > &t0_grad, const Kokkos::Array< PointScalar, 3 > &t1_grad) const
OrdinalTypeArray2DHost ordinalToTag_
&quot;true&quot; if tagToOrdinal_ and ordinalToTag_ have been initialized
ordinal_type getDegree() const
Returns the degree of the basis.
ECoordinates basisCoordinates_
The coordinate system for which the basis is defined.
virtual bool requireOrientation() const override
True if orientation is required.
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
View type for basis value output.
OrdinalTypeArray2DHost fieldOrdinalH1PolynomialDegree_
H^1 polynomial degree for each degree of freedom. Only defined for hierarchical bases right now...
OrdinalTypeArray2DHost fieldOrdinalPolynomialDegree_
Polynomial degree for each degree of freedom. Only defined for hierarchical bases right now...
Basis defining Legendre basis on the line, a polynomial subspace of H(vol) on the line: extension to ...
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
View type for input points.
void setOrdinalTagData(OrdinalTypeView3D &tagToOrdinal, OrdinalTypeView2D &ordinalToTag, const OrdinalTypeView1D tags, const ordinal_type basisCard, const ordinal_type tagSize, const ordinal_type posScDim, const ordinal_type posScOrd, const ordinal_type posDfOrd)
Fills ordinalToTag_ and tagToOrdinal_ by basis-specific tag data.
virtual void getValues(OutputViewType outputValues, const PointViewType inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell.
H(vol) basis on the triangle based on integrated Legendre polynomials.
KOKKOS_INLINE_FUNCTION void V_RIGHT_TRI(Kokkos::Array< OutputScalar, 3 > &VRIGHTTRI, const OutputScalar &mu1, const Kokkos::Array< OutputScalar, 3 > &mu1_grad, const OutputScalar &phi_i, const Kokkos::Array< OutputScalar, 3 > &phi_i_grad, const OutputScalar &t0, const Kokkos::Array< OutputScalar, 3 > &t0_grad) const
See Fuentes et al. (p. 455), definition of V_{ij}^{}.
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
View type for input points.
Header file for the abstract base class Intrepid2::Basis.
typename DeviceType::execution_space ExecutionSpace
(Kokkos) Execution space for basis.
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.