Smith  0.1
Smith is an implicit thermal structural mechanics simulation code.
quadrilateral_Hcurl.inl
Go to the documentation of this file.
1 // Copyright (c) Lawrence Livermore National Security, LLC and
2 // other Smith Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
13 // this specialization defines shape functions (and their curls) that
14 // interpolate at Gauss-Lobatto nodes for closed intervals, and Gauss-Legendre
15 // nodes for open intervals.
16 //
17 // note 1: mfem assumes the parent element domain is [0,1]x[0,1]
18 // note 2: dofs are numbered by direction and then lexicographically in space.
19 // see below
20 // note 3: since this is a 2D element type, the "curl" returned is just the out-of-plane component,
21 // rather than 3D vector along the z-direction.
22 // for additional information on the finite_element concept requirements, see finite_element.hpp
24 template <int p>
25 struct finite_element<mfem::Geometry::SQUARE, Hcurl<p> > {
26  static constexpr auto geometry = mfem::Geometry::SQUARE;
27  static constexpr auto family = Family::HCURL;
28  static constexpr int dim = 2;
29  static constexpr int n = (p + 1);
30  static constexpr int ndof = 2 * p * (p + 1);
31  static constexpr int components = 1;
32 
33  static constexpr int VALUE = 0, CURL = 1;
34  static constexpr int SOURCE = 0, FLUX = 1;
35 
36  using residual_type =
37  typename std::conditional<components == 1, tensor<double, ndof>, tensor<double, ndof, components> >::type;
38 
39  // this is how mfem provides the data to us for these elements
40  // if, instead, it was stored as simply tensor< double, 2, p + 1, p >,
41  // the interpolation/integrate implementation would be considerably shorter
42  struct dof_type {
43  tensor<double, p + 1, p> x;
44  tensor<double, p, p + 1> y;
45  };
46  using dof_type_if = dof_type;
47 
48  template <int q>
49  using cpu_batched_values_type = tensor<tensor<double, 2>, q, q>;
50 
51  template <int q>
52  using cpu_batched_derivatives_type = tensor<double, q, q>;
53 
54  static constexpr auto directions = [] {
55  int dof_per_direction = p * (p + 1);
56 
57  tensor<double, ndof, dim> directions{};
58  for (int i = 0; i < dof_per_direction; i++) {
59  directions[i + 0 * dof_per_direction] = {1.0, 0.0};
60  directions[i + 1 * dof_per_direction] = {0.0, 1.0};
61  }
62  return directions;
63  }();
64 
65  static constexpr auto nodes = [] {
66  auto legendre_nodes = GaussLegendreNodes<p, mfem::Geometry::SEGMENT>();
67  auto lobatto_nodes = GaussLobattoNodes<p + 1>();
68 
69  tensor<double, ndof, dim> nodes{};
70 
71  int count = 0;
72  for (int j = 0; j < p + 1; j++) {
73  for (int i = 0; i < p; i++) {
74  nodes[count++] = {legendre_nodes[i], lobatto_nodes[j]};
75  }
76  }
77 
78  for (int j = 0; j < p; j++) {
79  for (int i = 0; i < p + 1; i++) {
80  nodes[count++] = {lobatto_nodes[i], legendre_nodes[j]};
81  }
82  }
83 
84  return nodes;
85  }();
86 
97  template <bool apply_weights, int q>
98  static constexpr auto calculate_B1()
99  {
100  constexpr auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
101  [[maybe_unused]] constexpr auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
102  tensor<double, q, p> B1{};
103  for (int i = 0; i < q; i++) {
104  B1[i] = GaussLegendreInterpolation<p>(points1D[i]);
105  if constexpr (apply_weights) B1[i] = B1[i] * weights1D[i];
106  }
107  return B1;
108  }
109 
120  template <bool apply_weights, int q>
121  static constexpr auto calculate_B2()
122  {
123  constexpr auto points1D = GaussLegendreNodes<q>();
124  [[maybe_unused]] constexpr auto weights1D = GaussLegendreWeights<q>();
125  tensor<double, q, p + 1> B2{};
126  for (int i = 0; i < q; i++) {
127  B2[i] = GaussLobattoInterpolation<p + 1>(points1D[i]);
128  if constexpr (apply_weights) B2[i] = B2[i] * weights1D[i];
129  }
130  return B2;
131  }
132 
143  template <bool apply_weights, int q>
144  static constexpr auto calculate_G2()
145  {
146  constexpr auto points1D = GaussLegendreNodes<q>();
147  [[maybe_unused]] constexpr auto weights1D = GaussLegendreWeights<q>();
148  tensor<double, q, p + 1> G2{};
149  for (int i = 0; i < q; i++) {
150  G2[i] = GaussLobattoInterpolationDerivative<p + 1>(points1D[i]);
151  if constexpr (apply_weights) G2[i] = G2[i] * weights1D[i];
152  }
153  return G2;
154  }
155 
156  /*
157 
158  interpolate nodes/directions and their associated numbering:
159 
160  linear
161 
162  o-----→-----o o-----1-----o
163  | | | |
164  | | | |
165  ↑ ↑ 2 3
166  | | | |
167  | | | |
168  o-----→-----o o-----0-----o
169 
170 
171  quadratic
172 
173  o---→---→---o o---4---5---o
174  | | | |
175  ↑ ↑ ↑ 9 10 11
176  | → → | | 2 3 |
177  ↑ ↑ ↑ 6 7 8
178  | | | |
179  o---→---→---o o---0---1---o
180 
181 
182  cubic
183 
184  o--→--→--→--o o--9-10-11--o
185  ↑ ↑ ↑ ↑ 20 21 22 23
186  | → → → | | 6 7 8 |
187  ↑ ↑ ↑ ↑ 16 17 18 19
188  | → → → | | 3 4 5 |
189  ↑ ↑ ↑ ↑ 12 13 14 15
190  o--→--→--→--o o--0--1--2--o
191 
192  */
193 
194  SMITH_HOST_DEVICE static constexpr tensor<double, ndof, dim> shape_functions(tensor<double, dim> xi)
195  {
196  int count = 0;
197  tensor<double, ndof, dim> N{};
198 
199  // do all the x-facing nodes first
200  tensor<double, p + 1> N_closed = GaussLobattoInterpolation<p + 1>(xi[1]);
201  tensor<double, p> N_open = GaussLegendreInterpolation<p>(xi[0]);
202  for (int j = 0; j < p + 1; j++) {
203  for (int i = 0; i < p; i++) {
204  N[count++] = {N_open[i] * N_closed[j], 0.0};
205  }
206  }
207 
208  // then all the y-facing nodes
209  N_closed = GaussLobattoInterpolation<p + 1>(xi[0]);
210  N_open = GaussLegendreInterpolation<p>(xi[1]);
211  for (int j = 0; j < p; j++) {
212  for (int i = 0; i < p + 1; i++) {
213  N[count++] = {0.0, N_closed[i] * N_open[j]};
214  }
215  }
216  return N;
217  }
218 
219  // the curl of a 2D vector field is entirely out-of-plane, so we return just that component
220  SMITH_HOST_DEVICE static constexpr tensor<double, ndof> shape_function_curl(tensor<double, dim> xi)
221  {
222  int count = 0;
223  tensor<double, ndof> curl_z{};
224 
225  // do all the x-facing nodes first
226  tensor<double, p + 1> dN_closed = GaussLobattoInterpolationDerivative<p + 1>(xi[1]);
227  tensor<double, p> N_open = GaussLegendreInterpolation<p>(xi[0]);
228  for (int j = 0; j < p + 1; j++) {
229  for (int i = 0; i < p; i++) {
230  curl_z[count++] = -N_open[i] * dN_closed[j];
231  }
232  }
233 
234  // then all the y-facing nodes
235  dN_closed = GaussLobattoInterpolationDerivative<p + 1>(xi[0]);
236  N_open = GaussLegendreInterpolation<p>(xi[1]);
237  for (int j = 0; j < p; j++) {
238  for (int i = 0; i < p + 1; i++) {
239  curl_z[count++] = dN_closed[i] * N_open[j];
240  }
241  }
242 
243  return curl_z;
244  }
245 
246  template <typename in_t, int q>
247  static auto batch_apply_shape_fn(int j, tensor<in_t, q * q> input, const TensorProductQuadratureRule<q>&)
248  {
249  constexpr bool apply_weights = false;
250  constexpr tensor<double, q, p> B1 = calculate_B1<apply_weights, q>();
251  constexpr tensor<double, q, p + 1> B2 = calculate_B2<apply_weights, q>();
252  constexpr tensor<double, q, p + 1> G2 = calculate_G2<apply_weights, q>();
253 
254  int jx, jy;
255  int dir = j / ((p + 1) * p);
256  if (dir == 0) {
257  jx = j % p;
258  jy = j / p;
259  } else {
260  jx = (j % ((p + 1) * p)) % n;
261  jy = (j % ((p + 1) * p)) / n;
262  }
263 
264  using source_t = decltype(dot(get<0>(get<0>(in_t{})), tensor<double, 2>{}) + get<1>(get<0>(in_t{})) * double{});
265  using flux_t = decltype(dot(get<0>(get<1>(in_t{})), tensor<double, 2>{}) + get<1>(get<1>(in_t{})) * double{});
266 
267  tensor<tuple<source_t, flux_t>, q * q> output;
268 
269  for (int qy = 0; qy < q; qy++) {
270  for (int qx = 0; qx < q; qx++) {
271  tensor<double, 2> phi_j{(dir == 0) * B1(qx, jx) * B2(qy, jy), (dir == 1) * B1(qy, jy) * B2(qx, jx)};
272 
273  double curl_phi_j = (dir == 0) * -B1(qx, jx) * G2(qy, jy) + (dir == 1) * B1(qy, jy) * G2(qx, jx);
274 
275  int Q = qy * q + qx;
276  const auto& d00 = get<0>(get<0>(input(Q)));
277  const auto& d01 = get<1>(get<0>(input(Q)));
278  const auto& d10 = get<0>(get<1>(input(Q)));
279  const auto& d11 = get<1>(get<1>(input(Q)));
280 
281  output[Q] = {dot(d00, phi_j) + d01 * curl_phi_j, dot(d10, phi_j) + d11 * curl_phi_j};
282  }
283  }
284 
285  return output;
286  }
287 
288  template <int q>
289  SMITH_HOST_DEVICE static auto interpolate(const dof_type& element_values, const TensorProductQuadratureRule<q>&)
290  {
291  constexpr bool apply_weights = false;
292  constexpr tensor<double, q, p> B1 = calculate_B1<apply_weights, q>();
293  constexpr tensor<double, q, p + 1> B2 = calculate_B2<apply_weights, q>();
294  constexpr tensor<double, q, p + 1> G2 = calculate_G2<apply_weights, q>();
295 
296  tensor<double, 2, q, q> value{};
297  tensor<double, q, q> curl{};
298 
299  // to clarify which contractions correspond to which spatial dimensions
300  constexpr int x = 1, y = 0;
301 
302  auto A = contract<x, 1>(element_values.x, B1);
303  value[0] = contract<y, 1>(A, B2);
304  curl -= contract<y, 1>(A, G2);
305 
306  A = contract<y, 1>(element_values.y, B1);
307  value[1] = contract<x, 1>(A, B2);
308  curl += contract<x, 1>(A, G2);
309 
310  tensor<tuple<tensor<double, 2>, double>, q * q> qf_inputs;
311 
312  int count = 0;
313  for (int qy = 0; qy < q; qy++) {
314  for (int qx = 0; qx < q; qx++) {
315  for (int i = 0; i < dim; i++) {
316  get<VALUE>(qf_inputs(count))[i] = value[i](qy, qx);
317  }
318  get<CURL>(qf_inputs(count)) = curl(qy, qx);
319  count++;
320  }
321  }
322 
323  return qf_inputs;
324  }
325 
326  template <typename source_type, typename flux_type, int q>
327  SMITH_HOST_DEVICE static void integrate(const tensor<tuple<source_type, flux_type>, q * q>& qf_output,
328  const TensorProductQuadratureRule<q>&, dof_type* element_residual,
329  [[maybe_unused]] int step = 1)
330  {
331  constexpr bool apply_weights = true;
332  constexpr tensor<double, q, p> B1 = calculate_B1<apply_weights, q>();
333  constexpr tensor<double, q, p + 1> B2 = calculate_B2<apply_weights, q>();
334  constexpr tensor<double, q, p + 1> G2 = calculate_G2<apply_weights, q>();
335 
336  tensor<double, 2, q, q> source{};
337  tensor<double, q, q> flux{};
338 
339  for (int qy = 0; qy < q; qy++) {
340  for (int qx = 0; qx < q; qx++) {
341  int Q = qy * q + qx;
342  tensor<double, dim> s{get<SOURCE>(qf_output[Q])};
343  for (int i = 0; i < dim; i++) {
344  source(i, qy, qx) = s[i];
345  }
346  flux(qy, qx) = get<FLUX>(qf_output[Q]);
347  }
348  }
349 
350  // to clarify which contractions correspond to which spatial dimensions
351  constexpr int x = 1, y = 0;
352 
353  auto A = contract<y, 0>(source[0], B2) - contract<y, 0>(flux, G2);
354  element_residual[0].x += contract<x, 0>(A, B1);
355 
356  A = contract<x, 0>(source[1], B2) + contract<x, 0>(flux, G2);
357  element_residual[0].y += contract<y, 0>(A, B1);
358  }
359 
360 #if 0
361 
362  template <int q>
363  static SMITH_DEVICE auto interpolate(const dof_type& element_values, const tensor<double, dim, dim>& J,
364  const TensorProductQuadratureRule<q>& rule, cache_type<q>& A)
365  {
366  int tidx = threadIdx.x % q;
367  int tidy = threadIdx.x / q;
368 
369  static constexpr auto points1D = GaussLegendreNodes<q>();
370 
371  static constexpr auto B1_ = [=]() {
372  tensor<double, q, p> B1{};
373  for (int i = 0; i < q; i++) {
374  B1[i] = GaussLegendreInterpolation<p>(points1D[i]);
375  }
376  return B1;
377  }();
378 
379  static constexpr auto B2_ = [=]() {
380  tensor<double, q, n> B2{};
381  for (int i = 0; i < q; i++) {
382  B2[i] = GaussLobattoInterpolation<n>(points1D[i]);
383  }
384  return B2;
385  }();
386 
387  static constexpr auto G2_ = [=]() {
388  tensor<double, q, n> G2{};
389  for (int i = 0; i < q; i++) {
390  G2[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
391  }
392  return G2;
393  }();
394 
395  __shared__ tensor<double, q, p> B1;
396  __shared__ tensor<double, q, n> B2;
397  __shared__ tensor<double, q, n> G2;
398  for (int entry = threadIdx.x; entry < n * q; entry += q * q) {
399  int i = entry % n;
400  int j = entry / n;
401  if (i < p) {
402  B1(j, i) = B1_(j, i);
403  }
404  B2(j, i) = B2_(j, i);
405  G2(j, i) = G2_(j, i);
406  }
407  __syncthreads();
408 
409  tuple<tensor<double, dim>, double> qf_input{};
410 
414  for (int dy = tidy; dy < p + 1; dy += q) {
415  for (int qx = tidx; qx < q; qx += q) {
416  double sum = 0.0;
417  for (int dx = 0; dx < p; dx++) {
418  sum += B1(qx, dx) * element_values.x(dy, dx);
419  }
420  A(dy, qx) = sum;
421  }
422  }
423  __syncthreads();
424 
425  for (int qy = tidy; qy < q; qy += q) {
426  for (int qx = tidx; qx < q; qx += q) {
427  double sum[2]{};
428  for (int dy = 0; dy < (p + 1); dy++) {
429  sum[0] += B2(qy, dy) * A(dy, qx);
430  sum[1] += G2(qy, dy) * A(dy, qx);
431  }
432  smith::get<0>(qf_input)[0] += sum[0];
433  smith::get<1>(qf_input) -= sum[1];
434  }
435  }
436  __syncthreads();
437 
441  for (int dx = tidx; dx < p + 1; dx += q) {
442  for (int qy = tidy; qy < q; qy += q) {
443  double sum = 0.0;
444  for (int dy = 0; dy < p; dy++) {
445  sum += B1(qy, dy) * element_values.y(dy, dx);
446  }
447  A(dx, qy) = sum;
448  }
449  }
450  __syncthreads();
451 
452  for (int qy = tidy; qy < q; qy += q) {
453  for (int qx = tidx; qx < q; qx += q) {
454  double sum[3]{};
455  for (int dx = 0; dx < (p + 1); dx++) {
456  sum[0] += B2(qx, dx) * A(dx, qy);
457  sum[1] += G2(qx, dx) * A(dx, qy);
458  }
459  smith::get<0>(qf_input)[1] += sum[0];
460  smith::get<1>(qf_input) += sum[1];
461  }
462  }
463 
464  // apply covariant Piola transformation to go
465  // from parent element -> physical element
466  smith::get<0>(qf_input) = linear_solve(transpose(J), smith::get<0>(qf_input));
467  smith::get<1>(qf_input) = smith::get<1>(qf_input) / det(J);
468 
469  return qf_input;
470  }
471 
472  template <typename T1, typename T2, int q>
473  static SMITH_DEVICE void integrate(tuple<T1, T2>& response, const tensor<double, dim, dim>& J,
474  const TensorProductQuadratureRule<q>& rule, cache_type<q>& A, dof_type& residual)
475  {
476  int tidx = threadIdx.x % q;
477  int tidy = threadIdx.x / q;
478 
479  static constexpr auto points1D = GaussLegendreNodes<q>();
480  static constexpr auto weights1D = GaussLegendreWeights<q>();
481 
482  static constexpr auto B1_ = [=]() {
483  tensor<double, q, p> B1{};
484  for (int i = 0; i < q; i++) {
485  B1[i] = GaussLegendreInterpolation<p>(points1D[i]);
486  }
487  return B1;
488  }();
489 
490  static constexpr auto B2_ = [=]() {
491  tensor<double, q, n> B2{};
492  for (int i = 0; i < q; i++) {
493  B2[i] = GaussLobattoInterpolation<n>(points1D[i]);
494  }
495  return B2;
496  }();
497 
498  static constexpr auto G2_ = [=]() {
499  tensor<double, q, n> G2{};
500  for (int i = 0; i < q; i++) {
501  G2[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
502  }
503  return G2;
504  }();
505 
506  __shared__ tensor<double, q, p> B1;
507  __shared__ tensor<double, q, n> B2;
508  __shared__ tensor<double, q, n> G2;
509  for (int entry = threadIdx.x; entry < n * q; entry += q * q) {
510  int i = entry % n;
511  int j = entry / n;
512  if (i < p) {
513  B1(j, i) = B1_(j, i);
514  }
515  B2(j, i) = B2_(j, i);
516  G2(j, i) = G2_(j, i);
517  }
518  __syncthreads();
519 
520  // transform the source and flux terms from values on the physical element,
521  // to values on the parent element. Also, the source/flux values are scaled
522  // according to the weight of their quadrature point, so that when we add them
523  // together, it approximates the integral over the element
524  auto detJ = det(J);
525  auto dv = detJ * weights1D[tidx] * weights1D[tidy];
526 
527  get<0>(response) = linear_solve(J, get<0>(response)) * dv;
528  get<1>(response) = get<1>(response) * (dv / detJ);
529 
533 
534  // this first contraction is performed a little differently, since `response` is not
535  // in shared memory, so each thread can only access its own values
536  for (int qy = tidy; qy < q; qy += q) {
537  for (int dx = tidx; dx < n; dx += q) {
538  A(dx, qy) = 0.0;
539  }
540  }
541  __syncthreads();
542 
543  for (int offset = 0; offset < n; offset++) {
544  int dy = (tidy + offset) % n;
545  auto sum = B2(tidy, dy) * get<0>(response)[0] - G2(tidy, dy) * get<1>(response);
546  atomicAdd(&A(dy, tidx), sum);
547  }
548  __syncthreads();
549 
550  for (int dy = tidy; dy < p + 1; dy += q) {
551  for (int dx = tidx; dx < p; dx += q) {
552  double sum = 0.0;
553  for (int qx = 0; qx < q; qx++) {
554  sum += B1(qx, dx) * A(dy, qx);
555  }
556  residual.x(dy, dx) += sum;
557  }
558  }
559  __syncthreads();
560 
564 
565  // this first contraction is performed a little differently, since `response` is not
566  // in shared memory, so each thread can only access its own values
567  for (int qy = tidy; qy < q; qy += q) {
568  for (int dx = tidx; dx < n; dx += q) {
569  A(dx, qy) = 0.0;
570  }
571  }
572  __syncthreads();
573 
574  for (int offset = 0; offset < n; offset++) {
575  int dx = (tidx + offset) % n;
576  auto sum = B2(tidx, dx) * get<0>(response)[1] + G2(tidx, dx) * get<1>(response);
577  atomicAdd(&A(dx, tidy), sum);
578  }
579  __syncthreads();
580 
581  for (int dy = tidy; dy < p; dy += q) {
582  for (int dx = tidx; dx < p + 1; dx += q) {
583  double sum = 0.0;
584  for (int qy = 0; qy < q; qy++) {
585  sum += B1(qy, dy) * A(dx, qy);
586  }
587  residual.y(dy, dx) += sum;
588  }
589  }
590  }
591 
592 #endif
593 };
#define SMITH_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
Definition: accelerator.hpp:38
#define SMITH_DEVICE
Macro that evaluates to __device__ when compiling with nvcc and does nothing on a host compiler.
Definition: accelerator.hpp:46
constexpr SMITH_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:376
constexpr SMITH_HOST_DEVICE auto transpose(const isotropic_tensor< T, m, m > &I)
return the transpose of an isotropic tensor
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.
constexpr SMITH_HOST_DEVICE auto det(const isotropic_tensor< T, m, m > &I)
compute the determinant of an isotropic tensor
constexpr SMITH_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
constexpr SMITH_HOST_DEVICE auto linear_solve(const LuFactorization< S, n > &lu_factors, const tensor< T, n, m... > &b)
Definition: tensor.hpp:1619