Smith  0.1
Smith is an implicit thermal structural mechanics simulation code.
quadrilateral_L2.inl
Go to the documentation of this file.
1 // Copyright (c) Lawrence Livermore National Security, LLC and
2 // other Smith Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
13 // this specialization defines shape functions (and their gradients) that
14 // interpolate at Gauss-Lobatto nodes for the appropriate polynomial order
15 //
16 // note: mfem assumes the parent element domain is [0,1]x[0,1]
17 // for additional information on the finite_element concept requirements, see finite_element.hpp
19 template <int p, int c>
20 struct finite_element<mfem::Geometry::SQUARE, L2<p, c> > {
21  static constexpr auto geometry = mfem::Geometry::SQUARE;
22  static constexpr auto family = Family::L2;
23  static constexpr int components = c;
24  static constexpr int dim = 2;
25  static constexpr int n = (p + 1);
26  static constexpr int ndof = (p + 1) * (p + 1);
27 
28  static constexpr int VALUE = 0, GRADIENT = 1;
29  static constexpr int SOURCE = 0, FLUX = 1;
30 
31  using residual_type =
32  typename std::conditional<components == 1, tensor<double, ndof>, tensor<double, ndof, components> >::type;
33 
34  using dof_type = tensor<double, c, p + 1, p + 1>;
35  using dof_type_if = tensor<double, c, 2, p + 1, p + 1>;
36 
37  using value_type = typename std::conditional<components == 1, double, tensor<double, components> >::type;
38  using derivative_type =
39  typename std::conditional<components == 1, tensor<double, dim>, tensor<double, components, dim> >::type;
40  using qf_input_type = tuple<value_type, derivative_type>;
41 
42  /*
43 
44  interpolation nodes and their associated numbering:
45 
46  linear
47  2-----------3
48  | |
49  | |
50  | |
51  | |
52  | |
53  0-----------1
54 
55 
56  quadratic
57  6-----7-----8
58  | |
59  | |
60  3 4 5
61  | |
62  | |
63  0-----1-----2
64 
65 
66  cubic
67  12-13--14--15
68  | |
69  8 9 10 11
70  | |
71  4 5 6 7
72  | |
73  0---1---2---3
74 
75  */
76 
77  SMITH_HOST_DEVICE static constexpr tensor<double, ndof> shape_functions(tensor<double, dim> xi)
78  {
79  auto N_xi = GaussLobattoInterpolation<p + 1>(xi[0]);
80  auto N_eta = GaussLobattoInterpolation<p + 1>(xi[1]);
81 
82  int count = 0;
83 
84  tensor<double, ndof> N{};
85  for (int j = 0; j < p + 1; j++) {
86  for (int i = 0; i < p + 1; i++) {
87  N[count++] = N_xi[i] * N_eta[j];
88  }
89  }
90  return N;
91  }
92 
93  SMITH_HOST_DEVICE static constexpr tensor<double, ndof, dim> shape_function_gradients(tensor<double, dim> xi)
94  {
95  auto N_xi = GaussLobattoInterpolation<p + 1>(xi[0]);
96  auto N_eta = GaussLobattoInterpolation<p + 1>(xi[1]);
97  auto dN_xi = GaussLobattoInterpolationDerivative<p + 1>(xi[0]);
98  auto dN_eta = GaussLobattoInterpolationDerivative<p + 1>(xi[1]);
99 
100  int count = 0;
101 
102  tensor<double, ndof, dim> dN{};
103  for (int j = 0; j < p + 1; j++) {
104  for (int i = 0; i < p + 1; i++) {
105  dN[count++] = {dN_xi[i] * N_eta[j], N_xi[i] * dN_eta[j]};
106  }
107  }
108  return dN;
109  }
110 
121  template <bool apply_weights, int q>
122  static constexpr auto calculate_B()
123  {
124  constexpr auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
125  [[maybe_unused]] constexpr auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
126  tensor<double, q, n> B{};
127  for (int i = 0; i < q; i++) {
128  B[i] = GaussLobattoInterpolation<n>(points1D[i]);
129  if constexpr (apply_weights) B[i] = B[i] * weights1D[i];
130  }
131  return B;
132  }
133 
144  template <bool apply_weights, int q>
145  static constexpr auto calculate_G()
146  {
147  constexpr auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
148  [[maybe_unused]] constexpr auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
149  tensor<double, q, n> G{};
150  for (int i = 0; i < q; i++) {
151  G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
152  if constexpr (apply_weights) G[i] = G[i] * weights1D[i];
153  }
154  return G;
155  }
156 
157  template <typename in_t, int q>
158  static auto batch_apply_shape_fn(int j, tensor<in_t, q * q> input, const TensorProductQuadratureRule<q>&)
159  {
160  static constexpr bool apply_weights = false;
161  static constexpr auto B = calculate_B<apply_weights, q>();
162  static constexpr auto G = calculate_G<apply_weights, q>();
163 
164  int jx = j % n;
165  int jy = j / n;
166 
167  using source_t = decltype(get<0>(get<0>(in_t{})) + dot(get<1>(get<0>(in_t{})), tensor<double, 2>{}));
168  using flux_t = decltype(get<0>(get<1>(in_t{})) + dot(get<1>(get<1>(in_t{})), tensor<double, 2>{}));
169 
170  tensor<tuple<source_t, flux_t>, q * q> output;
171 
172  for (int qy = 0; qy < q; qy++) {
173  for (int qx = 0; qx < q; qx++) {
174  double phi_j = B(qx, jx) * B(qy, jy);
175  tensor<double, dim> dphi_j_dxi = {G(qx, jx) * B(qy, jy), B(qx, jx) * G(qy, jy)};
176 
177  int Q = qy * q + qx;
178  const auto& d00 = get<0>(get<0>(input(Q)));
179  const auto& d01 = get<1>(get<0>(input(Q)));
180  const auto& d10 = get<0>(get<1>(input(Q)));
181  const auto& d11 = get<1>(get<1>(input(Q)));
182 
183  output[Q] = {d00 * phi_j + dot(d01, dphi_j_dxi), d10 * phi_j + dot(d11, dphi_j_dxi)};
184  }
185  }
186 
187  return output;
188  }
189 
190  template <typename T, int q>
191  static auto batch_apply_shape_fn_interior_face(int j, tensor<T, q * q> input, const TensorProductQuadratureRule<q>&)
192  {
193  static constexpr bool apply_weights = false;
194  static constexpr auto B = calculate_B<apply_weights, q>();
195 
196  using source0_t = decltype(get<0>(get<0>(T{})) + get<1>(get<0>(T{})));
197  using source1_t = decltype(get<0>(get<1>(T{})) + get<1>(get<1>(T{})));
198 
199  int jx = j % n;
200  int jy = (j % ndof) / n;
201  int s = j / ndof;
202 
203  tensor<tuple<source0_t, source1_t>, q * q> output;
204 
205  for (int qy = 0; qy < q; qy++) {
206  for (int qx = 0; qx < q; qx++) {
207  double phi0_j = B(qx, jx) * B(qy, jy) * (s == 0);
208  double phi1_j = B(qx, jx) * B(qy, jy) * (s == 1);
209 
210  int Q = qy * q + qx;
211  const auto& d00 = get<0>(get<0>(input(Q)));
212  const auto& d01 = get<1>(get<0>(input(Q)));
213  const auto& d10 = get<0>(get<1>(input(Q)));
214  const auto& d11 = get<1>(get<1>(input(Q)));
215 
216  output[Q] = {d00 * phi0_j + d01 * phi1_j, d10 * phi0_j + d11 * phi1_j};
217  }
218  }
219 
220  return output;
221  }
222 
223  // we want to compute the following:
224  //
225  // X_q(u, v) := (B(u, i) * B(v, j)) * X_e(i, j)
226  //
227  // where
228  // X_q(u, v) are the quadrature-point values at position {u, v},
229  // B(u, i) is the i^{th} 1D interpolation/differentiation (shape) function,
230  // evaluated at the u^{th} 1D quadrature point, and
231  // X_e(i, j) are the values at node {i, j} to be interpolated
232  //
233  // this algorithm carries out the above calculation in 2 steps:
234  //
235  // A(dy, qx) := B(qx, dx) * X_e(dy, dx)
236  // X_q(qy, qx) := B(qy, dy) * A(dy, qx)
237  template <int q>
238  SMITH_HOST_DEVICE static auto interpolate(const dof_type& X, const TensorProductQuadratureRule<q>&)
239  {
240  static constexpr bool apply_weights = false;
241  static constexpr auto B = calculate_B<apply_weights, q>();
242  static constexpr auto G = calculate_G<apply_weights, q>();
243 
244  tensor<double, c, q, q> value{};
245  tensor<double, c, dim, q, q> gradient{};
246 
247  // apply the shape functions
248  for (int i = 0; i < c; i++) {
249  auto A0 = contract<1, 1>(X[i], B);
250  auto A1 = contract<1, 1>(X[i], G);
251 
252  value(i) = contract<0, 1>(A0, B);
253  gradient(i, 0) = contract<0, 1>(A1, B);
254  gradient(i, 1) = contract<0, 1>(A0, G);
255  }
256 
257  // transpose the quadrature data into a flat tensor of tuples
258  union {
259  tensor<qf_input_type, q * q> one_dimensional;
260  tensor<tuple<tensor<double, c>, tensor<double, c, dim> >, q, q> two_dimensional;
261  } output;
262 
263  for (int qy = 0; qy < q; qy++) {
264  for (int qx = 0; qx < q; qx++) {
265  for (int i = 0; i < c; i++) {
266  get<VALUE>(output.two_dimensional(qy, qx))[i] = value(i, qy, qx);
267  for (int j = 0; j < dim; j++) {
268  get<GRADIENT>(output.two_dimensional(qy, qx))[i][j] = gradient(i, j, qy, qx);
269  }
270  }
271  }
272  }
273 
274  return output.one_dimensional;
275  }
276 
277  // overload for two-sided interior face kernels
278  template <int q>
279  SMITH_HOST_DEVICE static auto interpolate([[maybe_unused]] const dof_type_if& X,
280  const TensorProductQuadratureRule<q>&)
281  {
282  static constexpr bool apply_weights = false;
283  static constexpr auto B = calculate_B<apply_weights, q>();
284 
285  tensor<tuple<value_type, value_type>, q * q> output;
286 
287  tensor<double, c, q, q> value{};
288 
289  // side 0
290  for (int i = 0; i < c; i++) {
291  auto A0 = contract<1, 1>(X(i, 0), B);
292  value(i) = contract<0, 1>(A0, B);
293  }
294 
295  for (int qy = 0; qy < q; qy++) {
296  for (int qx = 0; qx < q; qx++) {
297  if constexpr (c == 1) {
298  get<0>(output[qy * q + qx]) = value(0, qy, qx);
299  } else {
300  for (int i = 0; i < c; i++) {
301  get<0>(output[qy * q + qx])[i] = value(i, qy, qx);
302  }
303  }
304  }
305  }
306 
307  // side 1
308  for (int i = 0; i < c; i++) {
309  auto A0 = contract<1, 1>(X(i, 1), B);
310  value(i) = contract<0, 1>(A0, B);
311  }
312 
313  for (int qy = 0; qy < q; qy++) {
314  for (int qx = 0; qx < q; qx++) {
315  if constexpr (c == 1) {
316  get<1>(output[qy * q + qx]) = value(0, qy, qx);
317  } else {
318  for (int i = 0; i < c; i++) {
319  get<1>(output[qy * q + qx])[i] = value(i, qy, qx);
320  }
321  }
322  }
323  }
324 
325  return output;
326  }
327 
328  // source can be one of: {zero, double, tensor<double,dim>, tensor<double,dim,dim>}
329  // flux can be one of: {zero, tensor<double,dim>, tensor<double,dim,dim>, tensor<double,dim,dim,dim>,
330  // tensor<double,dim,dim,dim>}
331  template <typename source_type, typename flux_type, int q>
332  SMITH_HOST_DEVICE static void integrate(const tensor<tuple<source_type, flux_type>, q * q>& qf_output,
333  const TensorProductQuadratureRule<q>&, dof_type* element_residual,
334  int step = 1)
335  {
336  if constexpr (is_zero<source_type>{} && is_zero<flux_type>{}) {
337  return;
338  }
339 
340  constexpr int ntrial = std::max(size(source_type{}), size(flux_type{}) / dim) / c;
341 
342  using s_buffer_type = std::conditional_t<is_zero<source_type>{}, zero, tensor<double, q, q> >;
343  using f_buffer_type = std::conditional_t<is_zero<flux_type>{}, zero, tensor<double, dim, q, q> >;
344 
345  static constexpr bool apply_weights = true;
346  static constexpr auto B = calculate_B<apply_weights, q>();
347  static constexpr auto G = calculate_G<apply_weights, q>();
348 
349  for (int j = 0; j < ntrial; j++) {
350  for (int i = 0; i < c; i++) {
351  s_buffer_type source;
352  f_buffer_type flux;
353 
354  for (int qy = 0; qy < q; qy++) {
355  for (int qx = 0; qx < q; qx++) {
356  int Q = qy * q + qx;
357  if constexpr (!is_zero<source_type>{}) {
358  source(qy, qx) = reinterpret_cast<const double*>(&get<SOURCE>(qf_output[Q]))[i * ntrial + j];
359  }
360  if constexpr (!is_zero<flux_type>{}) {
361  for (int k = 0; k < dim; k++) {
362  flux(k, qy, qx) = reinterpret_cast<const double*>(&get<FLUX>(qf_output[Q]))[(i * dim + k) * ntrial + j];
363  }
364  }
365  }
366  }
367 
368  auto A0 = contract<1, 0>(source, B) + contract<1, 0>(flux(0), G);
369  auto A1 = contract<1, 0>(flux(1), B);
370 
371  element_residual[j * step](i) += contract<0, 0>(A0, B) + contract<0, 0>(A1, G);
372  }
373  }
374  }
375 
376  template <typename T, int q>
377  SMITH_HOST_DEVICE static void integrate(const tensor<tuple<T, T>, q * q>& qf_output,
378  const TensorProductQuadratureRule<q>&, dof_type_if* element_residual,
379  [[maybe_unused]] int step = 1)
380  {
381  constexpr int ntrial = size(T{}) / c;
382  static constexpr bool apply_weights = true;
383  static constexpr auto B = calculate_B<apply_weights, q>();
384 
385  for (int j = 0; j < ntrial; j++) {
386  for (int i = 0; i < c; i++) {
387  tensor<double, q, q> source;
388 
389  // side 0
390  {
391  for (int qy = 0; qy < q; qy++) {
392  for (int qx = 0; qx < q; qx++) {
393  int Q = qy * q + qx;
394  source(qy, qx) = reinterpret_cast<const double*>(&get<0>(qf_output[Q]))[i * ntrial + j];
395  }
396  }
397 
398  auto A0 = contract<1, 0>(source, B);
399  element_residual[j * step](i, 0) += contract<0, 0>(A0, B);
400  }
401 
402  // side 1
403  {
404  for (int qy = 0; qy < q; qy++) {
405  for (int qx = 0; qx < q; qx++) {
406  int Q = qy * q + qx;
407  source(qy, qx) = reinterpret_cast<const double*>(&get<1>(qf_output[Q]))[i * ntrial + j];
408  }
409  }
410 
411  auto A0 = contract<1, 0>(source, B);
412  element_residual[j * step](i, 1) += contract<0, 0>(A0, B);
413  }
414  }
415  }
416  }
417 
418 #if 0
419 
420  template <int q>
421  static SMITH_DEVICE auto interpolate(const dof_type& X, const tensor<double, dim, dim>& J,
422  const TensorProductQuadratureRule<q>& rule, cache_type<q>& A)
423  {
424  int tidx = threadIdx.x % q;
425  int tidy = threadIdx.x / q;
426 
427  static constexpr auto points1D = GaussLegendreNodes<q>();
428  static constexpr auto B_ = [=]() {
429  tensor<double, q, n> B{};
430  for (int i = 0; i < q; i++) {
431  B[i] = GaussLobattoInterpolation<n>(points1D[i]);
432  }
433  return B;
434  }();
435 
436  static constexpr auto G_ = [=]() {
437  tensor<double, q, n> G{};
438  for (int i = 0; i < q; i++) {
439  G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
440  }
441  return G;
442  }();
443 
444  __shared__ tensor<double, q, n> B;
445  __shared__ tensor<double, q, n> G;
446  for (int entry = threadIdx.x; entry < n * q; entry += q * q) {
447  int i = entry % n;
448  int j = entry / n;
449  B(j, i) = B_(j, i);
450  G(j, i) = G_(j, i);
451  }
452  __syncthreads();
453 
454  tuple<tensor<double, c>, tensor<double, c, dim> > qf_input{};
455 
456  for (int i = 0; i < c; i++) {
457  for (int dy = tidy; dy < n; dy += q) {
458  for (int qx = tidx; qx < q; qx += q) {
459  double sum[2]{};
460  for (int dx = 0; dx < n; dx++) {
461  sum[0] += B(qx, dx) * X(i, dy, dx);
462  sum[1] += G(qx, dx) * X(i, dy, dx);
463  }
464  A(0, dy, qx) = sum[0];
465  A(1, dy, qx) = sum[1];
466  }
467  }
468  __syncthreads();
469 
470  for (int qy = tidy; qy < q; qy += q) {
471  for (int qx = tidx; qx < q; qx += q) {
472  for (int dy = 0; dy < n; dy++) {
473  get<0>(qf_input)[i] += B(qy, dy) * A(0, dy, qx);
474  get<1>(qf_input)[i][0] += B(qy, dy) * A(1, dy, qx);
475  get<1>(qf_input)[i][1] += G(qy, dy) * A(0, dy, qx);
476  }
477  }
478  }
479  }
480 
481  get<1>(qf_input) = dot(get<1>(qf_input), inv(J));
482 
483  return qf_input;
484  }
485 
486  template <typename T1, typename T2, int q>
487  static SMITH_DEVICE void integrate(tuple<T1, T2>& response, const tensor<double, dim, dim>& J,
488  const TensorProductQuadratureRule<q>& rule, cache_type<q>& A, dof_type& residual)
489  {
490  int tidx = threadIdx.x % q;
491  int tidy = threadIdx.x / q;
492 
493  static constexpr auto points1D = GaussLegendreNodes<q>();
494  static constexpr auto weights1D = GaussLegendreWeights<q>();
495  static constexpr auto B_ = [=]() {
496  tensor<double, q, n> B{};
497  for (int i = 0; i < q; i++) {
498  B[i] = GaussLobattoInterpolation<n>(points1D[i]);
499  }
500  return B;
501  }();
502 
503  static constexpr auto G_ = [=]() {
504  tensor<double, q, n> G{};
505  for (int i = 0; i < q; i++) {
506  G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
507  }
508  return G;
509  }();
510 
511  __shared__ tensor<double, q, n> B;
512  __shared__ tensor<double, q, n> G;
513  for (int entry = threadIdx.x; entry < n * q; entry += q * q) {
514  int i = entry % n;
515  int j = entry / n;
516  B(j, i) = B_(j, i);
517  G(j, i) = G_(j, i);
518  }
519  __syncthreads();
520 
521  auto dv = det(J) * weights1D[tidx] * weights1D[tidy];
522 
523  get<0>(response) = get<0>(response) * dv;
524  get<1>(response) = dot(get<1>(response), inv(transpose(J))) * dv;
525 
526  for (int i = 0; i < c; i++) {
527  // this first contraction is performed a little differently, since `response` is not
528  // in shared memory, so each thread can only access its own values
529  for (int qy = tidy; qy < q; qy += q) {
530  for (int dx = tidx; dx < n; dx += q) {
531  A(0, dx, qy) = 0.0;
532  A(1, dx, qy) = 0.0;
533  }
534  }
535  __syncthreads();
536 
537  for (int offset = 0; offset < n; offset++) {
538  int dx = (tidx + offset) % n;
539  auto sum = B(tidx, dx) * get<0>(response)(i) + G(tidx, dx) * get<1>(response)(i, 0);
540  atomicAdd(&A(0, dx, tidy), sum);
541  atomicAdd(&A(1, dx, tidy), B(tidx, dx) * get<1>(response)(i, 1));
542  }
543  __syncthreads();
544 
545  for (int dy = tidy; dy < n; dy += q) {
546  for (int dx = tidx; dx < n; dx += q) {
547  double sum = 0.0;
548  for (int qy = 0; qy < q; qy++) {
549  sum += B(qy, dy) * A(0, dx, qy);
550  sum += G(qy, dy) * A(1, dx, qy);
551  }
552  residual(i, dy, dx) += sum;
553  }
554  }
555  }
556  }
557 
558 #endif
559 };
#define SMITH_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
Definition: accelerator.hpp:38
#define SMITH_DEVICE
Macro that evaluates to __device__ when compiling with nvcc and does nothing on a host compiler.
Definition: accelerator.hpp:46
constexpr SMITH_HOST_DEVICE auto inv(const isotropic_tensor< T, m, m > &I)
return the inverse of an isotropic tensor
constexpr SMITH_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:376
SMITH_HOST_DEVICE auto max(dual< gradient_type > a, double b)
Implementation of max for dual numbers.
Definition: dual.hpp:229
constexpr SMITH_HOST_DEVICE int size(const tensor< T, n... > &)
returns the total number of stored values in a tensor
Definition: tensor.hpp:1932
constexpr SMITH_HOST_DEVICE auto transpose(const isotropic_tensor< T, m, m > &I)
return the transpose of an isotropic tensor
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.
constexpr SMITH_HOST_DEVICE auto det(const isotropic_tensor< T, m, m > &I)
compute the determinant of an isotropic tensor
constexpr SMITH_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor