Serac  0.1
Serac is an implicit thermal strucural mechanics simulation code.
quadrilateral_H1.inl
Go to the documentation of this file.
1 // Copyright (c) Lawrence Livermore National Security, LLC and
2 // other Serac Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
13 // this specialization defines shape functions (and their gradients) that
14 // interpolate at Gauss-Lobatto nodes for the appropriate polynomial order
15 //
16 // note: mfem assumes the parent element domain is [0,1]x[0,1]
17 // for additional information on the finite_element concept requirements, see finite_element.hpp
19 template <int p, int c>
20 struct finite_element<mfem::Geometry::SQUARE, H1<p, c> > {
21  static constexpr auto geometry = mfem::Geometry::SQUARE;
22  static constexpr auto family = Family::H1;
23  static constexpr int components = c;
24  static constexpr int dim = 2;
25  static constexpr int order = p;
26  static constexpr int n = (p + 1);
27  static constexpr int ndof = (p + 1) * (p + 1);
28 
29  static constexpr int VALUE = 0, GRADIENT = 1;
30  static constexpr int SOURCE = 0, FLUX = 1;
31 
32  using residual_type =
33  typename std::conditional<components == 1, tensor<double, ndof>, tensor<double, ndof, components> >::type;
34 
35  using dof_type = tensor<double, c, p + 1, p + 1>;
36  using dof_type_if = dof_type;
37 
38  using value_type = typename std::conditional<components == 1, double, tensor<double, components> >::type;
39  using derivative_type =
40  typename std::conditional<components == 1, tensor<double, dim>, tensor<double, components, dim> >::type;
41  using qf_input_type = tuple<value_type, derivative_type>;
42 
43  /*
44 
45  interpolation nodes and their associated numbering:
46 
47  linear
48  2-----------3
49  | |
50  | |
51  | |
52  | |
53  | |
54  0-----------1
55 
56 
57  quadratic
58  6-----7-----8
59  | |
60  | |
61  3 4 5
62  | |
63  | |
64  0-----1-----2
65 
66 
67  cubic
68  12-13--14--15
69  | |
70  8 9 10 11
71  | |
72  4 5 6 7
73  | |
74  0---1---2---3
75 
76  */
77 
78  SERAC_HOST_DEVICE static constexpr tensor<double, ndof> shape_functions(tensor<double, dim> xi)
79  {
80  auto N_xi = GaussLobattoInterpolation<p + 1>(xi[0]);
81  auto N_eta = GaussLobattoInterpolation<p + 1>(xi[1]);
82 
83  int count = 0;
84 
85  tensor<double, ndof> N{};
86  for (int j = 0; j < p + 1; j++) {
87  for (int i = 0; i < p + 1; i++) {
88  N[count++] = N_xi[i] * N_eta[j];
89  }
90  }
91  return N;
92  }
93 
94  SERAC_HOST_DEVICE static constexpr tensor<double, ndof, dim> shape_function_gradients(tensor<double, dim> xi)
95  {
96  auto N_xi = GaussLobattoInterpolation<p + 1>(xi[0]);
97  auto N_eta = GaussLobattoInterpolation<p + 1>(xi[1]);
98  auto dN_xi = GaussLobattoInterpolationDerivative<p + 1>(xi[0]);
99  auto dN_eta = GaussLobattoInterpolationDerivative<p + 1>(xi[1]);
100 
101  int count = 0;
102 
103  tensor<double, ndof, dim> dN{};
104  for (int j = 0; j < p + 1; j++) {
105  for (int i = 0; i < p + 1; i++) {
106  dN[count++] = {dN_xi[i] * N_eta[j], N_xi[i] * dN_eta[j]};
107  }
108  }
109  return dN;
110  }
111 
122  template <bool apply_weights, int q>
123  static constexpr auto calculate_B()
124  {
125  constexpr auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
126  [[maybe_unused]] constexpr auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
127  tensor<double, q, n> B{};
128  for (int i = 0; i < q; i++) {
129  B[i] = GaussLobattoInterpolation<n>(points1D[i]);
130  if constexpr (apply_weights) B[i] = B[i] * weights1D[i];
131  }
132  return B;
133  }
134 
145  template <bool apply_weights, int q>
146  static constexpr auto calculate_G()
147  {
148  constexpr auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
149  [[maybe_unused]] constexpr auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
150  tensor<double, q, n> G{};
151  for (int i = 0; i < q; i++) {
152  G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
153  if constexpr (apply_weights) G[i] = G[i] * weights1D[i];
154  }
155  return G;
156  }
157 
158  template <typename in_t, int q>
159  static auto batch_apply_shape_fn(int j, tensor<in_t, q * q> input, const TensorProductQuadratureRule<q>&)
160  {
161  static constexpr bool apply_weights = false;
162  static constexpr auto B = calculate_B<apply_weights, q>();
163  static constexpr auto G = calculate_G<apply_weights, q>();
164 
165  int jx = j % n;
166  int jy = j / n;
167 
168  using source_t = decltype(get<0>(get<0>(in_t{})) + dot(get<1>(get<0>(in_t{})), tensor<double, 2>{}));
169  using flux_t = decltype(get<0>(get<1>(in_t{})) + dot(get<1>(get<1>(in_t{})), tensor<double, 2>{}));
170 
171  tensor<tuple<source_t, flux_t>, q * q> output;
172 
173  for (int qy = 0; qy < q; qy++) {
174  for (int qx = 0; qx < q; qx++) {
175  double phi_j = B(qx, jx) * B(qy, jy);
176  tensor<double, dim> dphi_j_dxi = {G(qx, jx) * B(qy, jy), B(qx, jx) * G(qy, jy)};
177 
178  int Q = qy * q + qx;
179  const auto& d00 = get<0>(get<0>(input(Q)));
180  const auto& d01 = get<1>(get<0>(input(Q)));
181  const auto& d10 = get<0>(get<1>(input(Q)));
182  const auto& d11 = get<1>(get<1>(input(Q)));
183 
184  output[Q] = {d00 * phi_j + dot(d01, dphi_j_dxi), d10 * phi_j + dot(d11, dphi_j_dxi)};
185  }
186  }
187 
188  return output;
189  }
190 
191  // we want to compute the following:
192  //
193  // X_q(u, v) := (B(u, i) * B(v, j)) * X_e(i, j)
194  //
195  // where
196  // X_q(u, v) are the quadrature-point values at position {u, v},
197  // B(u, i) is the i^{th} 1D interpolation/differentiation (shape) function,
198  // evaluated at the u^{th} 1D quadrature point, and
199  // X_e(i, j) are the values at node {i, j} to be interpolated
200  //
201  // this algorithm carries out the above calculation in 2 steps:
202  //
203  // A(dy, qx) := B(qx, dx) * X_e(dy, dx)
204  // X_q(qy, qx) := B(qy, dy) * A(dy, qx)
205  template <int q>
206  SERAC_HOST_DEVICE static auto interpolate(const dof_type& X, const TensorProductQuadratureRule<q>&)
207  {
208  static constexpr bool apply_weights = false;
209  static constexpr auto B = calculate_B<apply_weights, q>();
210  static constexpr auto G = calculate_G<apply_weights, q>();
211 
212  tensor<double, c, q, q> value{};
213  tensor<double, c, dim, q, q> gradient{};
214 
215  // apply the shape functions
216  for (int i = 0; i < c; i++) {
217  auto A0 = contract<1, 1>(X[i], B);
218  auto A1 = contract<1, 1>(X[i], G);
219 
220  value(i) = contract<0, 1>(A0, B);
221  gradient(i, 0) = contract<0, 1>(A1, B);
222  gradient(i, 1) = contract<0, 1>(A0, G);
223  }
224 
225  // transpose the quadrature data into a flat tensor of tuples
226  union {
227  tensor<qf_input_type, q * q> one_dimensional;
228  tensor<tuple<tensor<double, c>, tensor<double, c, dim> >, q, q> two_dimensional;
229  } output;
230 
231  for (int qy = 0; qy < q; qy++) {
232  for (int qx = 0; qx < q; qx++) {
233  for (int i = 0; i < c; i++) {
234  get<VALUE>(output.two_dimensional(qy, qx))[i] = value(i, qy, qx);
235  for (int j = 0; j < dim; j++) {
236  get<GRADIENT>(output.two_dimensional(qy, qx))[i][j] = gradient(i, j, qy, qx);
237  }
238  }
239  }
240  }
241 
242  return output.one_dimensional;
243  }
244 
245  // source can be one of: {zero, double, tensor<double,dim>, tensor<double,dim,dim>}
246  // flux can be one of: {zero, tensor<double,dim>, tensor<double,dim,dim>, tensor<double,dim,dim,dim>,
247  // tensor<double,dim,dim,dim>}
248  template <typename source_type, typename flux_type, int q>
249  SERAC_HOST_DEVICE static void integrate(const tensor<tuple<source_type, flux_type>, q * q>& qf_output,
250  const TensorProductQuadratureRule<q>&, dof_type* element_residual,
251  int step = 1)
252  {
253  if constexpr (is_zero<source_type>{} && is_zero<flux_type>{}) {
254  return;
255  }
256 
257  constexpr int ntrial = std::max(size(source_type{}), size(flux_type{}) / dim) / c;
258 
259  using s_buffer_type = std::conditional_t<is_zero<source_type>{}, zero, tensor<double, q, q> >;
260  using f_buffer_type = std::conditional_t<is_zero<flux_type>{}, zero, tensor<double, dim, q, q> >;
261 
262  static constexpr bool apply_weights = true;
263  static constexpr auto B = calculate_B<apply_weights, q>();
264  static constexpr auto G = calculate_G<apply_weights, q>();
265 
266  for (int j = 0; j < ntrial; j++) {
267  for (int i = 0; i < c; i++) {
268  s_buffer_type source;
269  f_buffer_type flux;
270 
271  for (int qy = 0; qy < q; qy++) {
272  for (int qx = 0; qx < q; qx++) {
273  [[maybe_unused]] int Q = qy * q + qx;
274  if constexpr (!is_zero<source_type>{}) {
275  source(qy, qx) = reinterpret_cast<const double*>(&get<SOURCE>(qf_output[Q]))[i * ntrial + j];
276  }
277 
278  if constexpr (!is_zero<flux_type>{}) {
279  for (int k = 0; k < dim; k++) {
280  flux(k, qy, qx) = reinterpret_cast<const double*>(&get<FLUX>(qf_output[Q]))[(i * dim + k) * ntrial + j];
281  }
282  }
283  }
284  }
285 
286  auto A0 = contract<1, 0>(source, B) + contract<1, 0>(flux(0), G);
287  auto A1 = contract<1, 0>(flux(1), B);
288 
289  element_residual[j * step](i) += contract<0, 0>(A0, B) + contract<0, 0>(A1, G);
290  }
291  }
292  }
293 
294 #if 0
295 
296  template <int q>
297  static SERAC_DEVICE auto interpolate(const dof_type& X, const tensor<double, dim, dim>& J,
298  const TensorProductQuadratureRule<q>& rule, cache_type<q>& A)
299  {
300  int tidx = threadIdx.x % q;
301  int tidy = threadIdx.x / q;
302 
303  static constexpr auto points1D = GaussLegendreNodes<q>();
304  static constexpr auto B_ = [=]() {
305  tensor<double, q, n> B{};
306  for (int i = 0; i < q; i++) {
307  B[i] = GaussLobattoInterpolation<n>(points1D[i]);
308  }
309  return B;
310  }();
311 
312  static constexpr auto G_ = [=]() {
313  tensor<double, q, n> G{};
314  for (int i = 0; i < q; i++) {
315  G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
316  }
317  return G;
318  }();
319 
320  __shared__ tensor<double, q, n> B;
321  __shared__ tensor<double, q, n> G;
322  for (int entry = threadIdx.x; entry < n * q; entry += q * q) {
323  int i = entry % n;
324  int j = entry / n;
325  B(j, i) = B_(j, i);
326  G(j, i) = G_(j, i);
327  }
328  __syncthreads();
329 
330  tuple<tensor<double, c>, tensor<double, c, dim> > qf_input{};
331 
332  for (int i = 0; i < c; i++) {
333  for (int dy = tidy; dy < n; dy += q) {
334  for (int qx = tidx; qx < q; qx += q) {
335  double sum[2]{};
336  for (int dx = 0; dx < n; dx++) {
337  sum[0] += B(qx, dx) * X(i, dy, dx);
338  sum[1] += G(qx, dx) * X(i, dy, dx);
339  }
340  A(0, dy, qx) = sum[0];
341  A(1, dy, qx) = sum[1];
342  }
343  }
344  __syncthreads();
345 
346  for (int qy = tidy; qy < q; qy += q) {
347  for (int qx = tidx; qx < q; qx += q) {
348  for (int dy = 0; dy < n; dy++) {
349  get<0>(qf_input)[i] += B(qy, dy) * A(0, dy, qx);
350  get<1>(qf_input)[i][0] += B(qy, dy) * A(1, dy, qx);
351  get<1>(qf_input)[i][1] += G(qy, dy) * A(0, dy, qx);
352  }
353  }
354  }
355  }
356 
357  get<1>(qf_input) = dot(get<1>(qf_input), inv(J));
358 
359  return qf_input;
360  }
361 
362  template <typename T1, typename T2, int q>
363  static SERAC_DEVICE void integrate(tuple<T1, T2>& response, const tensor<double, dim, dim>& J,
364  const TensorProductQuadratureRule<q>& rule, cache_type<q>& A, dof_type& residual)
365  {
366  int tidx = threadIdx.x % q;
367  int tidy = threadIdx.x / q;
368 
369  static constexpr auto points1D = GaussLegendreNodes<q>();
370  static constexpr auto weights1D = GaussLegendreWeights<q>();
371  static constexpr auto B_ = [=]() {
372  tensor<double, q, n> B{};
373  for (int i = 0; i < q; i++) {
374  B[i] = GaussLobattoInterpolation<n>(points1D[i]);
375  }
376  return B;
377  }();
378 
379  static constexpr auto G_ = [=]() {
380  tensor<double, q, n> G{};
381  for (int i = 0; i < q; i++) {
382  G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
383  }
384  return G;
385  }();
386 
387  __shared__ tensor<double, q, n> B;
388  __shared__ tensor<double, q, n> G;
389  for (int entry = threadIdx.x; entry < n * q; entry += q * q) {
390  int i = entry % n;
391  int j = entry / n;
392  B(j, i) = B_(j, i);
393  G(j, i) = G_(j, i);
394  }
395  __syncthreads();
396 
397  auto dv = det(J) * weights1D[tidx] * weights1D[tidy];
398 
399  get<0>(response) = get<0>(response) * dv;
400  get<1>(response) = dot(get<1>(response), inv(transpose(J))) * dv;
401 
402  for (int i = 0; i < c; i++) {
403  // this first contraction is performed a little differently, since `response` is not
404  // in shared memory, so each thread can only access its own values
405  for (int qy = tidy; qy < q; qy += q) {
406  for (int dx = tidx; dx < n; dx += q) {
407  A(0, dx, qy) = 0.0;
408  A(1, dx, qy) = 0.0;
409  }
410  }
411  __syncthreads();
412 
413  for (int offset = 0; offset < n; offset++) {
414  int dx = (tidx + offset) % n;
415  auto sum = B(tidx, dx) * get<0>(response)(i) + G(tidx, dx) * get<1>(response)(i, 0);
416  atomicAdd(&A(0, dx, tidy), sum);
417  atomicAdd(&A(1, dx, tidy), B(tidx, dx) * get<1>(response)(i, 1));
418  }
419  __syncthreads();
420 
421  for (int dy = tidy; dy < n; dy += q) {
422  for (int dx = tidx; dx < n; dx += q) {
423  double sum = 0.0;
424  for (int qy = 0; qy < q; qy++) {
425  sum += B(qy, dy) * A(0, dx, qy);
426  sum += G(qy, dy) * A(1, dx, qy);
427  }
428  residual(i, dy, dx) += sum;
429  }
430  }
431  }
432  }
433 
434 #endif
435 };
#define SERAC_DEVICE
Macro that evaluates to __device__ when compiling with nvcc and does nothing on a host compiler.
Definition: accelerator.hpp:46
#define SERAC_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
Definition: accelerator.hpp:38
constexpr SERAC_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.
SERAC_HOST_DEVICE auto max(dual< gradient_type > a, double b)
Implementation of max for dual numbers.
Definition: dual.hpp:229
constexpr SERAC_HOST_DEVICE int size(const tensor< T, n... > &)
returns the total number of stored values in a tensor
Definition: tensor.hpp:1934
constexpr SERAC_HOST_DEVICE auto det(const isotropic_tensor< T, m, m > &I)
compute the determinant of an isotropic tensor
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:379
constexpr SERAC_HOST_DEVICE auto inv(const isotropic_tensor< T, m, m > &I)
return the inverse of an isotropic tensor
constexpr SERAC_HOST_DEVICE auto transpose(const isotropic_tensor< T, m, m > &I)
return the transpose of an isotropic tensor