19 template <
int p,
int c>
20 struct finite_element<mfem::Geometry::SQUARE, L2<p, c> > {
21 static constexpr
auto geometry = mfem::Geometry::SQUARE;
22 static constexpr
auto family = Family::L2;
23 static constexpr
int components = c;
24 static constexpr
int dim = 2;
25 static constexpr
int n = (p + 1);
26 static constexpr
int ndof = (p + 1) * (p + 1);
28 static constexpr
int VALUE = 0, GRADIENT = 1;
29 static constexpr
int SOURCE = 0, FLUX = 1;
32 typename std::conditional<components == 1, tensor<double, ndof>, tensor<double, ndof, components> >
::type;
34 using dof_type = tensor<double, c, p + 1, p + 1>;
35 using dof_type_if = tensor<double, c, 2, p + 1, p + 1>;
37 using value_type =
typename std::conditional<components == 1, double, tensor<double, components> >
::type;
38 using derivative_type =
39 typename std::conditional<components == 1, tensor<double, dim>, tensor<double, components, dim> >
::type;
40 using qf_input_type = tuple<value_type, derivative_type>;
77 SERAC_HOST_DEVICE static constexpr tensor<double, ndof> shape_functions(tensor<double, dim> xi)
79 auto N_xi = GaussLobattoInterpolation<p + 1>(xi[0]);
80 auto N_eta = GaussLobattoInterpolation<p + 1>(xi[1]);
84 tensor<double, ndof> N{};
85 for (
int j = 0; j < p + 1; j++) {
86 for (
int i = 0; i < p + 1; i++) {
87 N[count++] = N_xi[i] * N_eta[j];
93 SERAC_HOST_DEVICE static constexpr tensor<double, ndof, dim> shape_function_gradients(tensor<double, dim> xi)
95 auto N_xi = GaussLobattoInterpolation<p + 1>(xi[0]);
96 auto N_eta = GaussLobattoInterpolation<p + 1>(xi[1]);
97 auto dN_xi = GaussLobattoInterpolationDerivative<p + 1>(xi[0]);
98 auto dN_eta = GaussLobattoInterpolationDerivative<p + 1>(xi[1]);
102 tensor<double, ndof, dim> dN{};
103 for (
int j = 0; j < p + 1; j++) {
104 for (
int i = 0; i < p + 1; i++) {
105 dN[count++] = {dN_xi[i] * N_eta[j], N_xi[i] * dN_eta[j]};
121 template <
bool apply_weights,
int q>
122 static constexpr
auto calculate_B()
124 constexpr
auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
125 [[maybe_unused]] constexpr
auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
126 tensor<double, q, n> B{};
127 for (
int i = 0; i < q; i++) {
128 B[i] = GaussLobattoInterpolation<n>(points1D[i]);
129 if constexpr (apply_weights) B[i] = B[i] * weights1D[i];
144 template <
bool apply_weights,
int q>
145 static constexpr
auto calculate_G()
147 constexpr
auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
148 [[maybe_unused]] constexpr
auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
149 tensor<double, q, n> G{};
150 for (
int i = 0; i < q; i++) {
151 G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
152 if constexpr (apply_weights) G[i] = G[i] * weights1D[i];
157 template <
typename in_t,
int q>
158 static auto batch_apply_shape_fn(
int j, tensor<in_t, q * q> input,
const TensorProductQuadratureRule<q>&)
160 static constexpr
bool apply_weights =
false;
161 static constexpr
auto B = calculate_B<apply_weights, q>();
162 static constexpr
auto G = calculate_G<apply_weights, q>();
167 using source_t = decltype(get<0>(get<0>(in_t{})) +
dot(get<1>(get<0>(in_t{})), tensor<double, 2>{}));
168 using flux_t = decltype(get<0>(get<1>(in_t{})) +
dot(get<1>(get<1>(in_t{})), tensor<double, 2>{}));
170 tensor<tuple<source_t, flux_t>, q * q> output;
172 for (
int qy = 0; qy < q; qy++) {
173 for (
int qx = 0; qx < q; qx++) {
174 double phi_j = B(qx, jx) * B(qy, jy);
175 tensor<double, dim> dphi_j_dxi = {G(qx, jx) * B(qy, jy), B(qx, jx) * G(qy, jy)};
178 const auto& d00 = get<0>(get<0>(input(Q)));
179 const auto& d01 = get<1>(get<0>(input(Q)));
180 const auto& d10 = get<0>(get<1>(input(Q)));
181 const auto& d11 = get<1>(get<1>(input(Q)));
183 output[Q] = {d00 * phi_j +
dot(d01, dphi_j_dxi), d10 * phi_j +
dot(d11, dphi_j_dxi)};
190 template <
typename T,
int q>
191 static auto batch_apply_shape_fn_interior_face(
int j, tensor<T, q * q> input,
const TensorProductQuadratureRule<q>&)
193 static constexpr
bool apply_weights =
false;
194 static constexpr
auto B = calculate_B<apply_weights, q>();
196 using source0_t = decltype(get<0>(get<0>(T{})) + get<1>(get<0>(T{})));
197 using source1_t = decltype(get<0>(get<1>(T{})) + get<1>(get<1>(T{})));
200 int jy = (j % ndof) / n;
203 tensor<tuple<source0_t, source1_t>, q * q> output;
205 for (
int qy = 0; qy < q; qy++) {
206 for (
int qx = 0; qx < q; qx++) {
207 double phi0_j = B(qx, jx) * B(qy, jy) * (s == 0);
208 double phi1_j = B(qx, jx) * B(qy, jy) * (s == 1);
211 const auto& d00 = get<0>(get<0>(input(Q)));
212 const auto& d01 = get<1>(get<0>(input(Q)));
213 const auto& d10 = get<0>(get<1>(input(Q)));
214 const auto& d11 = get<1>(get<1>(input(Q)));
216 output[Q] = {d00 * phi0_j + d01 * phi1_j, d10 * phi0_j + d11 * phi1_j};
238 SERAC_HOST_DEVICE static auto interpolate(
const dof_type& X,
const TensorProductQuadratureRule<q>&)
240 static constexpr
bool apply_weights =
false;
241 static constexpr
auto B = calculate_B<apply_weights, q>();
242 static constexpr
auto G = calculate_G<apply_weights, q>();
244 tensor<double, c, q, q> value{};
245 tensor<double, c, dim, q, q> gradient{};
248 for (
int i = 0; i < c; i++) {
249 auto A0 = contract<1, 1>(X[i], B);
250 auto A1 = contract<1, 1>(X[i], G);
252 value(i) = contract<0, 1>(A0, B);
253 gradient(i, 0) = contract<0, 1>(A1, B);
254 gradient(i, 1) = contract<0, 1>(A0, G);
259 tensor<qf_input_type, q * q> one_dimensional;
260 tensor<tuple<tensor<double, c>, tensor<double, c, dim> >, q, q> two_dimensional;
263 for (
int qy = 0; qy < q; qy++) {
264 for (
int qx = 0; qx < q; qx++) {
265 for (
int i = 0; i < c; i++) {
266 get<VALUE>(output.two_dimensional(qy, qx))[i] = value(i, qy, qx);
267 for (
int j = 0; j < dim; j++) {
268 get<GRADIENT>(output.two_dimensional(qy, qx))[i][j] = gradient(i, j, qy, qx);
274 return output.one_dimensional;
280 const TensorProductQuadratureRule<q>&)
282 static constexpr
bool apply_weights =
false;
283 static constexpr
auto B = calculate_B<apply_weights, q>();
285 tensor<tuple<value_type, value_type>, q * q> output;
287 tensor<double, c, q, q> value{};
290 for (
int i = 0; i < c; i++) {
291 auto A0 = contract<1, 1>(X(i, 0), B);
292 value(i) = contract<0, 1>(A0, B);
295 for (
int qy = 0; qy < q; qy++) {
296 for (
int qx = 0; qx < q; qx++) {
297 if constexpr (c == 1) {
298 get<0>(output[qy * q + qx]) = value(0, qy, qx);
300 for (
int i = 0; i < c; i++) {
301 get<0>(output[qy * q + qx])[i] = value(i, qy, qx);
308 for (
int i = 0; i < c; i++) {
309 auto A0 = contract<1, 1>(X(i, 1), B);
310 value(i) = contract<0, 1>(A0, B);
313 for (
int qy = 0; qy < q; qy++) {
314 for (
int qx = 0; qx < q; qx++) {
315 if constexpr (c == 1) {
316 get<1>(output[qy * q + qx]) = value(0, qy, qx);
318 for (
int i = 0; i < c; i++) {
319 get<1>(output[qy * q + qx])[i] = value(i, qy, qx);
331 template <
typename source_type,
typename flux_type,
int q>
333 const TensorProductQuadratureRule<q>&, dof_type* element_residual,
336 if constexpr (is_zero<source_type>{} && is_zero<flux_type>{}) {
340 constexpr
int ntrial =
std::max(
size(source_type{}),
size(flux_type{}) / dim) / c;
342 using s_buffer_type = std::conditional_t<is_zero<source_type>{}, zero, tensor<double, q, q> >;
343 using f_buffer_type = std::conditional_t<is_zero<flux_type>{}, zero, tensor<double, dim, q, q> >;
345 static constexpr
bool apply_weights =
true;
346 static constexpr
auto B = calculate_B<apply_weights, q>();
347 static constexpr
auto G = calculate_G<apply_weights, q>();
349 for (
int j = 0; j < ntrial; j++) {
350 for (
int i = 0; i < c; i++) {
351 s_buffer_type source;
354 for (
int qy = 0; qy < q; qy++) {
355 for (
int qx = 0; qx < q; qx++) {
357 if constexpr (!is_zero<source_type>{}) {
358 source(qy, qx) =
reinterpret_cast<const double*
>(&get<SOURCE>(qf_output[Q]))[i * ntrial + j];
360 if constexpr (!is_zero<flux_type>{}) {
361 for (
int k = 0; k < dim; k++) {
362 flux(k, qy, qx) =
reinterpret_cast<const double*
>(&get<FLUX>(qf_output[Q]))[(i * dim + k) * ntrial + j];
368 auto A0 = contract<1, 0>(source, B) + contract<1, 0>(flux(0), G);
369 auto A1 = contract<1, 0>(flux(1), B);
371 element_residual[j * step](i) += contract<0, 0>(A0, B) + contract<0, 0>(A1, G);
376 template <
typename T,
int q>
378 const TensorProductQuadratureRule<q>&, dof_type_if* element_residual,
379 [[maybe_unused]]
int step = 1)
381 constexpr
int ntrial =
size(T{}) / c;
382 static constexpr
bool apply_weights =
true;
383 static constexpr
auto B = calculate_B<apply_weights, q>();
385 for (
int j = 0; j < ntrial; j++) {
386 for (
int i = 0; i < c; i++) {
387 tensor<double, q, q> source;
391 for (
int qy = 0; qy < q; qy++) {
392 for (
int qx = 0; qx < q; qx++) {
394 source(qy, qx) =
reinterpret_cast<const double*
>(&get<0>(qf_output[Q]))[i * ntrial + j];
398 auto A0 = contract<1, 0>(source, B);
399 element_residual[j * step](i, 0) += contract<0, 0>(A0, B);
404 for (
int qy = 0; qy < q; qy++) {
405 for (
int qx = 0; qx < q; qx++) {
407 source(qy, qx) =
reinterpret_cast<const double*
>(&get<1>(qf_output[Q]))[i * ntrial + j];
411 auto A0 = contract<1, 0>(source, B);
412 element_residual[j * step](i, 1) += contract<0, 0>(A0, B);
421 static SERAC_DEVICE auto interpolate(
const dof_type& X,
const tensor<double, dim, dim>& J,
422 const TensorProductQuadratureRule<q>& rule, cache_type<q>& A)
424 int tidx = threadIdx.x % q;
425 int tidy = threadIdx.x / q;
427 static constexpr
auto points1D = GaussLegendreNodes<q>();
428 static constexpr
auto B_ = [=]() {
429 tensor<double, q, n> B{};
430 for (
int i = 0; i < q; i++) {
431 B[i] = GaussLobattoInterpolation<n>(points1D[i]);
436 static constexpr
auto G_ = [=]() {
437 tensor<double, q, n> G{};
438 for (
int i = 0; i < q; i++) {
439 G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
444 __shared__ tensor<double, q, n> B;
445 __shared__ tensor<double, q, n> G;
446 for (
int entry = threadIdx.x; entry < n * q; entry += q * q) {
454 tuple<tensor<double, c>, tensor<double, c, dim> > qf_input{};
456 for (
int i = 0; i < c; i++) {
457 for (
int dy = tidy; dy < n; dy += q) {
458 for (
int qx = tidx; qx < q; qx += q) {
460 for (
int dx = 0; dx < n; dx++) {
461 sum[0] += B(qx, dx) * X(i, dy, dx);
462 sum[1] += G(qx, dx) * X(i, dy, dx);
464 A(0, dy, qx) = sum[0];
465 A(1, dy, qx) = sum[1];
470 for (
int qy = tidy; qy < q; qy += q) {
471 for (
int qx = tidx; qx < q; qx += q) {
472 for (
int dy = 0; dy < n; dy++) {
473 get<0>(qf_input)[i] += B(qy, dy) * A(0, dy, qx);
474 get<1>(qf_input)[i][0] += B(qy, dy) * A(1, dy, qx);
475 get<1>(qf_input)[i][1] += G(qy, dy) * A(0, dy, qx);
481 get<1>(qf_input) =
dot(get<1>(qf_input),
inv(J));
486 template <
typename T1,
typename T2,
int q>
487 static SERAC_DEVICE void integrate(tuple<T1, T2>& response,
const tensor<double, dim, dim>& J,
488 const TensorProductQuadratureRule<q>& rule, cache_type<q>& A, dof_type& residual)
490 int tidx = threadIdx.x % q;
491 int tidy = threadIdx.x / q;
493 static constexpr
auto points1D = GaussLegendreNodes<q>();
494 static constexpr
auto weights1D = GaussLegendreWeights<q>();
495 static constexpr
auto B_ = [=]() {
496 tensor<double, q, n> B{};
497 for (
int i = 0; i < q; i++) {
498 B[i] = GaussLobattoInterpolation<n>(points1D[i]);
503 static constexpr
auto G_ = [=]() {
504 tensor<double, q, n> G{};
505 for (
int i = 0; i < q; i++) {
506 G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
511 __shared__ tensor<double, q, n> B;
512 __shared__ tensor<double, q, n> G;
513 for (
int entry = threadIdx.x; entry < n * q; entry += q * q) {
521 auto dv =
det(J) * weights1D[tidx] * weights1D[tidy];
523 get<0>(response) = get<0>(response) * dv;
526 for (
int i = 0; i < c; i++) {
529 for (
int qy = tidy; qy < q; qy += q) {
530 for (
int dx = tidx; dx < n; dx += q) {
537 for (
int offset = 0; offset < n; offset++) {
538 int dx = (tidx + offset) % n;
539 auto sum = B(tidx, dx) * get<0>(response)(i) + G(tidx, dx) * get<1>(response)(i, 0);
540 atomicAdd(&A(0, dx, tidy), sum);
541 atomicAdd(&A(1, dx, tidy), B(tidx, dx) * get<1>(response)(i, 1));
545 for (
int dy = tidy; dy < n; dy += q) {
546 for (
int dx = tidx; dx < n; dx += q) {
548 for (
int qy = 0; qy < q; qy++) {
549 sum += B(qy, dy) * A(0, dx, qy);
550 sum += G(qy, dy) * A(1, dx, qy);
552 residual(i, dy, dx) += sum;
#define SERAC_DEVICE
Macro that evaluates to __device__ when compiling with nvcc and does nothing on a host compiler.
#define SERAC_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
constexpr SERAC_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.
SERAC_HOST_DEVICE auto max(dual< gradient_type > a, double b)
Implementation of max for dual numbers.
constexpr SERAC_HOST_DEVICE int size(const tensor< T, n... > &)
returns the total number of stored values in a tensor
constexpr SERAC_HOST_DEVICE auto det(const isotropic_tensor< T, m, m > &I)
compute the determinant of an isotropic tensor
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
constexpr SERAC_HOST_DEVICE auto inv(const isotropic_tensor< T, m, m > &I)
return the inverse of an isotropic tensor
constexpr SERAC_HOST_DEVICE auto transpose(const isotropic_tensor< T, m, m > &I)
return the transpose of an isotropic tensor