25 struct finite_element<mfem::Geometry::SQUARE, Hcurl<p> > {
26 static constexpr
auto geometry = mfem::Geometry::SQUARE;
27 static constexpr
auto family = Family::HCURL;
28 static constexpr
int dim = 2;
29 static constexpr
int n = (p + 1);
30 static constexpr
int ndof = 2 * p * (p + 1);
31 static constexpr
int components = 1;
33 static constexpr
int VALUE = 0, CURL = 1;
34 static constexpr
int SOURCE = 0, FLUX = 1;
37 typename std::conditional<components == 1, tensor<double, ndof>, tensor<double, ndof, components> >
::type;
43 tensor<double, p + 1, p> x;
44 tensor<double, p, p + 1> y;
46 using dof_type_if = dof_type;
49 using cpu_batched_values_type = tensor<tensor<double, 2>, q, q>;
52 using cpu_batched_derivatives_type = tensor<double, q, q>;
54 static constexpr
auto directions = [] {
55 int dof_per_direction = p * (p + 1);
57 tensor<double, ndof, dim> directions{};
58 for (
int i = 0; i < dof_per_direction; i++) {
59 directions[i + 0 * dof_per_direction] = {1.0, 0.0};
60 directions[i + 1 * dof_per_direction] = {0.0, 1.0};
65 static constexpr
auto nodes = [] {
66 auto legendre_nodes = GaussLegendreNodes<p, mfem::Geometry::SEGMENT>();
67 auto lobatto_nodes = GaussLobattoNodes<p + 1>();
69 tensor<double, ndof, dim> nodes{};
72 for (
int j = 0; j < p + 1; j++) {
73 for (
int i = 0; i < p; i++) {
74 nodes[count++] = {legendre_nodes[i], lobatto_nodes[j]};
78 for (
int j = 0; j < p; j++) {
79 for (
int i = 0; i < p + 1; i++) {
80 nodes[count++] = {lobatto_nodes[i], legendre_nodes[j]};
97 template <
bool apply_weights,
int q>
98 static constexpr
auto calculate_B1()
100 constexpr
auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
101 [[maybe_unused]] constexpr
auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
102 tensor<double, q, p> B1{};
103 for (
int i = 0; i < q; i++) {
104 B1[i] = GaussLegendreInterpolation<p>(points1D[i]);
105 if constexpr (apply_weights) B1[i] = B1[i] * weights1D[i];
120 template <
bool apply_weights,
int q>
121 static constexpr
auto calculate_B2()
123 constexpr
auto points1D = GaussLegendreNodes<q>();
124 [[maybe_unused]] constexpr
auto weights1D = GaussLegendreWeights<q>();
125 tensor<double, q, p + 1> B2{};
126 for (
int i = 0; i < q; i++) {
127 B2[i] = GaussLobattoInterpolation<p + 1>(points1D[i]);
128 if constexpr (apply_weights) B2[i] = B2[i] * weights1D[i];
143 template <
bool apply_weights,
int q>
144 static constexpr
auto calculate_G2()
146 constexpr
auto points1D = GaussLegendreNodes<q>();
147 [[maybe_unused]] constexpr
auto weights1D = GaussLegendreWeights<q>();
148 tensor<double, q, p + 1> G2{};
149 for (
int i = 0; i < q; i++) {
150 G2[i] = GaussLobattoInterpolationDerivative<p + 1>(points1D[i]);
151 if constexpr (apply_weights) G2[i] = G2[i] * weights1D[i];
194 SERAC_HOST_DEVICE static constexpr tensor<double, ndof, dim> shape_functions(tensor<double, dim> xi)
197 tensor<double, ndof, dim> N{};
200 tensor<double, p + 1> N_closed = GaussLobattoInterpolation<p + 1>(xi[1]);
201 tensor<double, p> N_open = GaussLegendreInterpolation<p>(xi[0]);
202 for (
int j = 0; j < p + 1; j++) {
203 for (
int i = 0; i < p; i++) {
204 N[count++] = {N_open[i] * N_closed[j], 0.0};
209 N_closed = GaussLobattoInterpolation<p + 1>(xi[0]);
210 N_open = GaussLegendreInterpolation<p>(xi[1]);
211 for (
int j = 0; j < p; j++) {
212 for (
int i = 0; i < p + 1; i++) {
213 N[count++] = {0.0, N_closed[i] * N_open[j]};
220 SERAC_HOST_DEVICE static constexpr tensor<double, ndof> shape_function_curl(tensor<double, dim> xi)
223 tensor<double, ndof> curl_z{};
226 tensor<double, p + 1> dN_closed = GaussLobattoInterpolationDerivative<p + 1>(xi[1]);
227 tensor<double, p> N_open = GaussLegendreInterpolation<p>(xi[0]);
228 for (
int j = 0; j < p + 1; j++) {
229 for (
int i = 0; i < p; i++) {
230 curl_z[count++] = -N_open[i] * dN_closed[j];
235 dN_closed = GaussLobattoInterpolationDerivative<p + 1>(xi[0]);
236 N_open = GaussLegendreInterpolation<p>(xi[1]);
237 for (
int j = 0; j < p; j++) {
238 for (
int i = 0; i < p + 1; i++) {
239 curl_z[count++] = dN_closed[i] * N_open[j];
246 template <
typename in_t,
int q>
247 static auto batch_apply_shape_fn(
int j, tensor<in_t, q * q> input,
const TensorProductQuadratureRule<q>&)
249 constexpr
bool apply_weights =
false;
250 constexpr tensor<double, q, p> B1 = calculate_B1<apply_weights, q>();
251 constexpr tensor<double, q, p + 1> B2 = calculate_B2<apply_weights, q>();
252 constexpr tensor<double, q, p + 1> G2 = calculate_G2<apply_weights, q>();
255 int dir = j / ((p + 1) * p);
260 jx = (j % ((p + 1) * p)) % n;
261 jy = (j % ((p + 1) * p)) / n;
264 using source_t = decltype(
dot(get<0>(get<0>(in_t{})), tensor<double, 2>{}) + get<1>(get<0>(in_t{})) *
double{});
265 using flux_t = decltype(
dot(get<0>(get<1>(in_t{})), tensor<double, 2>{}) + get<1>(get<1>(in_t{})) *
double{});
267 tensor<tuple<source_t, flux_t>, q * q> output;
269 for (
int qy = 0; qy < q; qy++) {
270 for (
int qx = 0; qx < q; qx++) {
271 tensor<double, 2> phi_j{(dir == 0) * B1(qx, jx) * B2(qy, jy), (dir == 1) * B1(qy, jy) * B2(qx, jx)};
273 double curl_phi_j = (dir == 0) * -B1(qx, jx) * G2(qy, jy) + (dir == 1) * B1(qy, jy) * G2(qx, jx);
276 const auto& d00 = get<0>(get<0>(input(Q)));
277 const auto& d01 = get<1>(get<0>(input(Q)));
278 const auto& d10 = get<0>(get<1>(input(Q)));
279 const auto& d11 = get<1>(get<1>(input(Q)));
281 output[Q] = {
dot(d00, phi_j) + d01 * curl_phi_j,
dot(d10, phi_j) + d11 * curl_phi_j};
289 SERAC_HOST_DEVICE static auto interpolate(
const dof_type& element_values,
const TensorProductQuadratureRule<q>&)
291 constexpr
bool apply_weights =
false;
292 constexpr tensor<double, q, p> B1 = calculate_B1<apply_weights, q>();
293 constexpr tensor<double, q, p + 1> B2 = calculate_B2<apply_weights, q>();
294 constexpr tensor<double, q, p + 1> G2 = calculate_G2<apply_weights, q>();
296 tensor<double, 2, q, q> value{};
297 tensor<double, q, q> curl{};
300 constexpr
int x = 1, y = 0;
302 auto A = contract<x, 1>(element_values.x, B1);
303 value[0] = contract<y, 1>(A, B2);
304 curl -= contract<y, 1>(A, G2);
306 A = contract<y, 1>(element_values.y, B1);
307 value[1] = contract<x, 1>(A, B2);
308 curl += contract<x, 1>(A, G2);
310 tensor<tuple<tensor<double, 2>,
double>, q * q> qf_inputs;
313 for (
int qy = 0; qy < q; qy++) {
314 for (
int qx = 0; qx < q; qx++) {
315 for (
int i = 0; i < dim; i++) {
316 get<VALUE>(qf_inputs(count))[i] = value[i](qy, qx);
318 get<CURL>(qf_inputs(count)) = curl(qy, qx);
326 template <
typename source_type,
typename flux_type,
int q>
328 const TensorProductQuadratureRule<q>&, dof_type* element_residual,
329 [[maybe_unused]]
int step = 1)
331 constexpr
bool apply_weights =
true;
332 constexpr tensor<double, q, p> B1 = calculate_B1<apply_weights, q>();
333 constexpr tensor<double, q, p + 1> B2 = calculate_B2<apply_weights, q>();
334 constexpr tensor<double, q, p + 1> G2 = calculate_G2<apply_weights, q>();
336 tensor<double, 2, q, q> source{};
337 tensor<double, q, q> flux{};
339 for (
int qy = 0; qy < q; qy++) {
340 for (
int qx = 0; qx < q; qx++) {
342 tensor<double, dim> s{get<SOURCE>(qf_output[Q])};
343 for (
int i = 0; i < dim; i++) {
344 source(i, qy, qx) = s[i];
346 flux(qy, qx) = get<FLUX>(qf_output[Q]);
351 constexpr
int x = 1, y = 0;
353 auto A = contract<y, 0>(source[0], B2) - contract<y, 0>(flux, G2);
354 element_residual[0].x += contract<x, 0>(A, B1);
356 A = contract<x, 0>(source[1], B2) + contract<x, 0>(flux, G2);
357 element_residual[0].y += contract<y, 0>(A, B1);
363 static SERAC_DEVICE auto interpolate(
const dof_type& element_values,
const tensor<double, dim, dim>& J,
364 const TensorProductQuadratureRule<q>& rule, cache_type<q>& A)
366 int tidx = threadIdx.x % q;
367 int tidy = threadIdx.x / q;
369 static constexpr
auto points1D = GaussLegendreNodes<q>();
371 static constexpr
auto B1_ = [=]() {
372 tensor<double, q, p> B1{};
373 for (
int i = 0; i < q; i++) {
374 B1[i] = GaussLegendreInterpolation<p>(points1D[i]);
379 static constexpr
auto B2_ = [=]() {
380 tensor<double, q, n> B2{};
381 for (
int i = 0; i < q; i++) {
382 B2[i] = GaussLobattoInterpolation<n>(points1D[i]);
387 static constexpr
auto G2_ = [=]() {
388 tensor<double, q, n> G2{};
389 for (
int i = 0; i < q; i++) {
390 G2[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
395 __shared__ tensor<double, q, p> B1;
396 __shared__ tensor<double, q, n> B2;
397 __shared__ tensor<double, q, n> G2;
398 for (
int entry = threadIdx.x; entry < n * q; entry += q * q) {
402 B1(j, i) = B1_(j, i);
404 B2(j, i) = B2_(j, i);
405 G2(j, i) = G2_(j, i);
409 tuple<tensor<double, dim>,
double> qf_input{};
414 for (
int dy = tidy; dy < p + 1; dy += q) {
415 for (
int qx = tidx; qx < q; qx += q) {
417 for (
int dx = 0; dx < p; dx++) {
418 sum += B1(qx, dx) * element_values.x(dy, dx);
425 for (
int qy = tidy; qy < q; qy += q) {
426 for (
int qx = tidx; qx < q; qx += q) {
428 for (
int dy = 0; dy < (p + 1); dy++) {
429 sum[0] += B2(qy, dy) * A(dy, qx);
430 sum[1] += G2(qy, dy) * A(dy, qx);
432 serac::get<0>(qf_input)[0] += sum[0];
433 serac::get<1>(qf_input) -= sum[1];
441 for (
int dx = tidx; dx < p + 1; dx += q) {
442 for (
int qy = tidy; qy < q; qy += q) {
444 for (
int dy = 0; dy < p; dy++) {
445 sum += B1(qy, dy) * element_values.y(dy, dx);
452 for (
int qy = tidy; qy < q; qy += q) {
453 for (
int qx = tidx; qx < q; qx += q) {
455 for (
int dx = 0; dx < (p + 1); dx++) {
456 sum[0] += B2(qx, dx) * A(dx, qy);
457 sum[1] += G2(qx, dx) * A(dx, qy);
459 serac::get<0>(qf_input)[1] += sum[0];
460 serac::get<1>(qf_input) += sum[1];
467 serac::get<1>(qf_input) = serac::get<1>(qf_input) /
det(J);
472 template <
typename T1,
typename T2,
int q>
473 static SERAC_DEVICE void integrate(tuple<T1, T2>& response,
const tensor<double, dim, dim>& J,
474 const TensorProductQuadratureRule<q>& rule, cache_type<q>& A, dof_type& residual)
476 int tidx = threadIdx.x % q;
477 int tidy = threadIdx.x / q;
479 static constexpr
auto points1D = GaussLegendreNodes<q>();
480 static constexpr
auto weights1D = GaussLegendreWeights<q>();
482 static constexpr
auto B1_ = [=]() {
483 tensor<double, q, p> B1{};
484 for (
int i = 0; i < q; i++) {
485 B1[i] = GaussLegendreInterpolation<p>(points1D[i]);
490 static constexpr
auto B2_ = [=]() {
491 tensor<double, q, n> B2{};
492 for (
int i = 0; i < q; i++) {
493 B2[i] = GaussLobattoInterpolation<n>(points1D[i]);
498 static constexpr
auto G2_ = [=]() {
499 tensor<double, q, n> G2{};
500 for (
int i = 0; i < q; i++) {
501 G2[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
506 __shared__ tensor<double, q, p> B1;
507 __shared__ tensor<double, q, n> B2;
508 __shared__ tensor<double, q, n> G2;
509 for (
int entry = threadIdx.x; entry < n * q; entry += q * q) {
513 B1(j, i) = B1_(j, i);
515 B2(j, i) = B2_(j, i);
516 G2(j, i) = G2_(j, i);
525 auto dv = detJ * weights1D[tidx] * weights1D[tidy];
527 get<0>(response) =
linear_solve(J, get<0>(response)) * dv;
528 get<1>(response) = get<1>(response) * (dv / detJ);
536 for (
int qy = tidy; qy < q; qy += q) {
537 for (
int dx = tidx; dx < n; dx += q) {
543 for (
int offset = 0; offset < n; offset++) {
544 int dy = (tidy + offset) % n;
545 auto sum = B2(tidy, dy) * get<0>(response)[0] - G2(tidy, dy) * get<1>(response);
546 atomicAdd(&A(dy, tidx), sum);
550 for (
int dy = tidy; dy < p + 1; dy += q) {
551 for (
int dx = tidx; dx < p; dx += q) {
553 for (
int qx = 0; qx < q; qx++) {
554 sum += B1(qx, dx) * A(dy, qx);
556 residual.x(dy, dx) += sum;
567 for (
int qy = tidy; qy < q; qy += q) {
568 for (
int dx = tidx; dx < n; dx += q) {
574 for (
int offset = 0; offset < n; offset++) {
575 int dx = (tidx + offset) % n;
576 auto sum = B2(tidx, dx) * get<0>(response)[1] + G2(tidx, dx) * get<1>(response);
577 atomicAdd(&A(dx, tidy), sum);
581 for (
int dy = tidy; dy < p; dy += q) {
582 for (
int dx = tidx; dx < p + 1; dx += q) {
584 for (
int qy = 0; qy < q; qy++) {
585 sum += B1(qy, dy) * A(dx, qy);
587 residual.y(dy, dx) += sum;
#define SERAC_DEVICE
Macro that evaluates to __device__ when compiling with nvcc and does nothing on a host compiler.
#define SERAC_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
constexpr SERAC_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.
constexpr SERAC_HOST_DEVICE auto linear_solve(const LuFactorization< S, n > &lu_factors, const tensor< T, n, m... > &b)
constexpr SERAC_HOST_DEVICE auto det(const isotropic_tensor< T, m, m > &I)
compute the determinant of an isotropic tensor
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
constexpr SERAC_HOST_DEVICE auto transpose(const isotropic_tensor< T, m, m > &I)
return the transpose of an isotropic tensor