19 template <
int p,
int c>
20 struct finite_element<mfem::Geometry::SQUARE, H1<p, c> > {
21 static constexpr
auto geometry = mfem::Geometry::SQUARE;
22 static constexpr
auto family = Family::H1;
23 static constexpr
int components = c;
24 static constexpr
int dim = 2;
25 static constexpr
int order = p;
26 static constexpr
int n = (p + 1);
27 static constexpr
int ndof = (p + 1) * (p + 1);
29 static constexpr
int VALUE = 0, GRADIENT = 1;
30 static constexpr
int SOURCE = 0, FLUX = 1;
33 typename std::conditional<components == 1, tensor<double, ndof>, tensor<double, ndof, components> >
::type;
35 using dof_type = tensor<double, c, p + 1, p + 1>;
36 using dof_type_if = dof_type;
38 using value_type =
typename std::conditional<components == 1, double, tensor<double, components> >
::type;
39 using derivative_type =
40 typename std::conditional<components == 1, tensor<double, dim>, tensor<double, components, dim> >
::type;
41 using qf_input_type = tuple<value_type, derivative_type>;
78 SERAC_HOST_DEVICE static constexpr tensor<double, ndof> shape_functions(tensor<double, dim> xi)
80 auto N_xi = GaussLobattoInterpolation<p + 1>(xi[0]);
81 auto N_eta = GaussLobattoInterpolation<p + 1>(xi[1]);
85 tensor<double, ndof> N{};
86 for (
int j = 0; j < p + 1; j++) {
87 for (
int i = 0; i < p + 1; i++) {
88 N[count++] = N_xi[i] * N_eta[j];
94 SERAC_HOST_DEVICE static constexpr tensor<double, ndof, dim> shape_function_gradients(tensor<double, dim> xi)
96 auto N_xi = GaussLobattoInterpolation<p + 1>(xi[0]);
97 auto N_eta = GaussLobattoInterpolation<p + 1>(xi[1]);
98 auto dN_xi = GaussLobattoInterpolationDerivative<p + 1>(xi[0]);
99 auto dN_eta = GaussLobattoInterpolationDerivative<p + 1>(xi[1]);
103 tensor<double, ndof, dim> dN{};
104 for (
int j = 0; j < p + 1; j++) {
105 for (
int i = 0; i < p + 1; i++) {
106 dN[count++] = {dN_xi[i] * N_eta[j], N_xi[i] * dN_eta[j]};
122 template <
bool apply_weights,
int q>
123 static constexpr
auto calculate_B()
125 constexpr
auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
126 [[maybe_unused]] constexpr
auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
127 tensor<double, q, n> B{};
128 for (
int i = 0; i < q; i++) {
129 B[i] = GaussLobattoInterpolation<n>(points1D[i]);
130 if constexpr (apply_weights) B[i] = B[i] * weights1D[i];
145 template <
bool apply_weights,
int q>
146 static constexpr
auto calculate_G()
148 constexpr
auto points1D = GaussLegendreNodes<q, mfem::Geometry::SEGMENT>();
149 [[maybe_unused]] constexpr
auto weights1D = GaussLegendreWeights<q, mfem::Geometry::SEGMENT>();
150 tensor<double, q, n> G{};
151 for (
int i = 0; i < q; i++) {
152 G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
153 if constexpr (apply_weights) G[i] = G[i] * weights1D[i];
158 template <
typename in_t,
int q>
159 static auto batch_apply_shape_fn(
int j, tensor<in_t, q * q> input,
const TensorProductQuadratureRule<q>&)
161 static constexpr
bool apply_weights =
false;
162 static constexpr
auto B = calculate_B<apply_weights, q>();
163 static constexpr
auto G = calculate_G<apply_weights, q>();
168 using source_t = decltype(get<0>(get<0>(in_t{})) +
dot(get<1>(get<0>(in_t{})), tensor<double, 2>{}));
169 using flux_t = decltype(get<0>(get<1>(in_t{})) +
dot(get<1>(get<1>(in_t{})), tensor<double, 2>{}));
171 tensor<tuple<source_t, flux_t>, q * q> output;
173 for (
int qy = 0; qy < q; qy++) {
174 for (
int qx = 0; qx < q; qx++) {
175 double phi_j = B(qx, jx) * B(qy, jy);
176 tensor<double, dim> dphi_j_dxi = {G(qx, jx) * B(qy, jy), B(qx, jx) * G(qy, jy)};
179 const auto& d00 = get<0>(get<0>(input(Q)));
180 const auto& d01 = get<1>(get<0>(input(Q)));
181 const auto& d10 = get<0>(get<1>(input(Q)));
182 const auto& d11 = get<1>(get<1>(input(Q)));
184 output[Q] = {d00 * phi_j +
dot(d01, dphi_j_dxi), d10 * phi_j +
dot(d11, dphi_j_dxi)};
206 SERAC_HOST_DEVICE static auto interpolate(
const dof_type& X,
const TensorProductQuadratureRule<q>&)
208 static constexpr
bool apply_weights =
false;
209 static constexpr
auto B = calculate_B<apply_weights, q>();
210 static constexpr
auto G = calculate_G<apply_weights, q>();
212 tensor<double, c, q, q> value{};
213 tensor<double, c, dim, q, q> gradient{};
216 for (
int i = 0; i < c; i++) {
217 auto A0 = contract<1, 1>(X[i], B);
218 auto A1 = contract<1, 1>(X[i], G);
220 value(i) = contract<0, 1>(A0, B);
221 gradient(i, 0) = contract<0, 1>(A1, B);
222 gradient(i, 1) = contract<0, 1>(A0, G);
227 tensor<qf_input_type, q * q> one_dimensional;
228 tensor<tuple<tensor<double, c>, tensor<double, c, dim> >, q, q> two_dimensional;
231 for (
int qy = 0; qy < q; qy++) {
232 for (
int qx = 0; qx < q; qx++) {
233 for (
int i = 0; i < c; i++) {
234 get<VALUE>(output.two_dimensional(qy, qx))[i] = value(i, qy, qx);
235 for (
int j = 0; j < dim; j++) {
236 get<GRADIENT>(output.two_dimensional(qy, qx))[i][j] = gradient(i, j, qy, qx);
242 return output.one_dimensional;
248 template <
typename source_type,
typename flux_type,
int q>
250 const TensorProductQuadratureRule<q>&, dof_type* element_residual,
253 if constexpr (is_zero<source_type>{} && is_zero<flux_type>{}) {
257 constexpr
int ntrial =
std::max(
size(source_type{}),
size(flux_type{}) / dim) / c;
259 using s_buffer_type = std::conditional_t<is_zero<source_type>{}, zero, tensor<double, q, q> >;
260 using f_buffer_type = std::conditional_t<is_zero<flux_type>{}, zero, tensor<double, dim, q, q> >;
262 static constexpr
bool apply_weights =
true;
263 static constexpr
auto B = calculate_B<apply_weights, q>();
264 static constexpr
auto G = calculate_G<apply_weights, q>();
266 for (
int j = 0; j < ntrial; j++) {
267 for (
int i = 0; i < c; i++) {
268 s_buffer_type source;
271 for (
int qy = 0; qy < q; qy++) {
272 for (
int qx = 0; qx < q; qx++) {
273 [[maybe_unused]]
int Q = qy * q + qx;
274 if constexpr (!is_zero<source_type>{}) {
275 source(qy, qx) =
reinterpret_cast<const double*
>(&get<SOURCE>(qf_output[Q]))[i * ntrial + j];
278 if constexpr (!is_zero<flux_type>{}) {
279 for (
int k = 0; k < dim; k++) {
280 flux(k, qy, qx) =
reinterpret_cast<const double*
>(&get<FLUX>(qf_output[Q]))[(i * dim + k) * ntrial + j];
286 auto A0 = contract<1, 0>(source, B) + contract<1, 0>(flux(0), G);
287 auto A1 = contract<1, 0>(flux(1), B);
289 element_residual[j * step](i) += contract<0, 0>(A0, B) + contract<0, 0>(A1, G);
297 static SERAC_DEVICE auto interpolate(
const dof_type& X,
const tensor<double, dim, dim>& J,
298 const TensorProductQuadratureRule<q>& rule, cache_type<q>& A)
300 int tidx = threadIdx.x % q;
301 int tidy = threadIdx.x / q;
303 static constexpr
auto points1D = GaussLegendreNodes<q>();
304 static constexpr
auto B_ = [=]() {
305 tensor<double, q, n> B{};
306 for (
int i = 0; i < q; i++) {
307 B[i] = GaussLobattoInterpolation<n>(points1D[i]);
312 static constexpr
auto G_ = [=]() {
313 tensor<double, q, n> G{};
314 for (
int i = 0; i < q; i++) {
315 G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
320 __shared__ tensor<double, q, n> B;
321 __shared__ tensor<double, q, n> G;
322 for (
int entry = threadIdx.x; entry < n * q; entry += q * q) {
330 tuple<tensor<double, c>, tensor<double, c, dim> > qf_input{};
332 for (
int i = 0; i < c; i++) {
333 for (
int dy = tidy; dy < n; dy += q) {
334 for (
int qx = tidx; qx < q; qx += q) {
336 for (
int dx = 0; dx < n; dx++) {
337 sum[0] += B(qx, dx) * X(i, dy, dx);
338 sum[1] += G(qx, dx) * X(i, dy, dx);
340 A(0, dy, qx) = sum[0];
341 A(1, dy, qx) = sum[1];
346 for (
int qy = tidy; qy < q; qy += q) {
347 for (
int qx = tidx; qx < q; qx += q) {
348 for (
int dy = 0; dy < n; dy++) {
349 get<0>(qf_input)[i] += B(qy, dy) * A(0, dy, qx);
350 get<1>(qf_input)[i][0] += B(qy, dy) * A(1, dy, qx);
351 get<1>(qf_input)[i][1] += G(qy, dy) * A(0, dy, qx);
357 get<1>(qf_input) =
dot(get<1>(qf_input),
inv(J));
362 template <
typename T1,
typename T2,
int q>
363 static SERAC_DEVICE void integrate(tuple<T1, T2>& response,
const tensor<double, dim, dim>& J,
364 const TensorProductQuadratureRule<q>& rule, cache_type<q>& A, dof_type& residual)
366 int tidx = threadIdx.x % q;
367 int tidy = threadIdx.x / q;
369 static constexpr
auto points1D = GaussLegendreNodes<q>();
370 static constexpr
auto weights1D = GaussLegendreWeights<q>();
371 static constexpr
auto B_ = [=]() {
372 tensor<double, q, n> B{};
373 for (
int i = 0; i < q; i++) {
374 B[i] = GaussLobattoInterpolation<n>(points1D[i]);
379 static constexpr
auto G_ = [=]() {
380 tensor<double, q, n> G{};
381 for (
int i = 0; i < q; i++) {
382 G[i] = GaussLobattoInterpolationDerivative<n>(points1D[i]);
387 __shared__ tensor<double, q, n> B;
388 __shared__ tensor<double, q, n> G;
389 for (
int entry = threadIdx.x; entry < n * q; entry += q * q) {
397 auto dv =
det(J) * weights1D[tidx] * weights1D[tidy];
399 get<0>(response) = get<0>(response) * dv;
402 for (
int i = 0; i < c; i++) {
405 for (
int qy = tidy; qy < q; qy += q) {
406 for (
int dx = tidx; dx < n; dx += q) {
413 for (
int offset = 0; offset < n; offset++) {
414 int dx = (tidx + offset) % n;
415 auto sum = B(tidx, dx) * get<0>(response)(i) + G(tidx, dx) * get<1>(response)(i, 0);
416 atomicAdd(&A(0, dx, tidy), sum);
417 atomicAdd(&A(1, dx, tidy), B(tidx, dx) * get<1>(response)(i, 1));
421 for (
int dy = tidy; dy < n; dy += q) {
422 for (
int dx = tidx; dx < n; dx += q) {
424 for (
int qy = 0; qy < q; qy++) {
425 sum += B(qy, dy) * A(0, dx, qy);
426 sum += G(qy, dy) * A(1, dx, qy);
428 residual(i, dy, dx) += sum;
#define SERAC_DEVICE
Macro that evaluates to __device__ when compiling with nvcc and does nothing on a host compiler.
#define SERAC_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
constexpr SERAC_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.
SERAC_HOST_DEVICE auto max(dual< gradient_type > a, double b)
Implementation of max for dual numbers.
constexpr SERAC_HOST_DEVICE int size(const tensor< T, n... > &)
returns the total number of stored values in a tensor
constexpr SERAC_HOST_DEVICE auto det(const isotropic_tensor< T, m, m > &I)
compute the determinant of an isotropic tensor
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
constexpr SERAC_HOST_DEVICE auto inv(const isotropic_tensor< T, m, m > &I)
return the inverse of an isotropic tensor
constexpr SERAC_HOST_DEVICE auto transpose(const isotropic_tensor< T, m, m > &I)
return the transpose of an isotropic tensor