Serac  0.1
Serac is an implicit thermal strucural mechanics simulation code.
functional.hpp
Go to the documentation of this file.
1 // Copyright (c) Lawrence Livermore National Security, LLC and
2 // other Serac Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
13 #pragma once
14 
15 #include <algorithm>
16 #include <array>
17 #include <functional>
18 #include <memory>
19 #include <tuple>
20 #include <type_traits>
21 #include <utility>
22 #include <vector>
23 
24 #include "mfem.hpp"
25 
26 #include "serac/serac_config.hpp"
31 #include "serac/numerics/functional/integral.hpp"
32 #include "serac/numerics/functional/differentiate_wrt.hpp"
33 #include "serac/numerics/functional/element_restriction.hpp"
35 
36 namespace serac {
37 
39 constexpr int SOURCE = 0;
40 constexpr int FLUX = 1;
41 constexpr int VALUE = 0;
42 constexpr int DERIVATIVE = 1;
44 
45 template <int... i>
46 struct DependsOn {
47 };
48 
59 template <typename... T>
60 constexpr uint32_t index_of_differentiation()
61 {
62  constexpr uint32_t n = sizeof...(T);
63  bool matching[] = {std::is_same_v<T, differentiate_wrt_this>...};
64  for (uint32_t i = 0; i < n; i++) {
65  if (matching[i]) {
66  return i;
67  }
68  }
69  return NO_DIFFERENTIATION;
70 }
71 
75 template <int ind>
76 struct Index {
80  constexpr operator int() { return ind; }
81 };
82 
84 inline void check_for_missing_nodal_gridfunc(const mfem::Mesh& mesh)
85 {
86  if (mesh.GetNodes() == nullptr) {
87  SLIC_ERROR_ROOT(
88  R"errmsg(
89  the provided mesh does not have a nodal gridfunction.
90  If you created an mfem::Mesh manually, make sure that the
91  following member functions are invoked before use
92 
93  > mfem::Mesh::EnsureNodes();
94  > mfem::ParMesh::ExchangeFaceNbrData();
95 
96  or else the mfem::Mesh won't be fully initialized
97  )errmsg";);
98  }
99 }
100 
102 inline void check_for_unsupported_elements(const mfem::Mesh& mesh)
103 {
104  int num_elements = mesh.GetNE();
105  for (int e = 0; e < num_elements; e++) {
106  auto type = mesh.GetElementType(e);
107  if (type == mfem::Element::POINT || type == mfem::Element::WEDGE || type == mfem::Element::PYRAMID) {
108  SLIC_ERROR_ROOT("Mesh contains unsupported element type");
109  }
110  }
111 }
112 
121 template <typename function_space>
122 inline std::pair<std::unique_ptr<mfem::ParFiniteElementSpace>, std::unique_ptr<mfem::FiniteElementCollection>>
123 generateParFiniteElementSpace(mfem::ParMesh* mesh)
124 {
125  const int dim = mesh->Dimension();
126  std::unique_ptr<mfem::FiniteElementCollection> fec;
127 
128  switch (function_space::family) {
129  case Family::H1:
130  fec = std::make_unique<mfem::H1_FECollection>(function_space::order, dim);
131  break;
132  case Family::HCURL:
133  fec = std::make_unique<mfem::ND_FECollection>(function_space::order, dim);
134  break;
135  case Family::HDIV:
136  fec = std::make_unique<mfem::RT_FECollection>(function_space::order, dim);
137  break;
138  case Family::L2:
139  // We use GaussLobatto basis functions as this is what is used for the serac::Functional FE kernels
140  fec = std::make_unique<mfem::L2_FECollection>(function_space::order, dim, mfem::BasisType::GaussLobatto);
141  break;
142  default:
143  return {nullptr, nullptr};
144  }
145 
146  // NOTE: Clang-tidy complains about a potential leak of memory pointed at `fes` whenever its dereferenced,
147  // because MFEM still uses raw pointers. Assuming MFEM handles its pointers properly, it can be safely ignored.
148  auto fes =
149  std::make_unique<mfem::ParFiniteElementSpace>(mesh, fec.get(), function_space::components, serac::ordering);
150 
151  return std::pair(std::move(fes), std::move(fec));
152 }
153 
160 inline void updateFaceNbrData(const mfem::ParFiniteElementSpace* const_trial_space, mfem::ParGridFunction& trial_pgf,
161  mfem::Vector& trial_tdof_vals)
162 {
163  mfem::ParFiniteElementSpace* nonconst_trial_space = const_cast<mfem::ParFiniteElementSpace*>(const_trial_space);
164 
165  // Link the ParGridFunction to external FE space and data by reference
166  trial_pgf.MakeRef(nonconst_trial_space, trial_tdof_vals.GetData());
167 
168  // Exchange face nbr data to set the vector in ParGridFunction with the right size
169  // This should automatically invoke ExchangeFaceNbrData on the subsequent ParFESpace and ParMesh
170  trial_pgf.ExchangeFaceNbrData();
171 }
172 
177 inline void appendFaceNbrData(const mfem::ParFiniteElementSpace* trial_space, const mfem::ParGridFunction& trial_pgf,
178  const int LSize, mfem::Vector& input_L)
179 {
180  int num_ghost_elem = trial_space->GetParMesh()->GetNFaceNeighborElements();
181  int components_per_node = trial_space->GetVDim();
182  const mfem::Vector& face_neighbor_data = trial_pgf.FaceNbrData();
183 
184  int offset = 0;
185  for (int n = 0; n < num_ghost_elem; ++n) {
186  mfem::Array<int> old_shared_elem_vdof_ids;
187  trial_space->GetFaceNbrElementVDofs(n, old_shared_elem_vdof_ids);
188  int dofs_per_element = old_shared_elem_vdof_ids.Size() / components_per_node;
189 
190  // Put the entries into their new index
191  for (int m = 0; m < old_shared_elem_vdof_ids.Size(); ++m) {
192  // -------------- dofs before -------------- ---- component -----
193  int new_id = (m % dofs_per_element) * components_per_node + m / dofs_per_element + offset;
194  input_L[LSize + new_id] = face_neighbor_data(old_shared_elem_vdof_ids[m]);
195  }
196  // Increase offset for next ghost element
197  offset += old_shared_elem_vdof_ids.Size();
198  }
199 }
200 
204 inline void rearrangeFaceNbrDofGlobalIndex(const mfem::ParFiniteElementSpace* trial_space,
205  mfem::Array<HYPRE_BigInt>& face_nbr_glob_vdof_map)
206 {
207  int components_per_node = trial_space->GetVDim();
208  if (components_per_node == 1) {
209  face_nbr_glob_vdof_map = trial_space->face_nbr_glob_dof_map;
210  return;
211  }
212 
213  if (components_per_node > 1 && trial_space->GetOrdering() == mfem::Ordering::byNODES) {
214  SLIC_ERROR_ROOT("Unsupported: L2 vector field ordered by nodes");
215  }
216 
217  face_nbr_glob_vdof_map.SetSize(trial_space->face_nbr_glob_dof_map.Size());
218  int num_ghost_elem = trial_space->GetParMesh()->GetNFaceNeighborElements();
219  const HYPRE_BigInt* face_nbr_glob_dof_index = trial_space->face_nbr_glob_dof_map;
220 
221  int offset = 0;
222  for (int n = 0; n < num_ghost_elem; ++n) {
223  mfem::Array<int> old_shared_elem_vdof_ids;
224  trial_space->GetFaceNbrElementVDofs(n, old_shared_elem_vdof_ids);
225  int dofs_per_element = old_shared_elem_vdof_ids.Size() / components_per_node;
226 
227  // Put the entries into their new index
228  for (int m = 0; m < old_shared_elem_vdof_ids.Size(); ++m) {
229  int new_id = (m % dofs_per_element) * components_per_node + m / dofs_per_element + offset;
230  face_nbr_glob_vdof_map[new_id] = face_nbr_glob_dof_index[old_shared_elem_vdof_ids[m]];
231  }
232  // Increase offset for next ghost element
233  offset += old_shared_elem_vdof_ids.Size();
234  }
235 }
236 
238 template <typename T, ExecutionSpace exec = serac::default_execution_space>
239 class Functional;
241 
282 template <typename test, typename... trials, ExecutionSpace exec>
283 class Functional<test(trials...), exec> {
284  static constexpr tuple<trials...> trial_spaces{};
285  static constexpr uint32_t num_trial_spaces = sizeof...(trials);
286  static constexpr auto Q = std::max({test::order, trials::order...}) + 1;
287 
288  static constexpr mfem::Geometry::Type elem_geom[4] = {mfem::Geometry::INVALID, mfem::Geometry::SEGMENT,
289  mfem::Geometry::SQUARE, mfem::Geometry::CUBE};
290  static constexpr mfem::Geometry::Type simplex_geom[4] = {mfem::Geometry::INVALID, mfem::Geometry::SEGMENT,
291  mfem::Geometry::TRIANGLE, mfem::Geometry::TETRAHEDRON};
292 
293  class Gradient;
294 
295  // clang-format off
296  template <uint32_t i>
297  struct operator_paren_return {
298  using type = typename std::conditional<
299  i == NO_DIFFERENTIATION, // if `i` indicates that we want to skip differentiation
300  mfem::Vector&, // we just return the value
301  serac::tuple<mfem::Vector&, Gradient&> // otherwise we return the value and the derivative w.r.t arg `i`
302  >::type;
303  };
304  // clang-format on
305 
306  public:
312  Functional(const mfem::ParFiniteElementSpace* test_fes,
313  std::array<const mfem::ParFiniteElementSpace*, num_trial_spaces> trial_fes)
314  : update_qdata_(false), test_space_(test_fes), trial_space_(trial_fes), mem_type(mfem::Device::GetMemoryType())
315  {
317 
318  test_function_space_ = {test::family, test::order, test::components};
319 
320  std::array<Family, num_trial_spaces> trial_families = {trials::family...};
321  std::array<int, num_trial_spaces> trial_orders = {trials::order...};
322  std::array<int, num_trial_spaces> trial_components = {trials::components...};
323 
324  for (uint32_t i = 0; i < num_trial_spaces; i++) {
325  trial_function_spaces_[i] = {trial_families[i], trial_orders[i], trial_components[i]};
326 
327  P_trial_[i] = trial_space_[i]->GetProlongationMatrix();
328 
329  // For L2 spaces, configure a ParGridFunction with external data to exchange face neighbor data.
330  // This is for DG method where interior faces on the processor boundary need to access data on
331  // the neighrbor element
332  if (trial_function_spaces_[i].family == Family::L2) {
333  // Set the vector size to store tdof data
334  input_ldof_values_[i].SetSize(P_trial_[i]->Height(), mem_type);
335 
336  // Initialize face neighbor data vector
337  updateFaceNbrData(trial_space_[i], trial_pargrid_functions_[i], input_ldof_values_[i]);
338 
339  // Set the local input vector size to store local + ghost data
340  input_L_[i].SetSize(input_ldof_values_[i].Size() + trial_pargrid_functions_[i].FaceNbrData().Size(), mem_type);
341 
342  // Rearrange the global index of face neighbor vdofs to the correct ordering.
343  // This only needs to be done once
344  rearrangeFaceNbrDofGlobalIndex(trial_space_[i], face_nbr_glob_vdof_maps_[i]);
345  } else {
346  input_L_[i].SetSize(P_trial_[i]->Height(), mem_type);
347  }
348 
349  // create the necessary number of empty mfem::Vectors, to be resized later
350  input_E_.push_back({});
351  input_E_buffer_.push_back({});
352  }
353 
354  // for (auto type : {Domain::Type::Elements, Domain::Type::BoundaryElements, Domain::Type::InteriorFaces}) {
355  // output_E_[type].Update(G_test_[type].bOffsets(), mem_type);
356  // }
357 
358  P_test_ = test_space_->GetProlongationMatrix();
359 
360  if (test_function_space_.family == Family::L2) {
361  // We only need these variables to set the output_L_ vector to the right size
362  mfem::ParGridFunction X_pgf;
363  mfem::Vector X_ldof_values(P_test_->Height(), mem_type);
364 
365  updateFaceNbrData(test_space_, X_pgf, X_ldof_values);
366  output_L_.SetSize(X_ldof_values.Size() + X_pgf.FaceNbrData().Size(), mem_type);
367  } else {
368  output_L_.SetSize(P_test_->Height(), mem_type);
369  }
370 
371  output_T_.SetSize(test_fes->GetTrueVSize(), mem_type);
372 
373  // gradient objects depend on some member variables in
374  // Functional, so we initialize the gradient objects last
375  // to ensure that those member variables are initialized first
376  for (uint32_t i = 0; i < num_trial_spaces; i++) {
377  grad_.emplace_back(*this, i);
378  }
379  }
380 
392  template <int dim, int... args, typename lambda, typename qpt_data_type = Nothing>
393  void AddDomainIntegral(Dimension<dim>, DependsOn<args...>, const lambda& integrand, Domain& domain,
394  std::shared_ptr<QuadratureData<qpt_data_type>> qdata = NoQData)
395  {
396  if (domain.mesh_.GetNE() == 0) return;
397 
398  SLIC_ERROR_ROOT_IF(dim != domain.mesh_.Dimension(), "invalid mesh dimension for domain integral");
399 
400  check_for_unsupported_elements(domain.mesh_);
401  check_for_missing_nodal_gridfunc(domain.mesh_);
402 
403  std::vector<uint32_t> arg_vec = {args...};
404  for (uint32_t i : arg_vec) {
405  domain.insert_restriction(trial_space_[i], trial_function_spaces_[i]);
406  }
407  domain.insert_restriction(test_space_, test_function_space_);
408 
409  using signature = test(decltype(serac::type<args>(trial_spaces))...);
410  integrals_.push_back(
411  MakeDomainIntegral<signature, Q, dim>(domain, integrand, qdata, std::vector<uint32_t>{args...}));
412  }
413 
423  template <int dim, int... args, typename lambda>
424  void AddBoundaryIntegral(Dimension<dim>, DependsOn<args...>, const lambda& integrand, Domain& domain)
425  {
426  auto num_bdr_elements = domain.mesh_.GetNBE();
427  if (num_bdr_elements == 0) return;
428 
429  SLIC_ERROR_ROOT_IF(dim != domain.dim_, "invalid domain of integration for boundary integral");
430 
431  check_for_missing_nodal_gridfunc(domain.mesh_);
432 
433  std::vector<uint32_t> arg_vec = {args...};
434  for (uint32_t i : arg_vec) {
435  domain.insert_restriction(trial_space_[i], trial_function_spaces_[i]);
436  }
437  domain.insert_restriction(test_space_, test_function_space_);
438 
439  using signature = test(decltype(serac::type<args>(trial_spaces))...);
440  integrals_.push_back(MakeBoundaryIntegral<signature, Q, dim>(domain, integrand, std::vector<uint32_t>{args...}));
441  }
442 
446  template <int dim, int... args, typename Integrand>
447  void AddInteriorFaceIntegral(Dimension<dim>, DependsOn<args...>, const Integrand& integrand, Domain& domain)
448  {
449  check_for_missing_nodal_gridfunc(domain.mesh_);
450 
451  std::vector<uint32_t> arg_vec = {args...};
452  for (uint32_t i : arg_vec) {
453  domain.insert_restriction(trial_space_[i], trial_function_spaces_[i]);
454  }
455  domain.insert_restriction(test_space_, test_function_space_);
456 
457  using signature = test(decltype(serac::type<args>(trial_spaces))...);
458  integrals_.push_back(
459  MakeInteriorFaceIntegral<signature, Q, dim>(domain, integrand, std::vector<uint32_t>{args...}));
460  }
461 
471  template <int... args, typename lambda, typename qpt_data_type = Nothing>
472  void AddAreaIntegral(DependsOn<args...> which_args, const lambda& integrand, Domain& domain,
473  std::shared_ptr<QuadratureData<qpt_data_type>> data = NoQData)
474  {
475  AddDomainIntegral(Dimension<2>{}, which_args, integrand, domain, data);
476  }
477 
487  template <int... args, typename lambda, typename qpt_data_type = Nothing>
488  void AddVolumeIntegral(DependsOn<args...> which_args, const lambda& integrand, Domain& domain,
489  std::shared_ptr<QuadratureData<qpt_data_type>> data = NoQData)
490  {
491  AddDomainIntegral(Dimension<3>{}, which_args, integrand, domain, data);
492  }
493 
495  template <int... args, typename lambda>
496  void AddSurfaceIntegral(DependsOn<args...> which_args, const lambda& integrand, Domain& domain)
497  {
498  AddBoundaryIntegral(Dimension<2>{}, which_args, integrand, domain);
499  }
500 
512  void ActionOfGradient(const mfem::Vector& input_T, mfem::Vector& output_T, uint32_t which) const
513  {
514  // Please refer to 'operator()' below for detailed comments of this implementation
515  if (trial_function_spaces_[which].family == Family::L2) {
516  // copy input_L[which] and facenbrdata[which] into a common array like:
517  // first part second part
518  // [ --- L --- | --- FND --- ]
519  P_trial_[which]->Mult(input_T, input_ldof_values_[which]);
520  input_L_[which].SetVector(input_ldof_values_[which], 0);
521  updateFaceNbrData(trial_space_[which], trial_pargrid_functions_[which], input_ldof_values_[which]);
522 
523  // Ensure ghost data ordering is consistent with local data
524  if (trial_space_[which]->GetVDim() == 1) {
525  input_L_[which].SetVector(trial_pargrid_functions_[which].FaceNbrData(), input_ldof_values_[which].Size());
526  } else {
527  if (trial_space_[which]->GetOrdering() == mfem::Ordering::byVDIM) {
528  int local_size = input_ldof_values_[which].Size();
529  appendFaceNbrData(trial_space_[which], trial_pargrid_functions_[which], local_size, input_L_[which]);
530  } else {
531  SLIC_ERROR_ROOT("Unsupported: L2 vector field ordered by nodes");
532  }
533  }
534  } else {
535  P_trial_[which]->Mult(input_T, input_L_[which]);
536  }
537 
538  output_L_ = 0.0;
539 
540  for (auto& integral : integrals_) {
541  if (integral.DependsOn(which)) {
542  Domain& dom = integral.domain_;
543 
544  const serac::BlockElementRestriction& G_trial = dom.get_restriction(trial_function_spaces_[which]);
545  input_E_buffer_[which].SetSize(int(G_trial.ESize()));
546  input_E_[which].Update(input_E_buffer_[which], G_trial.bOffsets());
547  G_trial.Gather(input_L_[which], input_E_[which]);
548 
549  const serac::BlockElementRestriction& G_test = dom.get_restriction(test_function_space_);
550  output_E_buffer_.SetSize(int(G_test.ESize()));
551  output_E_.Update(output_E_buffer_, G_test.bOffsets());
552 
553  integral.GradientMult(input_E_[which], output_E_, which);
554 
555  // scatter-add to compute residuals on the local processor
556  G_test.ScatterAdd(output_E_, output_L_);
557  }
558  }
559 
560  // scatter-add to compute global residuals
561  if (test_function_space_.family == Family::L2) {
562  // Extract a subvector from output_L_ for local residuals excluding ghost dofs for L2 spaces
563  // The output vector is in form [ --- L --- | --- FND --- ]
564  mfem::Vector output_ldof_values(P_test_->Height(), mem_type);
565  for (int j = 0; j < output_ldof_values.Size(); ++j) {
566  output_ldof_values[j] = output_L_[j];
567  }
568  P_test_->MultTranspose(output_ldof_values, output_T);
569  } else {
570  P_test_->MultTranspose(output_L_, output_T);
571  }
572  }
573 
586  template <uint32_t wrt, typename... T>
587  typename operator_paren_return<wrt>::type operator()(DifferentiateWRT<wrt>, double t, const T&... args)
588  {
589  const mfem::Vector* input_T[] = {&static_cast<const mfem::Vector&>(args)...};
590 
591  // get the values for each local processor
592  for (uint32_t i = 0; i < num_trial_spaces; i++) {
593  if (trial_function_spaces_[i].family == Family::L2) {
594  // copy input_L[i] and facenbrdata[i] into a common array like:
595  // first part second part
596  // [ --- L --- | --- FND --- ]
597  P_trial_[i]->Mult(*input_T[i], input_ldof_values_[i]);
598 
599  // MakeRef from mfem::Vector will actually delete the data in the original vector, so we assign the
600  // values directly by SetVector, which invokes additional operations. This can be optimized later.
601  input_L_[i].SetVector(input_ldof_values_[i], 0);
602 
603  // Update again with new data
604  updateFaceNbrData(trial_space_[i], trial_pargrid_functions_[i], input_ldof_values_[i]);
605 
606  // Set the values of neighbor ghost dofs
607  // The entries in face_nbr_data vector is ALWAYS arranged byNODES per "volumetric" element.
608  // For example with two quadrilateral ghost elements we would have such face_nbr_data
609  // ----------- Ghost elem 1 ---------- ----------- Ghost elem 2 ----------
610  // {X1 X1 X1 X1 Y1 Y1 Y1 Y1 Z1 Z1 Z1 Z1 X2 X2 X2 X2 Y2 Y2 Y2 Y2 Z2 Z2 Z2 Z2}
611  //
612  // This is because 1) mfem prepares face neighbor data element by element to communicate later (standard).
613  // 2) mfem uses GetElementVDofs to gather the data for communication which returns a set of indices ALWAYS
614  // ordered byNODES (???). If the ordering we set for ParFiniteElementSpace is byVDIM, we need the entries to be
615  // ----------- Ghost elem 1 ---------- ----------- Ghost elem 2 ----------
616  // {X1 Y1 Z1 X1 Y1 Z1 X1 Y1 Z1 X1 Y1 Z1 X2 Y2 Z2 X2 Y2 Z2 X2 Y2 Z2 X2 Y2 Z2}
617  // so it's consistent with the local data. Therefore, we need to manually change this ordering.
618  if (trial_space_[i]->GetVDim() == 1) {
619  // For scalar fields, this weird ordering doesn't cause any problems.
620  input_L_[i].SetVector(trial_pargrid_functions_[i].FaceNbrData(), input_ldof_values_[i].Size());
621  } else {
622  if (trial_space_[i]->GetOrdering() == mfem::Ordering::byVDIM) {
623  // For vector fields with byVDIM ordering, we manually set the entries in VDIM order. By the way this
624  // is really annoying because we have to get face neighbor vdof indices again in element restriction.
625  int local_size = input_ldof_values_[i].Size();
626  appendFaceNbrData(trial_space_[i], trial_pargrid_functions_[i], local_size, input_L_[i]);
627  } else {
628  // For vector fields with byNODES ordering, we need to prepare the local input vector in the form
629  // { true_comp_X ghost_comp_X true_comp_Y ghost_comp_Y true_comp_Z ghost_comp_Z }.
630  // In this form, to ensure the correct mapping between dof and vdof, we need to change the ndofs in
631  // FiniteElementSpace to include ones from ghost element. Additionally, we need to prepare
632  // ghost_comp_X / ghost_comp_Y / ghost_comp_Z from element wise entries in face_nbr_data,
633  // eg. ghost_comp_X includes X component entries of all ghost element dofs.
634  // This requires significant renumbering of the dofs and therefore is not supported for now.
635  SLIC_ERROR_ROOT("Unsupported: L2 vector field ordered by nodes");
636  }
637  }
638  } else {
639  P_trial_[i]->Mult(*input_T[i], input_L_[i]);
640  }
641  }
642 
643  output_L_ = 0.0;
644 
645  for (auto& integral : integrals_) {
646  Domain& dom = integral.domain_;
647 
648  const serac::BlockElementRestriction& G_test = dom.get_restriction(test_function_space_);
649 
650  for (auto i : integral.active_trial_spaces_) {
651  const serac::BlockElementRestriction& G_trial = dom.get_restriction(trial_function_spaces_[i]);
652  input_E_buffer_[i].SetSize(int(G_trial.ESize()));
653  input_E_[i].Update(input_E_buffer_[i], G_trial.bOffsets());
654  G_trial.Gather(input_L_[i], input_E_[i]);
655  }
656 
657  output_E_buffer_.SetSize(int(G_test.ESize()));
658  output_E_.Update(output_E_buffer_, G_test.bOffsets());
659  integral.Mult(t, input_E_, output_E_, wrt, update_qdata_);
660 
661  // scatter-add to compute residuals on the local processor
662  G_test.ScatterAdd(output_E_, output_L_);
663  }
664 
665  // scatter-add to compute global residuals
666  if (test_function_space_.family == Family::L2) {
667  // Extract a subvector from output_L_ for local residuals excluding ghost dofs for L2 spaces
668  // The output vector is in form [ --- L --- | --- FND --- ]
669  mfem::Vector output_ldof_values(P_test_->Height(), mem_type);
670  for (int j = 0; j < output_ldof_values.Size(); ++j) {
671  output_ldof_values[j] = output_L_[j];
672  }
673  P_test_->MultTranspose(output_ldof_values, output_T_);
674  } else {
675  P_test_->MultTranspose(output_L_, output_T_);
676  }
677 
678  if constexpr (wrt != NO_DIFFERENTIATION) {
679  // if the user has indicated they'd like to evaluate and differentiate w.r.t.
680  // a specific argument, then we return both the value and gradient w.r.t. that argument
681  //
682  // mfem::Vector arg0 = ...;
683  // mfem::Vector arg1 = ...;
684  // e.g. auto [value, gradient_wrt_arg1] = my_functional(arg0, differentiate_wrt(arg1));
685  return {output_T_, grad_[wrt]};
686  }
687 
688  if constexpr (wrt == NO_DIFFERENTIATION) {
689  // if the user passes only `mfem::Vector`s then we assume they only want the output value
690  //
691  // mfem::Vector arg0 = ...;
692  // mfem::Vector arg1 = ...;
693  // e.g. mfem::Vector value = my_functional(arg0, arg1);
694  return output_T_;
695  }
696  }
697 
699  template <typename... T>
700  auto operator()(double t, const T&... args)
701  {
702  constexpr int num_differentiated_arguments = (std::is_same_v<T, differentiate_wrt_this> + ...);
703  static_assert(num_differentiated_arguments <= 1,
704  "Error: Functional::operator() can only differentiate w.r.t. 1 argument a time");
705  static_assert(sizeof...(T) == num_trial_spaces,
706  "Error: Functional::operator() must take exactly as many arguments as trial spaces");
707 
708  [[maybe_unused]] constexpr uint32_t i = index_of_differentiation<T...>();
709 
710  return (*this)(DifferentiateWRT<i>{}, t, args...);
711  }
712 
721  void updateQdata(bool update_flag) { update_qdata_ = update_flag; }
722 
723  private:
725  bool update_qdata_;
726 
732  class Gradient : public mfem::Operator {
733  public:
738  Gradient(Functional<test(trials...), exec>& f, uint32_t which = 0)
739  : mfem::Operator(f.test_space_->GetTrueVSize(), f.trial_space_[which]->GetTrueVSize()),
740  form_(f),
741  which_argument(which),
742  test_space_(f.test_space_),
743  trial_space_(f.trial_space_[which]),
744  df_(f.test_space_->GetTrueVSize())
745  {
747  }
748 
754  virtual void Mult(const mfem::Vector& dx, mfem::Vector& df) const override
755  {
756  form_.ActionOfGradient(dx, df, which_argument);
757  }
758 
760  mfem::Vector& operator()(const mfem::Vector& dx)
761  {
762  form_.ActionOfGradient(dx, df_, which_argument);
763  return df_;
764  }
765 
766  void initialize_sparsity_pattern()
767  {
768  using row_col = std::tuple<int, int>;
769 
770  std::set<row_col> nonzero_entries;
771 
772  for (auto& integral : form_.integrals_) {
773  if (integral.DependsOn(which_argument)) {
774  Domain& dom = integral.domain_;
775  const auto& G_test = dom.get_restriction(form_.test_function_space_);
776  const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]);
777  for (const auto& [geom, test_restriction] : G_test.restrictions) {
778  const auto& trial_restriction = G_trial.restrictions.at(geom);
779 
780  // the degrees of freedom associated with the rows/columns of the e^th element stiffness matrix
781  std::vector<int> test_vdofs(test_restriction.nodes_per_elem * test_restriction.components);
782  std::vector<int> trial_vdofs(trial_restriction.nodes_per_elem * trial_restriction.components);
783 
784  auto num_elements = static_cast<uint32_t>(test_restriction.num_elements);
785  for (uint32_t e = 0; e < num_elements; e++) {
786  for (uint32_t i = 0; i < test_restriction.nodes_per_elem; i++) {
787  auto test_dof = test_restriction.dof_info(e, i);
788  for (uint32_t j = 0; j < test_restriction.components; j++) {
789  test_vdofs[i * test_restriction.components + j] = int(test_restriction.GetVDof(test_dof, j).index());
790  }
791  }
792 
793  for (uint32_t i = 0; i < trial_restriction.nodes_per_elem; i++) {
794  auto trial_dof = trial_restriction.dof_info(e, i);
795  for (uint32_t j = 0; j < trial_restriction.components; j++) {
796  trial_vdofs[i * trial_restriction.components + j] =
797  int(trial_restriction.GetVDof(trial_dof, j).index());
798  }
799  }
800 
801  for (int row : test_vdofs) {
802  for (int col : trial_vdofs) {
803  nonzero_entries.insert({row, col});
804  }
805  }
806  }
807  }
808  }
809  }
810 
811  uint64_t nnz = nonzero_entries.size();
812  int nrows = form_.output_L_.Size();
813 
814  row_ptr.resize(uint32_t(nrows + 1));
815  col_ind.resize(nnz);
816 
817  int nz = 0;
818  int last_row = -1;
819  for (auto [row, col] : nonzero_entries) {
820  col_ind[uint32_t(nz)] = col;
821  for (int i = last_row + 1; i <= row; i++) {
822  row_ptr[uint32_t(i)] = nz;
823  }
824  last_row = row;
825  nz++;
826  }
827  for (int i = last_row + 1; i <= nrows; i++) {
828  row_ptr[uint32_t(i)] = nz;
829  }
830  };
831 
832  uint64_t max_buffer_size()
833  {
834  uint64_t max_entries = 0;
835  for (auto& integral : form_.integrals_) {
836  if (integral.DependsOn(which_argument)) {
837  Domain& dom = integral.domain_;
838  const auto& G_test = dom.get_restriction(form_.test_function_space_);
839  const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]);
840  for (const auto& [geom, test_restriction] : G_test.restrictions) {
841  const auto& trial_restriction = G_trial.restrictions.at(geom);
842  uint64_t nrows_per_element = test_restriction.nodes_per_elem * test_restriction.components;
843  uint64_t ncols_per_element = trial_restriction.nodes_per_elem * trial_restriction.components;
844  uint64_t entries_per_element = nrows_per_element * ncols_per_element;
845  uint64_t entries_needed = test_restriction.num_elements * entries_per_element;
846  max_entries = std::max(entries_needed, max_entries);
847  }
848  }
849  }
850  return max_entries;
851  }
852 
853  std::unique_ptr<mfem::HypreParMatrix> assemble()
854  {
855  if (row_ptr.empty()) {
856  initialize_sparsity_pattern();
857  }
858 
859  // since we own the storage for row_ptr, col_ind, values,
860  // we ask mfem to not deallocate those pointers in the SparseMatrix dtor
861  constexpr bool sparse_matrix_frees_graph_ptrs = false;
862  constexpr bool sparse_matrix_frees_values_ptr = false;
863  constexpr bool col_ind_is_sorted = true;
864 
865  // note: we make a copy of col_ind since mfem::HypreParMatrix
866  // changes it in the constructor
867  std::vector<int> col_ind_copy = col_ind;
868 
869  int nnz = row_ptr.back();
870  std::vector<double> values(uint32_t(nnz), 0.0);
871  auto A_local = mfem::SparseMatrix(row_ptr.data(), col_ind_copy.data(), values.data(), form_.output_L_.Size(),
872  form_.input_L_[which_argument].Size(), sparse_matrix_frees_graph_ptrs,
873  sparse_matrix_frees_values_ptr, col_ind_is_sorted);
874 
875  std::vector<double> K_elem_buffer(max_buffer_size());
876 
877  for (auto& integral : form_.integrals_) {
878  // if this integral's derivative isn't identically zero
879  if (integral.functional_to_integral_index_.count(which_argument) > 0) {
880  Domain& dom = integral.domain_;
881 
882  uint32_t id = integral.functional_to_integral_index_.at(which_argument);
883  const auto& G_test = dom.get_restriction(form_.test_function_space_);
884  const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]);
885  for (const auto& [geom, calculate_element_matrices_func] : integral.element_gradient_[id]) {
886  const auto& test_restriction = G_test.restrictions.at(geom);
887  const auto& trial_restriction = G_trial.restrictions.at(geom);
888 
889  // prepare a buffer to hold the element matrices
890  CPUArrayView<double, 3> K_e(K_elem_buffer.data(), test_restriction.num_elements,
891  trial_restriction.nodes_per_elem * trial_restriction.components,
892  test_restriction.nodes_per_elem * test_restriction.components);
893  detail::zero_out(K_e);
894 
895  // perform the actual calculations
896  calculate_element_matrices_func(K_e);
897 
898  const std::vector<int>& element_ids = integral.domain_.get(geom);
899 
900  uint32_t rows_per_elem = uint32_t(test_restriction.nodes_per_elem * test_restriction.components);
901  uint32_t cols_per_elem = uint32_t(trial_restriction.nodes_per_elem * trial_restriction.components);
902 
903  std::vector<DoF> test_vdofs(rows_per_elem);
904  std::vector<DoF> trial_vdofs(cols_per_elem);
905 
906  for (uint32_t e = 0; e < element_ids.size(); e++) {
907  test_restriction.GetElementVDofs(int(e), test_vdofs);
908  trial_restriction.GetElementVDofs(int(e), trial_vdofs);
909 
910  for (uint32_t i = 0; i < cols_per_elem; i++) {
911  int col = int(trial_vdofs[i].index());
912 
913  for (uint32_t j = 0; j < rows_per_elem; j++) {
914  int row = int(test_vdofs[j].index());
915  A_local.SearchRow(row, col) += K_e(e, i, j);
916  }
917  }
918  }
919  }
920  }
921  }
922 
923  auto* R = form_.test_space_->Dof_TrueDof_Matrix();
924 
925  // If either test_space_ or trial_space_ is L2, the local matrix (A_local) will includ ghost rows and columns like
926  // ------------ -------
927  // | || |
928  // | diagonal || off-d |
929  // | block || block |
930  // | || |
931  // ------------ -------
932  // ------------ -------
933  // | ghost || XXX |
934  // | rows || XXX |
935  // ------------ -------
936  // We only need
937  // ------------ -------
938  // | || |
939  // | diagonal || off-d |
940  // | block || block |
941  // | || |
942  // ------------ -------
943  // to compute local residual and the neighbor dof values will be communicated to multiply into off-digonal block.
944  // So we construct a HypreParMatrix that contains the off-diagonal block from the local sparse matrix
945  mfem::HypreParMatrix* A_hypre;
946  if (dynamic_cast<const mfem::L2_FECollection*>(test_space_->FEColl()) ||
947  dynamic_cast<const mfem::L2_FECollection*>(trial_space_->FEColl())) {
948  int lrows = test_space_->GetVSize();
949  int lcols = trial_space_->GetVSize();
950  HYPRE_BigInt col_offset = trial_space_->GetMyDofOffset();
951  mfem::Array<HYPRE_BigInt> glob_J(A_local.NumNonZeroElems());
952 
953  int* J = A_local.GetJ();
954  for (int j = 0; j < glob_J.Size(); ++j) {
955  if (J[j] < lcols) {
956  glob_J[j] = J[j] + col_offset;
957  } else {
958  glob_J[j] = form_.face_nbr_glob_vdof_maps_[which_argument][J[j] - lcols];
959  }
960  }
961  A_hypre = new mfem::HypreParMatrix(test_space_->GetComm(), lrows, test_space_->GlobalVSize(),
962  trial_space_->GlobalVSize(), A_local.GetI(), glob_J, A_local.GetData(),
963  test_space_->GetDofOffsets(), trial_space_->GetDofOffsets());
964  glob_J.DeleteAll();
965  } else {
966  A_hypre =
967  new mfem::HypreParMatrix(test_space_->GetComm(), test_space_->GlobalVSize(), trial_space_->GlobalVSize(),
968  test_space_->GetDofOffsets(), trial_space_->GetDofOffsets(), &A_local);
969  }
970 
971  auto* P = trial_space_->Dof_TrueDof_Matrix();
972 
973  std::unique_ptr<mfem::HypreParMatrix> A(mfem::RAP(R, A_hypre, P));
974 
975  delete A_hypre;
976 
977  return A;
978  };
979 
980  friend auto assemble(Gradient& g) { return g.assemble(); }
981 
982  private:
984  Functional<test(trials...), exec>& form_;
985 
986  std::vector<int> row_ptr;
987  std::vector<int> col_ind;
988 
999  uint32_t which_argument;
1000 
1002  const mfem::ParFiniteElementSpace* test_space_;
1003 
1005  const mfem::ParFiniteElementSpace* trial_space_;
1006 
1008  mfem::Vector df_;
1009  };
1010 
1012  const mfem::ParFiniteElementSpace* test_space_;
1013 
1015  std::array<const mfem::ParFiniteElementSpace*, num_trial_spaces> trial_space_;
1016 
1017  std::array<FunctionSpace, num_trial_spaces> trial_function_spaces_;
1018  FunctionSpace test_function_space_;
1019 
1021  std::array<mfem::Array<HYPRE_BigInt>, num_trial_spaces> face_nbr_glob_vdof_maps_;
1022 
1024  mutable std::array<mfem::ParGridFunction, num_trial_spaces> trial_pargrid_functions_;
1025 
1027  mutable std::array<mfem::Vector, num_trial_spaces> input_ldof_values_;
1028 
1033  const mfem::Operator* P_trial_[num_trial_spaces];
1034 
1036  mutable mfem::Vector input_L_[num_trial_spaces];
1037 
1038  mutable std::vector<mfem::Vector> input_E_buffer_;
1039  mutable std::vector<mfem::BlockVector> input_E_;
1040 
1041  mutable std::vector<Integral> integrals_;
1042 
1043  mutable mfem::Vector output_E_buffer_;
1044  mutable mfem::BlockVector output_E_;
1045 
1047  mutable mfem::Vector output_L_;
1048 
1049  const mfem::Operator* P_test_;
1050 
1052  mutable mfem::Vector output_T_;
1053 
1055  mutable std::vector<Gradient> grad_;
1056 
1057  const mfem::MemoryType mem_type;
1058 };
1059 
1060 } // namespace serac
1061 
1062 #include "functional_qoi.inl"
many of the functions in this file amount to extracting element indices from an mesh_t like
This file contains helper traits and enumerations for classifying finite elements.
a specialization of serac::Functional for quantities of interest
This file contains the all the necessary functions and macros required for logging as well as a helpe...
Accelerator functionality.
Definition: serac.cpp:36
std::shared_ptr< QuadratureData< Nothing > > NoQData
a single instance of a QuadratureData container of Nothings, since they are all interchangeable
SERAC_HOST_DEVICE auto max(dual< gradient_type > a, double b)
Implementation of max for dual numbers.
Definition: dual.hpp:229
void updateFaceNbrData(const mfem::ParFiniteElementSpace *const_trial_space, mfem::ParGridFunction &trial_pgf, mfem::Vector &trial_tdof_vals)
helper function to locally cast away const on FE space so we can update face neighbor data with Excha...
Definition: functional.hpp:151
void check_for_missing_nodal_gridfunc(const mfem::Mesh &mesh)
function for verifying that the mesh has been fully initialized
Definition: functional.hpp:84
constexpr uint32_t index_of_differentiation()
given a list of types, this function returns the index that corresponds to the type dual_vector.
Definition: functional.hpp:60
void appendFaceNbrData(const mfem::ParFiniteElementSpace *trial_space, const mfem::ParGridFunction &trial_pgf, const int LSize, mfem::Vector &input_L)
helper functional to reorder the ordering of FaceNbrData for L2 space to byVDIM and append this vecto...
Definition: functional.hpp:168
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:379
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
Definition: accelerator.hpp:73
void check_for_unsupported_elements(const mfem::Mesh &mesh)
function for verifying that there are no unsupported element types in the mesh
Definition: functional.hpp:93
void rearrangeFaceNbrDofGlobalIndex(const mfem::ParFiniteElementSpace *trial_space, mfem::Array< HYPRE_BigInt > &face_nbr_glob_vdof_map)
helper functional to reorder the face_nbr_glob_dof_map for L2 space to byVDIM
Definition: functional.hpp:195
std::pair< std::unique_ptr< mfem::ParFiniteElementSpace >, std::unique_ptr< mfem::FiniteElementCollection > > generateParFiniteElementSpace(mfem::ParMesh *mesh)
create an mfem::ParFiniteElementSpace from one of serac's tag types: H1, Hcurl, L2
Definition: functional.hpp:114
#define SERAC_MARK_FUNCTION
Definition: profiling.hpp:90
Definitions of quadrature rules for quads and hexes.
a generalization of mfem::ElementRestriction that works with multiple kinds of element geometries....
mfem::Array< int > bOffsets() const
block offsets used when constructing mfem::HypreParVectors
void ScatterAdd(const mfem::BlockVector &E_block_vector, mfem::Vector &L_vector) const
"E->L" in mfem parlance, each element scatter-adds its local vector into the appropriate place in the...
uint64_t ESize() const
the size of the "E-vector" associated with this restriction operator
std::map< mfem::Geometry::Type, ElementRestriction > restrictions
the individual ElementRestriction operators for each element geometry
void Gather(const mfem::Vector &L_vector, mfem::BlockVector &E_block_vector) const
"L->E" in mfem parlance, each element gathers the values that belong to it, and stores them in the "E...
Compile-time alias for a dimension.
Definition: geometry.hpp:17
a class for representing a geometric region that can be used for integration
Definition: domain.hpp:33
Compile-time alias for index of differentiation.
Definition: functional.hpp:76
these classes are a little confusing. These two special types represent the similar (but different) c...
A class for storing and access user-defined types at quadrature points.
This is a class that mimics most of std::tuple's interface, except that it is usable in CUDA kernels ...
Definition: tuple.hpp:28
Implementation of the tensor class used by Functional.