Smith  0.1
Smith is an implicit thermal structural mechanics simulation code.
functional.hpp
Go to the documentation of this file.
1 // Copyright (c) Lawrence Livermore National Security, LLC and
2 // other Smith Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
13 #pragma once
14 
15 #include <algorithm>
16 #include <array>
17 #include <functional>
18 #include <memory>
19 #include <tuple>
20 #include <type_traits>
21 #include <utility>
22 #include <vector>
23 
24 #include "mfem.hpp"
25 
26 #include "smith/smith_config.hpp"
31 #include "smith/numerics/functional/integral.hpp"
32 #include "smith/numerics/functional/differentiate_wrt.hpp"
33 #include "smith/numerics/functional/element_restriction.hpp"
35 
36 namespace smith {
37 
39 constexpr int SOURCE = 0;
40 constexpr int FLUX = 1;
41 constexpr int VALUE = 0;
42 constexpr int DERIVATIVE = 1;
44 
45 template <int... i>
46 struct DependsOn {};
47 
58 template <typename... T>
59 constexpr uint32_t index_of_differentiation()
60 {
61  constexpr uint32_t n = sizeof...(T);
62  bool matching[] = {std::is_same_v<T, differentiate_wrt_this>...};
63  for (uint32_t i = 0; i < n; i++) {
64  if (matching[i]) {
65  return i;
66  }
67  }
68  return NO_DIFFERENTIATION;
69 }
70 
74 template <int ind>
75 struct Index {
79  constexpr operator int() { return ind; }
80 };
81 
83 inline void check_for_missing_nodal_gridfunc(const mfem::Mesh& mesh)
84 {
85  if (mesh.GetNodes() == nullptr) {
86  SLIC_ERROR_ROOT(
87  R"errmsg(
88  the provided mesh does not have a nodal gridfunction.
89  If you created an mfem::Mesh manually, make sure that the
90  following member functions are invoked before use
91 
92  > mfem::Mesh::EnsureNodes();
93  > mfem::ParMesh::ExchangeFaceNbrData();
94 
95  or else the mfem::Mesh won't be fully initialized
96  )errmsg";);
97  }
98 }
99 
101 inline void check_for_unsupported_elements(const mfem::Mesh& mesh)
102 {
103  int num_elements = mesh.GetNE();
104  for (int e = 0; e < num_elements; e++) {
105  auto type = mesh.GetElementType(e);
106  if (type == mfem::Element::POINT || type == mfem::Element::WEDGE || type == mfem::Element::PYRAMID) {
107  SLIC_ERROR_ROOT("Mesh contains unsupported element type");
108  }
109  }
110 }
111 
120 template <typename function_space>
121 inline std::pair<std::unique_ptr<mfem::ParFiniteElementSpace>, std::unique_ptr<mfem::FiniteElementCollection>>
122 generateParFiniteElementSpace(mfem::ParMesh* mesh)
123 {
124  const int dim = mesh->Dimension();
125  std::unique_ptr<mfem::FiniteElementCollection> fec;
126 
127  switch (function_space::family) {
128  case Family::H1:
129  fec = std::make_unique<mfem::H1_FECollection>(function_space::order, dim);
130  break;
131  case Family::HCURL:
132  fec = std::make_unique<mfem::ND_FECollection>(function_space::order, dim);
133  break;
134  case Family::HDIV:
135  fec = std::make_unique<mfem::RT_FECollection>(function_space::order, dim);
136  break;
137  case Family::L2:
138  // We use GaussLobatto basis functions as this is what is used for the smith::Functional FE kernels
139  fec = std::make_unique<mfem::L2_FECollection>(function_space::order, dim, mfem::BasisType::GaussLobatto);
140  break;
141  default:
142  return {nullptr, nullptr};
143  }
144 
145  // NOTE: Clang-tidy complains about a potential leak of memory pointed at `fes` whenever its dereferenced,
146  // because MFEM still uses raw pointers. Assuming MFEM handles its pointers properly, it can be safely ignored.
147  auto fes =
148  std::make_unique<mfem::ParFiniteElementSpace>(mesh, fec.get(), function_space::components, smith::ordering);
149 
150  return std::pair(std::move(fes), std::move(fec));
151 }
152 
159 inline void updateFaceNbrData(const mfem::ParFiniteElementSpace* const_trial_space, mfem::ParGridFunction& trial_pgf,
160  mfem::Vector& trial_tdof_vals)
161 {
162  mfem::ParFiniteElementSpace* nonconst_trial_space = const_cast<mfem::ParFiniteElementSpace*>(const_trial_space);
163 
164  // Link the ParGridFunction to external FE space and data by reference
165  trial_pgf.MakeRef(nonconst_trial_space, trial_tdof_vals.GetData());
166 
167  // Exchange face nbr data to set the vector in ParGridFunction with the right size
168  // This should automatically invoke ExchangeFaceNbrData on the subsequent ParFESpace and ParMesh
169  trial_pgf.ExchangeFaceNbrData();
170 }
171 
176 inline void appendFaceNbrData(const mfem::ParFiniteElementSpace* trial_space, const mfem::ParGridFunction& trial_pgf,
177  const int LSize, mfem::Vector& input_L)
178 {
179  int num_ghost_elem = trial_space->GetParMesh()->GetNFaceNeighborElements();
180  int components_per_node = trial_space->GetVDim();
181  const mfem::Vector& face_neighbor_data = trial_pgf.FaceNbrData();
182 
183  int offset = 0;
184  for (int n = 0; n < num_ghost_elem; ++n) {
185  mfem::Array<int> old_shared_elem_vdof_ids;
186  trial_space->GetFaceNbrElementVDofs(n, old_shared_elem_vdof_ids);
187  int dofs_per_element = old_shared_elem_vdof_ids.Size() / components_per_node;
188 
189  // Put the entries into their new index
190  for (int m = 0; m < old_shared_elem_vdof_ids.Size(); ++m) {
191  // -------------- dofs before -------------- ---- component -----
192  int new_id = (m % dofs_per_element) * components_per_node + m / dofs_per_element + offset;
193  input_L[LSize + new_id] = face_neighbor_data(old_shared_elem_vdof_ids[m]);
194  }
195  // Increase offset for next ghost element
196  offset += old_shared_elem_vdof_ids.Size();
197  }
198 }
199 
203 inline void rearrangeFaceNbrDofGlobalIndex(const mfem::ParFiniteElementSpace* trial_space,
204  mfem::Array<HYPRE_BigInt>& face_nbr_glob_vdof_map)
205 {
206  int components_per_node = trial_space->GetVDim();
207  if (components_per_node == 1) {
208  face_nbr_glob_vdof_map = trial_space->face_nbr_glob_dof_map;
209  return;
210  }
211 
212  if (components_per_node > 1 && trial_space->GetOrdering() == mfem::Ordering::byNODES) {
213  SLIC_ERROR_ROOT("Unsupported: L2 vector field ordered by nodes");
214  }
215 
216  face_nbr_glob_vdof_map.SetSize(trial_space->face_nbr_glob_dof_map.Size());
217  int num_ghost_elem = trial_space->GetParMesh()->GetNFaceNeighborElements();
218  const HYPRE_BigInt* face_nbr_glob_dof_index = trial_space->face_nbr_glob_dof_map;
219 
220  int offset = 0;
221  for (int n = 0; n < num_ghost_elem; ++n) {
222  mfem::Array<int> old_shared_elem_vdof_ids;
223  trial_space->GetFaceNbrElementVDofs(n, old_shared_elem_vdof_ids);
224  int dofs_per_element = old_shared_elem_vdof_ids.Size() / components_per_node;
225 
226  // Put the entries into their new index
227  for (int m = 0; m < old_shared_elem_vdof_ids.Size(); ++m) {
228  int new_id = (m % dofs_per_element) * components_per_node + m / dofs_per_element + offset;
229  face_nbr_glob_vdof_map[new_id] = face_nbr_glob_dof_index[old_shared_elem_vdof_ids[m]];
230  }
231  // Increase offset for next ghost element
232  offset += old_shared_elem_vdof_ids.Size();
233  }
234 }
235 
237 template <typename T, ExecutionSpace exec = smith::default_execution_space>
238 class Functional;
240 
281 template <typename test, typename... trials, ExecutionSpace exec>
282 class Functional<test(trials...), exec> {
283  static constexpr tuple<trials...> trial_spaces{};
284  static constexpr uint32_t num_trial_spaces = sizeof...(trials);
285  static constexpr auto Q = std::max({test::order, trials::order...}) + 1;
286 
287  static constexpr mfem::Geometry::Type elem_geom[4] = {mfem::Geometry::INVALID, mfem::Geometry::SEGMENT,
288  mfem::Geometry::SQUARE, mfem::Geometry::CUBE};
289  static constexpr mfem::Geometry::Type simplex_geom[4] = {mfem::Geometry::INVALID, mfem::Geometry::SEGMENT,
290  mfem::Geometry::TRIANGLE, mfem::Geometry::TETRAHEDRON};
291 
292  class Gradient;
293 
294  // clang-format off
295  template <uint32_t i>
296  struct operator_paren_return {
297  using type = typename std::conditional<
298  i == NO_DIFFERENTIATION, // if `i` indicates that we want to skip differentiation
299  mfem::Vector&, // we just return the value
300  smith::tuple<mfem::Vector&, Gradient&> // otherwise we return the value and the derivative w.r.t arg `i`
301  >::type;
302  };
303  // clang-format on
304 
305  public:
311  Functional(const mfem::ParFiniteElementSpace* test_fes,
312  std::array<const mfem::ParFiniteElementSpace*, num_trial_spaces> trial_fes)
313  : update_qdata_(false), test_space_(test_fes), trial_space_(trial_fes), mem_type(mfem::Device::GetMemoryType())
314  {
316 
317  test_function_space_ = {test::family, test::order, test::components};
318 
319  std::array<Family, num_trial_spaces> trial_families = {trials::family...};
320  std::array<int, num_trial_spaces> trial_orders = {trials::order...};
321  std::array<int, num_trial_spaces> trial_components = {trials::components...};
322 
323  for (uint32_t i = 0; i < num_trial_spaces; i++) {
324  trial_function_spaces_[i] = {trial_families[i], trial_orders[i], trial_components[i]};
325 
326  P_trial_[i] = trial_space_[i]->GetProlongationMatrix();
327 
328  // For L2 spaces, configure a ParGridFunction with external data to exchange face neighbor data.
329  // This is for DG method where interior faces on the processor boundary need to access data on
330  // the neighrbor element
331  if (trial_function_spaces_[i].family == Family::L2) {
332  // Set the vector size to store tdof data
333  input_ldof_values_[i].SetSize(P_trial_[i]->Height(), mem_type);
334 
335  // Initialize face neighbor data vector
336  updateFaceNbrData(trial_space_[i], trial_pargrid_functions_[i], input_ldof_values_[i]);
337 
338  // Set the local input vector size to store local + ghost data
339  input_L_[i].SetSize(input_ldof_values_[i].Size() + trial_pargrid_functions_[i].FaceNbrData().Size(), mem_type);
340 
341  // Rearrange the global index of face neighbor vdofs to the correct ordering.
342  // This only needs to be done once
343  rearrangeFaceNbrDofGlobalIndex(trial_space_[i], face_nbr_glob_vdof_maps_[i]);
344  } else {
345  input_L_[i].SetSize(P_trial_[i]->Height(), mem_type);
346  }
347 
348  // create the necessary number of empty mfem::Vectors, to be resized later
349  input_E_.push_back({});
350  input_E_buffer_.push_back({});
351  }
352 
353  // for (auto type : {Domain::Type::Elements, Domain::Type::BoundaryElements, Domain::Type::InteriorFaces}) {
354  // output_E_[type].Update(G_test_[type].bOffsets(), mem_type);
355  // }
356 
357  P_test_ = test_space_->GetProlongationMatrix();
358 
359  if (test_function_space_.family == Family::L2) {
360  // We only need these variables to set the output_L_ vector to the right size
361  mfem::ParGridFunction X_pgf;
362  mfem::Vector X_ldof_values(P_test_->Height(), mem_type);
363 
364  updateFaceNbrData(test_space_, X_pgf, X_ldof_values);
365  output_L_.SetSize(X_ldof_values.Size() + X_pgf.FaceNbrData().Size(), mem_type);
366  } else {
367  output_L_.SetSize(P_test_->Height(), mem_type);
368  }
369 
370  output_T_.SetSize(test_fes->GetTrueVSize(), mem_type);
371 
372  // gradient objects depend on some member variables in
373  // Functional, so we initialize the gradient objects last
374  // to ensure that those member variables are initialized first
375  for (uint32_t i = 0; i < num_trial_spaces; i++) {
376  grad_.emplace_back(*this, i);
377  }
378  }
379 
391  template <int dim, int... args, typename lambda, typename qpt_data_type = Nothing>
392  void AddDomainIntegral(Dimension<dim>, DependsOn<args...>, const lambda& integrand, Domain& domain,
393  std::shared_ptr<QuadratureData<qpt_data_type>> qdata = NoQData)
394  {
395  if (domain.mesh_.GetNE() == 0) return;
396 
397  SLIC_ERROR_ROOT_IF(dim != domain.mesh_.Dimension(), "invalid mesh dimension for domain integral");
398 
399  check_for_unsupported_elements(domain.mesh_);
400  check_for_missing_nodal_gridfunc(domain.mesh_);
401 
402  std::vector<uint32_t> arg_vec = {args...};
403  for (uint32_t i : arg_vec) {
404  domain.insert_restriction(trial_space_[i], trial_function_spaces_[i]);
405  }
406  domain.insert_restriction(test_space_, test_function_space_);
407 
408  using signature = test(decltype(smith::type<args>(trial_spaces))...);
409  integrals_.push_back(
410  MakeDomainIntegral<signature, Q, dim>(domain, integrand, qdata, std::vector<uint32_t>{args...}));
411  }
412 
422  template <int dim, int... args, typename lambda>
423  void AddBoundaryIntegral(Dimension<dim>, DependsOn<args...>, const lambda& integrand, Domain& domain)
424  {
425  auto num_bdr_elements = domain.mesh_.GetNBE();
426  if (num_bdr_elements == 0) return;
427 
428  SLIC_ERROR_ROOT_IF(dim != domain.dim_, "invalid domain of integration for boundary integral");
429 
430  check_for_missing_nodal_gridfunc(domain.mesh_);
431 
432  std::vector<uint32_t> arg_vec = {args...};
433  for (uint32_t i : arg_vec) {
434  domain.insert_restriction(trial_space_[i], trial_function_spaces_[i]);
435  }
436  domain.insert_restriction(test_space_, test_function_space_);
437 
438  using signature = test(decltype(smith::type<args>(trial_spaces))...);
439  integrals_.push_back(MakeBoundaryIntegral<signature, Q, dim>(domain, integrand, std::vector<uint32_t>{args...}));
440  }
441 
445  template <int dim, int... args, typename Integrand>
446  void AddInteriorFaceIntegral(Dimension<dim>, DependsOn<args...>, const Integrand& integrand, Domain& domain)
447  {
448  check_for_missing_nodal_gridfunc(domain.mesh_);
449 
450  std::vector<uint32_t> arg_vec = {args...};
451  for (uint32_t i : arg_vec) {
452  domain.insert_restriction(trial_space_[i], trial_function_spaces_[i]);
453  }
454  domain.insert_restriction(test_space_, test_function_space_);
455 
456  using signature = test(decltype(smith::type<args>(trial_spaces))...);
457  integrals_.push_back(
458  MakeInteriorFaceIntegral<signature, Q, dim>(domain, integrand, std::vector<uint32_t>{args...}));
459  }
460 
470  template <int... args, typename lambda, typename qpt_data_type = Nothing>
471  void AddAreaIntegral(DependsOn<args...> which_args, const lambda& integrand, Domain& domain,
472  std::shared_ptr<QuadratureData<qpt_data_type>> data = NoQData)
473  {
474  AddDomainIntegral(Dimension<2>{}, which_args, integrand, domain, data);
475  }
476 
486  template <int... args, typename lambda, typename qpt_data_type = Nothing>
487  void AddVolumeIntegral(DependsOn<args...> which_args, const lambda& integrand, Domain& domain,
488  std::shared_ptr<QuadratureData<qpt_data_type>> data = NoQData)
489  {
490  AddDomainIntegral(Dimension<3>{}, which_args, integrand, domain, data);
491  }
492 
494  template <int... args, typename lambda>
495  void AddSurfaceIntegral(DependsOn<args...> which_args, const lambda& integrand, Domain& domain)
496  {
497  AddBoundaryIntegral(Dimension<2>{}, which_args, integrand, domain);
498  }
499 
511  void ActionOfGradient(const mfem::Vector& input_T, mfem::Vector& output_T, uint32_t which) const
512  {
513  // Please refer to 'operator()' below for detailed comments of this implementation
514  if (trial_function_spaces_[which].family == Family::L2) {
515  // copy input_L[which] and facenbrdata[which] into a common array like:
516  // first part second part
517  // [ --- L --- | --- FND --- ]
518  P_trial_[which]->Mult(input_T, input_ldof_values_[which]);
519  input_L_[which].SetVector(input_ldof_values_[which], 0);
520  updateFaceNbrData(trial_space_[which], trial_pargrid_functions_[which], input_ldof_values_[which]);
521 
522  // Ensure ghost data ordering is consistent with local data
523  if (trial_space_[which]->GetVDim() == 1) {
524  input_L_[which].SetVector(trial_pargrid_functions_[which].FaceNbrData(), input_ldof_values_[which].Size());
525  } else {
526  if (trial_space_[which]->GetOrdering() == mfem::Ordering::byVDIM) {
527  int local_size = input_ldof_values_[which].Size();
528  appendFaceNbrData(trial_space_[which], trial_pargrid_functions_[which], local_size, input_L_[which]);
529  } else {
530  SLIC_ERROR_ROOT("Unsupported: L2 vector field ordered by nodes");
531  }
532  }
533  } else {
534  P_trial_[which]->Mult(input_T, input_L_[which]);
535  }
536 
537  output_L_ = 0.0;
538 
539  for (auto& integral : integrals_) {
540  if (integral.DependsOn(which)) {
541  Domain& dom = integral.domain_;
542 
543  const smith::BlockElementRestriction& G_trial = dom.get_restriction(trial_function_spaces_[which]);
544  input_E_buffer_[which].SetSize(int(G_trial.ESize()));
545  input_E_[which].Update(input_E_buffer_[which], G_trial.bOffsets());
546  G_trial.Gather(input_L_[which], input_E_[which]);
547 
548  const smith::BlockElementRestriction& G_test = dom.get_restriction(test_function_space_);
549  output_E_buffer_.SetSize(int(G_test.ESize()));
550  output_E_.Update(output_E_buffer_, G_test.bOffsets());
551 
552  integral.GradientMult(input_E_[which], output_E_, which);
553 
554  // scatter-add to compute residuals on the local processor
555  G_test.ScatterAdd(output_E_, output_L_);
556  }
557  }
558 
559  // scatter-add to compute global residuals
560  if (test_function_space_.family == Family::L2) {
561  // Extract a subvector from output_L_ for local residuals excluding ghost dofs for L2 spaces
562  // The output vector is in form [ --- L --- | --- FND --- ]
563  mfem::Vector output_ldof_values(P_test_->Height(), mem_type);
564  for (int j = 0; j < output_ldof_values.Size(); ++j) {
565  output_ldof_values[j] = output_L_[j];
566  }
567  P_test_->MultTranspose(output_ldof_values, output_T);
568  } else {
569  P_test_->MultTranspose(output_L_, output_T);
570  }
571  }
572 
585  template <uint32_t wrt, typename... T>
586  typename operator_paren_return<wrt>::type operator()(DifferentiateWRT<wrt>, double t, const T&... args)
587  {
588  const mfem::Vector* input_T[] = {&static_cast<const mfem::Vector&>(args)...};
589 
590  // get the values for each local processor
591  for (uint32_t i = 0; i < num_trial_spaces; i++) {
592  if (trial_function_spaces_[i].family == Family::L2) {
593  // copy input_L[i] and facenbrdata[i] into a common array like:
594  // first part second part
595  // [ --- L --- | --- FND --- ]
596  P_trial_[i]->Mult(*input_T[i], input_ldof_values_[i]);
597 
598  // MakeRef from mfem::Vector will actually delete the data in the original vector, so we assign the
599  // values directly by SetVector, which invokes additional operations. This can be optimized later.
600  input_L_[i].SetVector(input_ldof_values_[i], 0);
601 
602  // Update again with new data
603  updateFaceNbrData(trial_space_[i], trial_pargrid_functions_[i], input_ldof_values_[i]);
604 
605  // Set the values of neighbor ghost dofs
606  // The entries in face_nbr_data vector is ALWAYS arranged byNODES per "volumetric" element.
607  // For example with two quadrilateral ghost elements we would have such face_nbr_data
608  // ----------- Ghost elem 1 ---------- ----------- Ghost elem 2 ----------
609  // {X1 X1 X1 X1 Y1 Y1 Y1 Y1 Z1 Z1 Z1 Z1 X2 X2 X2 X2 Y2 Y2 Y2 Y2 Z2 Z2 Z2 Z2}
610  //
611  // This is because 1) mfem prepares face neighbor data element by element to communicate later (standard).
612  // 2) mfem uses GetElementVDofs to gather the data for communication which returns a set of indices ALWAYS
613  // ordered byNODES (???). If the ordering we set for ParFiniteElementSpace is byVDIM, we need the entries to be
614  // ----------- Ghost elem 1 ---------- ----------- Ghost elem 2 ----------
615  // {X1 Y1 Z1 X1 Y1 Z1 X1 Y1 Z1 X1 Y1 Z1 X2 Y2 Z2 X2 Y2 Z2 X2 Y2 Z2 X2 Y2 Z2}
616  // so it's consistent with the local data. Therefore, we need to manually change this ordering.
617  if (trial_space_[i]->GetVDim() == 1) {
618  // For scalar fields, this weird ordering doesn't cause any problems.
619  input_L_[i].SetVector(trial_pargrid_functions_[i].FaceNbrData(), input_ldof_values_[i].Size());
620  } else {
621  if (trial_space_[i]->GetOrdering() == mfem::Ordering::byVDIM) {
622  // For vector fields with byVDIM ordering, we manually set the entries in VDIM order. By the way this
623  // is really annoying because we have to get face neighbor vdof indices again in element restriction.
624  int local_size = input_ldof_values_[i].Size();
625  appendFaceNbrData(trial_space_[i], trial_pargrid_functions_[i], local_size, input_L_[i]);
626  } else {
627  // For vector fields with byNODES ordering, we need to prepare the local input vector in the form
628  // { true_comp_X ghost_comp_X true_comp_Y ghost_comp_Y true_comp_Z ghost_comp_Z }.
629  // In this form, to ensure the correct mapping between dof and vdof, we need to change the ndofs in
630  // FiniteElementSpace to include ones from ghost element. Additionally, we need to prepare
631  // ghost_comp_X / ghost_comp_Y / ghost_comp_Z from element wise entries in face_nbr_data,
632  // eg. ghost_comp_X includes X component entries of all ghost element dofs.
633  // This requires significant renumbering of the dofs and therefore is not supported for now.
634  SLIC_ERROR_ROOT("Unsupported: L2 vector field ordered by nodes");
635  }
636  }
637  } else {
638  P_trial_[i]->Mult(*input_T[i], input_L_[i]);
639  }
640  }
641 
642  output_L_ = 0.0;
643 
644  for (auto& integral : integrals_) {
645  Domain& dom = integral.domain_;
646 
647  const smith::BlockElementRestriction& G_test = dom.get_restriction(test_function_space_);
648 
649  for (auto i : integral.active_trial_spaces_) {
650  const smith::BlockElementRestriction& G_trial = dom.get_restriction(trial_function_spaces_[i]);
651  input_E_buffer_[i].SetSize(int(G_trial.ESize()));
652  input_E_[i].Update(input_E_buffer_[i], G_trial.bOffsets());
653  G_trial.Gather(input_L_[i], input_E_[i]);
654  }
655 
656  output_E_buffer_.SetSize(int(G_test.ESize()));
657  output_E_.Update(output_E_buffer_, G_test.bOffsets());
658  integral.Mult(t, input_E_, output_E_, wrt, update_qdata_);
659 
660  // scatter-add to compute residuals on the local processor
661  G_test.ScatterAdd(output_E_, output_L_);
662  }
663 
664  // scatter-add to compute global residuals
665  if (test_function_space_.family == Family::L2) {
666  // Extract a subvector from output_L_ for local residuals excluding ghost dofs for L2 spaces
667  // The output vector is in form [ --- L --- | --- FND --- ]
668  mfem::Vector output_ldof_values(P_test_->Height(), mem_type);
669  for (int j = 0; j < output_ldof_values.Size(); ++j) {
670  output_ldof_values[j] = output_L_[j];
671  }
672  P_test_->MultTranspose(output_ldof_values, output_T_);
673  } else {
674  P_test_->MultTranspose(output_L_, output_T_);
675  }
676 
677  if constexpr (wrt != NO_DIFFERENTIATION) {
678  // if the user has indicated they'd like to evaluate and differentiate w.r.t.
679  // a specific argument, then we return both the value and gradient w.r.t. that argument
680  //
681  // mfem::Vector arg0 = ...;
682  // mfem::Vector arg1 = ...;
683  // e.g. auto [value, gradient_wrt_arg1] = my_functional(arg0, differentiate_wrt(arg1));
684  return {output_T_, grad_[wrt]};
685  }
686 
687  if constexpr (wrt == NO_DIFFERENTIATION) {
688  // if the user passes only `mfem::Vector`s then we assume they only want the output value
689  //
690  // mfem::Vector arg0 = ...;
691  // mfem::Vector arg1 = ...;
692  // e.g. mfem::Vector value = my_functional(arg0, arg1);
693  return output_T_;
694  }
695  }
696 
698  template <typename... T>
699  auto operator()(double t, const T&... args)
700  {
701  constexpr int num_differentiated_arguments = (std::is_same_v<T, differentiate_wrt_this> + ...);
702  static_assert(num_differentiated_arguments <= 1,
703  "Error: Functional::operator() can only differentiate w.r.t. 1 argument a time");
704  static_assert(sizeof...(T) == num_trial_spaces,
705  "Error: Functional::operator() must take exactly as many arguments as trial spaces");
706 
707  [[maybe_unused]] constexpr uint32_t i = index_of_differentiation<T...>();
708 
709  return (*this)(DifferentiateWRT<i>{}, t, args...);
710  }
711 
720  void updateQdata(bool update_flag) { update_qdata_ = update_flag; }
721 
722  private:
724  bool update_qdata_;
725 
731  class Gradient : public mfem::Operator {
732  public:
737  Gradient(Functional<test(trials...), exec>& f, uint32_t which = 0)
738  : mfem::Operator(f.test_space_->GetTrueVSize(), f.trial_space_[which]->GetTrueVSize()),
739  form_(f),
740  which_argument(which),
741  test_space_(f.test_space_),
742  trial_space_(f.trial_space_[which]),
743  df_(f.test_space_->GetTrueVSize())
744  {
746  }
747 
753  virtual void Mult(const mfem::Vector& dx, mfem::Vector& df) const override
754  {
755  form_.ActionOfGradient(dx, df, which_argument);
756  }
757 
759  mfem::Vector& operator()(const mfem::Vector& dx)
760  {
761  form_.ActionOfGradient(dx, df_, which_argument);
762  return df_;
763  }
764 
765  void initialize_sparsity_pattern()
766  {
767  using row_col = std::tuple<int, int>;
768 
769  std::set<row_col> nonzero_entries;
770 
771  for (auto& integral : form_.integrals_) {
772  if (integral.DependsOn(which_argument)) {
773  Domain& dom = integral.domain_;
774  const auto& G_test = dom.get_restriction(form_.test_function_space_);
775  const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]);
776  for (const auto& [geom, test_restriction] : G_test.restrictions) {
777  const auto& trial_restriction = G_trial.restrictions.at(geom);
778 
779  // the degrees of freedom associated with the rows/columns of the e^th element stiffness matrix
780  std::vector<int> test_vdofs(test_restriction.nodes_per_elem * test_restriction.components);
781  std::vector<int> trial_vdofs(trial_restriction.nodes_per_elem * trial_restriction.components);
782 
783  auto num_elements = static_cast<uint32_t>(test_restriction.num_elements);
784  for (uint32_t e = 0; e < num_elements; e++) {
785  for (uint32_t i = 0; i < test_restriction.nodes_per_elem; i++) {
786  auto test_dof = test_restriction.dof_info(e, i);
787  for (uint32_t j = 0; j < test_restriction.components; j++) {
788  test_vdofs[i * test_restriction.components + j] = int(test_restriction.GetVDof(test_dof, j).index());
789  }
790  }
791 
792  for (uint32_t i = 0; i < trial_restriction.nodes_per_elem; i++) {
793  auto trial_dof = trial_restriction.dof_info(e, i);
794  for (uint32_t j = 0; j < trial_restriction.components; j++) {
795  trial_vdofs[i * trial_restriction.components + j] =
796  int(trial_restriction.GetVDof(trial_dof, j).index());
797  }
798  }
799 
800  for (int row : test_vdofs) {
801  for (int col : trial_vdofs) {
802  nonzero_entries.insert({row, col});
803  }
804  }
805  }
806  }
807  }
808  }
809 
810  uint64_t nnz = nonzero_entries.size();
811  int nrows = form_.output_L_.Size();
812 
813  row_ptr.resize(uint32_t(nrows + 1));
814  col_ind.resize(nnz);
815 
816  int nz = 0;
817  int last_row = -1;
818  for (auto [row, col] : nonzero_entries) {
819  col_ind[uint32_t(nz)] = col;
820  for (int i = last_row + 1; i <= row; i++) {
821  row_ptr[uint32_t(i)] = nz;
822  }
823  last_row = row;
824  nz++;
825  }
826  for (int i = last_row + 1; i <= nrows; i++) {
827  row_ptr[uint32_t(i)] = nz;
828  }
829  };
830 
831  uint64_t max_buffer_size()
832  {
833  uint64_t max_entries = 0;
834  for (auto& integral : form_.integrals_) {
835  if (integral.DependsOn(which_argument)) {
836  Domain& dom = integral.domain_;
837  const auto& G_test = dom.get_restriction(form_.test_function_space_);
838  const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]);
839  for (const auto& [geom, test_restriction] : G_test.restrictions) {
840  const auto& trial_restriction = G_trial.restrictions.at(geom);
841  uint64_t nrows_per_element = test_restriction.nodes_per_elem * test_restriction.components;
842  uint64_t ncols_per_element = trial_restriction.nodes_per_elem * trial_restriction.components;
843  uint64_t entries_per_element = nrows_per_element * ncols_per_element;
844  uint64_t entries_needed = test_restriction.num_elements * entries_per_element;
845  max_entries = std::max(entries_needed, max_entries);
846  }
847  }
848  }
849  return max_entries;
850  }
851 
852  std::unique_ptr<mfem::HypreParMatrix> assemble()
853  {
854  if (row_ptr.empty()) {
855  initialize_sparsity_pattern();
856  }
857 
858  // since we own the storage for row_ptr, col_ind, values,
859  // we ask mfem to not deallocate those pointers in the SparseMatrix dtor
860  constexpr bool sparse_matrix_frees_graph_ptrs = false;
861  constexpr bool sparse_matrix_frees_values_ptr = false;
862  constexpr bool col_ind_is_sorted = true;
863 
864  // note: we make a copy of col_ind since mfem::HypreParMatrix
865  // changes it in the constructor
866  std::vector<int> col_ind_copy = col_ind;
867 
868  int nnz = row_ptr.back();
869  std::vector<double> values(uint32_t(nnz), 0.0);
870  auto A_local = mfem::SparseMatrix(row_ptr.data(), col_ind_copy.data(), values.data(), form_.output_L_.Size(),
871  form_.input_L_[which_argument].Size(), sparse_matrix_frees_graph_ptrs,
872  sparse_matrix_frees_values_ptr, col_ind_is_sorted);
873 
874  std::vector<double> K_elem_buffer(max_buffer_size());
875 
876  for (auto& integral : form_.integrals_) {
877  // if this integral's derivative isn't identically zero
878  if (integral.functional_to_integral_index_.count(which_argument) > 0) {
879  Domain& dom = integral.domain_;
880 
881  uint32_t id = integral.functional_to_integral_index_.at(which_argument);
882  const auto& G_test = dom.get_restriction(form_.test_function_space_);
883  const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]);
884  for (const auto& [geom, calculate_element_matrices_func] : integral.element_gradient_[id]) {
885  const auto& test_restriction = G_test.restrictions.at(geom);
886  const auto& trial_restriction = G_trial.restrictions.at(geom);
887 
888  // prepare a buffer to hold the element matrices
889  CPUArrayView<double, 3> K_e(K_elem_buffer.data(), test_restriction.num_elements,
890  trial_restriction.nodes_per_elem * trial_restriction.components,
891  test_restriction.nodes_per_elem * test_restriction.components);
892  detail::zero_out(K_e);
893 
894  // perform the actual calculations
895  calculate_element_matrices_func(K_e);
896 
897  const std::vector<int>& element_ids = integral.domain_.get(geom);
898 
899  uint32_t rows_per_elem = uint32_t(test_restriction.nodes_per_elem * test_restriction.components);
900  uint32_t cols_per_elem = uint32_t(trial_restriction.nodes_per_elem * trial_restriction.components);
901 
902  std::vector<DoF> test_vdofs(rows_per_elem);
903  std::vector<DoF> trial_vdofs(cols_per_elem);
904 
905  for (uint32_t e = 0; e < element_ids.size(); e++) {
906  test_restriction.GetElementVDofs(int(e), test_vdofs);
907  trial_restriction.GetElementVDofs(int(e), trial_vdofs);
908 
909  for (uint32_t i = 0; i < cols_per_elem; i++) {
910  int col = int(trial_vdofs[i].index());
911 
912  for (uint32_t j = 0; j < rows_per_elem; j++) {
913  int row = int(test_vdofs[j].index());
914  A_local.SearchRow(row, col) += K_e(e, i, j);
915  }
916  }
917  }
918  }
919  }
920  }
921 
922  auto* R = form_.test_space_->Dof_TrueDof_Matrix();
923 
924  // If either test_space_ or trial_space_ is L2, the local matrix (A_local) will includ ghost rows and columns like
925  // ------------ -------
926  // | || |
927  // | diagonal || off-d |
928  // | block || block |
929  // | || |
930  // ------------ -------
931  // ------------ -------
932  // | ghost || XXX |
933  // | rows || XXX |
934  // ------------ -------
935  // We only need
936  // ------------ -------
937  // | || |
938  // | diagonal || off-d |
939  // | block || block |
940  // | || |
941  // ------------ -------
942  // to compute local residual and the neighbor dof values will be communicated to multiply into off-digonal block.
943  // So we construct a HypreParMatrix that contains the off-diagonal block from the local sparse matrix
944  mfem::HypreParMatrix* A_hypre;
945  if (dynamic_cast<const mfem::L2_FECollection*>(test_space_->FEColl()) ||
946  dynamic_cast<const mfem::L2_FECollection*>(trial_space_->FEColl())) {
947  int lrows = test_space_->GetVSize();
948  int lcols = trial_space_->GetVSize();
949  HYPRE_BigInt col_offset = trial_space_->GetMyDofOffset();
950  mfem::Array<HYPRE_BigInt> glob_J(A_local.NumNonZeroElems());
951 
952  int* J = A_local.GetJ();
953  for (int j = 0; j < glob_J.Size(); ++j) {
954  if (J[j] < lcols) {
955  glob_J[j] = J[j] + col_offset;
956  } else {
957  glob_J[j] = form_.face_nbr_glob_vdof_maps_[which_argument][J[j] - lcols];
958  }
959  }
960  A_hypre = new mfem::HypreParMatrix(test_space_->GetComm(), lrows, test_space_->GlobalVSize(),
961  trial_space_->GlobalVSize(), A_local.GetI(), glob_J, A_local.GetData(),
962  test_space_->GetDofOffsets(), trial_space_->GetDofOffsets());
963  glob_J.DeleteAll();
964  } else {
965  A_hypre =
966  new mfem::HypreParMatrix(test_space_->GetComm(), test_space_->GlobalVSize(), trial_space_->GlobalVSize(),
967  test_space_->GetDofOffsets(), trial_space_->GetDofOffsets(), &A_local);
968  }
969 
970  auto* P = trial_space_->Dof_TrueDof_Matrix();
971 
972  std::unique_ptr<mfem::HypreParMatrix> A(mfem::RAP(R, A_hypre, P));
973 
974  delete A_hypre;
975 
976  return A;
977  };
978 
979  friend auto assemble(Gradient& g) { return g.assemble(); }
980 
981  private:
983  Functional<test(trials...), exec>& form_;
984 
985  std::vector<int> row_ptr;
986  std::vector<int> col_ind;
987 
998  uint32_t which_argument;
999 
1001  const mfem::ParFiniteElementSpace* test_space_;
1002 
1004  const mfem::ParFiniteElementSpace* trial_space_;
1005 
1007  mfem::Vector df_;
1008  };
1009 
1011  const mfem::ParFiniteElementSpace* test_space_;
1012 
1014  std::array<const mfem::ParFiniteElementSpace*, num_trial_spaces> trial_space_;
1015 
1016  std::array<FunctionSpace, num_trial_spaces> trial_function_spaces_;
1017  FunctionSpace test_function_space_;
1018 
1020  std::array<mfem::Array<HYPRE_BigInt>, num_trial_spaces> face_nbr_glob_vdof_maps_;
1021 
1023  mutable std::array<mfem::ParGridFunction, num_trial_spaces> trial_pargrid_functions_;
1024 
1026  mutable std::array<mfem::Vector, num_trial_spaces> input_ldof_values_;
1027 
1032  const mfem::Operator* P_trial_[num_trial_spaces];
1033 
1035  mutable mfem::Vector input_L_[num_trial_spaces];
1036 
1037  mutable std::vector<mfem::Vector> input_E_buffer_;
1038  mutable std::vector<mfem::BlockVector> input_E_;
1039 
1040  mutable std::vector<Integral> integrals_;
1041 
1042  mutable mfem::Vector output_E_buffer_;
1043  mutable mfem::BlockVector output_E_;
1044 
1046  mutable mfem::Vector output_L_;
1047 
1048  const mfem::Operator* P_test_;
1049 
1051  mutable mfem::Vector output_T_;
1052 
1054  mutable std::vector<Gradient> grad_;
1055 
1056  const mfem::MemoryType mem_type;
1057 };
1058 
1059 } // namespace smith
1060 
1061 #include "functional_qoi.inl"
many of the functions in this file amount to extracting element indices from an mesh_t like
This file contains helper traits and enumerations for classifying finite elements.
a specialization of smith::Functional for quantities of interest
This file contains the all the necessary functions and macros required for logging as well as a helpe...
Accelerator functionality.
Definition: smith.cpp:36
void appendFaceNbrData(const mfem::ParFiniteElementSpace *trial_space, const mfem::ParGridFunction &trial_pgf, const int LSize, mfem::Vector &input_L)
helper functional to reorder the ordering of FaceNbrData for L2 space to byVDIM and append this vecto...
Definition: functional.hpp:167
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.
std::pair< std::unique_ptr< mfem::ParFiniteElementSpace >, std::unique_ptr< mfem::FiniteElementCollection > > generateParFiniteElementSpace(mfem::ParMesh *mesh)
create an mfem::ParFiniteElementSpace from one of Smith's tag types: H1, Hcurl, L2
Definition: functional.hpp:113
std::shared_ptr< QuadratureData< Nothing > > NoQData
a single instance of a QuadratureData container of Nothings, since they are all interchangeable
void check_for_unsupported_elements(const mfem::Mesh &mesh)
function for verifying that there are no unsupported element types in the mesh
Definition: functional.hpp:92
constexpr SMITH_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:376
constexpr uint32_t index_of_differentiation()
given a list of types, this function returns the index that corresponds to the type dual_vector.
Definition: functional.hpp:59
SMITH_HOST_DEVICE auto max(dual< gradient_type > a, double b)
Implementation of max for dual numbers.
Definition: dual.hpp:229
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
Definition: accelerator.hpp:73
void rearrangeFaceNbrDofGlobalIndex(const mfem::ParFiniteElementSpace *trial_space, mfem::Array< HYPRE_BigInt > &face_nbr_glob_vdof_map)
helper functional to reorder the face_nbr_glob_dof_map for L2 space to byVDIM
Definition: functional.hpp:194
void check_for_missing_nodal_gridfunc(const mfem::Mesh &mesh)
function for verifying that the mesh has been fully initialized
Definition: functional.hpp:83
void updateFaceNbrData(const mfem::ParFiniteElementSpace *const_trial_space, mfem::ParGridFunction &trial_pgf, mfem::Vector &trial_tdof_vals)
helper function to locally cast away const on FE space so we can update face neighbor data with Excha...
Definition: functional.hpp:150
#define SMITH_MARK_FUNCTION
Definition: profiling.hpp:90
Definitions of quadrature rules for quads and hexes.
a generalization of mfem::ElementRestriction that works with multiple kinds of element geometries....
uint64_t ESize() const
the size of the "E-vector" associated with this restriction operator
void Gather(const mfem::Vector &L_vector, mfem::BlockVector &E_block_vector) const
"L->E" in mfem parlance, each element gathers the values that belong to it, and stores them in the "E...
void ScatterAdd(const mfem::BlockVector &E_block_vector, mfem::Vector &L_vector) const
"E->L" in mfem parlance, each element scatter-adds its local vector into the appropriate place in the...
std::map< mfem::Geometry::Type, ElementRestriction > restrictions
the individual ElementRestriction operators for each element geometry
mfem::Array< int > bOffsets() const
block offsets used when constructing mfem::HypreParVectors
Compile-time alias for a dimension.
Definition: geometry.hpp:17
a class for representing a geometric region that can be used for integration
Definition: domain.hpp:33
Compile-time alias for index of differentiation.
Definition: functional.hpp:75
these classes are a little confusing. These two special types represent the similar (but different) c...
A class for storing and access user-defined types at quadrature points.
This is a class that mimics most of std::tuple's interface, except that it is usable in CUDA kernels ...
Definition: tuple.hpp:28
Implementation of the tensor class used by Functional.