Serac  0.1
Serac is an implicit thermal strucural mechanics simulation code.
tuple_tensor_dual_functions.hpp
1 #pragma once
2 
6 
7 #include "mfem.hpp"
8 
9 namespace serac {
10 
12 template <typename T>
14  static constexpr bool value = false;
15 };
16 
18 template <typename T, int... n>
20  static constexpr bool value = true;
21 };
22 
31 template <typename S, typename T, int m, int... n,
32  typename = std::enable_if_t<std::is_arithmetic_v<S> || is_dual_number<S>::value>>
33 SERAC_HOST_DEVICE constexpr auto operator*(S scale, const tensor<T, m, n...>& A)
34 {
35  tensor<decltype(S{} * T{}), m, n...> C{};
36  for (int i = 0; i < m; i++) {
37  C[i] = scale * A[i];
38  }
39  return C;
40 }
41 
50 template <typename S, typename T, int m, int... n,
51  typename = std::enable_if_t<std::is_arithmetic_v<S> || is_dual_number<S>::value>>
52 SERAC_HOST_DEVICE constexpr auto operator*(const tensor<T, m, n...>& A, S scale)
53 {
54  tensor<decltype(T{} * S{}), m, n...> C{};
55  for (int i = 0; i < m; i++) {
56  C[i] = A[i] * scale;
57  }
58  return C;
59 }
60 
69 template <typename S, typename T, int m, int... n,
70  typename = std::enable_if_t<std::is_arithmetic_v<S> || is_dual_number<S>::value>>
71 SERAC_HOST_DEVICE constexpr auto operator/(S scale, const tensor<T, m, n...>& A)
72 {
73  tensor<decltype(S{} * T{}), n...> C{};
74  for (int i = 0; i < m; i++) {
75  C[i] = scale / A[i];
76  }
77  return C;
78 }
79 
88 template <typename S, typename T, int m, int... n,
89  typename = std::enable_if_t<std::is_arithmetic_v<S> || is_dual_number<S>::value>>
90 SERAC_HOST_DEVICE constexpr auto operator/(const tensor<T, m, n...>& A, S scale)
91 {
92  tensor<decltype(T{} * S{}), m, n...> C{};
93  for (int i = 0; i < m; i++) {
94  C[i] = A[i] / scale;
95  }
96  return C;
97 }
98 
100 template <int i, typename S, typename T>
101 struct one_hot_helper;
102 
103 template <int i, int... I, typename T>
104 struct one_hot_helper<i, std::integer_sequence<int, I...>, T> {
105  using type = tuple<std::conditional_t<i == I, T, zero>...>;
106 };
107 
108 template <int i, int n, typename T>
109 struct one_hot : public one_hot_helper<i, std::make_integer_sequence<int, n>, T> {
110 };
112 
119 template <int i, int n, typename T>
121 
123 template <int i, int N>
124 SERAC_HOST_DEVICE constexpr auto make_dual_helper(zero /*arg*/)
125 {
126  return zero{};
127 }
128 
136 template <int i, int N>
137 SERAC_HOST_DEVICE constexpr auto make_dual_helper(double arg)
138 {
139  using gradient_t = one_hot_t<i, N, double>;
140  dual<gradient_t> arg_dual{};
141  arg_dual.value = arg;
142  serac::get<i>(arg_dual.gradient) = 1.0;
143  return arg_dual;
144 }
145 
153 template <int i, int N, typename T, int... n>
155 {
156  using gradient_t = one_hot_t<i, N, tensor<T, n...>>;
157  tensor<dual<gradient_t>, n...> arg_dual{};
158  for_constexpr<n...>([&](auto... j) {
159  arg_dual(j...).value = arg(j...);
160  serac::get<i>(arg_dual(j...).gradient)(j...) = 1.0;
161  });
162  return arg_dual;
163 }
164 
182 template <typename T0, typename T1>
183 SERAC_HOST_DEVICE constexpr auto make_dual(const tuple<T0, T1>& args)
184 {
185  return tuple{make_dual_helper<0, 2>(get<0>(args)), make_dual_helper<1, 2>(get<1>(args))};
186 }
187 
189 template <typename T0, typename T1, typename T2>
190 SERAC_HOST_DEVICE constexpr auto make_dual(const tuple<T0, T1, T2>& args)
191 {
192  return tuple{make_dual_helper<0, 3>(get<0>(args)), make_dual_helper<1, 3>(get<1>(args)),
193  make_dual_helper<2, 3>(get<2>(args))};
194 }
195 
203 template <bool dualify, typename T>
205 {
206  if constexpr (dualify) {
207  return make_dual(x);
208  }
209  if constexpr (!dualify) {
210  return x;
211  }
212 }
213 
222 template <bool dualify, typename T, int n>
224 {
225  if constexpr (dualify) {
226  using return_type = decltype(make_dual(T{}));
227  tensor<return_type, n> output;
228  for (int i = 0; i < n; i++) {
229  output[i] = make_dual(x[i]);
230  }
231  return output;
232  }
233  if constexpr (!dualify) {
234  return x;
235  }
236 }
237 
239 template <int n, typename... T, int... i>
240 SERAC_HOST_DEVICE constexpr auto make_dual_helper(const serac::tuple<T...>& args, std::integer_sequence<int, i...>)
241 {
242  // Sam: it took me longer than I'd like to admit to find this issue, so here's an explanation
243  //
244  // note: we use serac::make_tuple(...) instead of serac::tuple{...} here because if
245  // the first argument passed in is of type `serac::tuple < serac::tuple < T ... > >`
246  // then doing something like
247  //
248  // serac::tuple{serac::get<i>(args)...};
249  //
250  // will be expand to something like
251  //
252  // serac::tuple{serac::tuple< T ... >{}};
253  //
254  // which invokes the copy ctor, returning a `serac::tuple< T ... >`
255  // instead of `serac::tuple< serac::tuple < T ... > >`
256  //
257  // but serac::make_tuple(serac::get<i>(args)...) will never accidentally trigger the copy ctor
258  return serac::make_tuple(promote_to_dual_when<i == n>(serac::get<i>(args))...);
259 }
260 
268 template <int n, typename... T>
269 constexpr auto make_dual_wrt(const serac::tuple<T...>& args)
270 {
271  return make_dual_helper<n>(args, std::make_integer_sequence<int, static_cast<int>(sizeof...(T))>{});
272 }
273 
283 template <typename T1, typename T2, int n>
285 {
286  tensor<decltype(get_value(tuple<T1, T2>{})), n> output{};
287  for (int i = 0; i < n; i++) {
288  output[i] = get_value(input[i]);
289  }
290  return output;
291 }
292 
298 template <typename... T>
299 SERAC_HOST_DEVICE auto get_value(const serac::tuple<T...>& tuple_of_values)
300 {
301  return serac::apply([](const auto&... each_value) { return serac::tuple{get_value(each_value)...}; },
302  tuple_of_values);
303 }
304 
309 template <typename... T>
311 {
312  return serac::apply([](auto... each_value) { return serac::tuple{each_value...}; }, arg.gradient);
313 }
314 
316 template <typename... T, int... n>
318 {
319  serac::tuple<outer_product_t<tensor<double, n...>, T>...> g{};
320  for_constexpr<n...>([&](auto... i) {
321  for_constexpr<sizeof...(T)>([&](auto j) { serac::get<j>(g)(i...) = serac::get<j>(arg(i...).gradient); });
322  });
323  return g;
324 }
325 
327 template <typename... T>
329 {
330  return serac::apply([](auto... each_value) { return serac::tuple{get_gradient(each_value)...}; }, tuple_of_values);
331 }
332 
338 template <int... n>
340 {
341  tensor<dual<tensor<double, n...>>, n...> A_dual{};
342  for_constexpr<n...>([&](auto... i) {
343  A_dual(i...).value = A(i...);
344  A_dual(i...).gradient(i...) = 1.0;
345  });
346  return A_dual;
347 }
348 
358 template <typename T, int n>
360 {
361  constexpr auto abs = [](double x) { return (x < 0) ? -x : x; };
362  constexpr auto swap = [](auto& x, auto& y) {
363  auto tmp = x;
364  x = y;
365  y = tmp;
366  };
367 
368  auto U = A;
369  // initialize L to Identity
370  auto L = tensor<T, n, n>{};
371  // This handles the case if T is a dual number
372  // TODO - BT: make a dense identity that is templated on type
373  for (int i = 0; i < n; i++) {
374  if constexpr (is_dual_number<T>::value) {
375  L[i][i].value = 1.0;
376  } else {
377  L[i][i] = 1.0;
378  }
379  }
380  tensor<int, n> P(make_tensor<n>([](auto i) { return i; }));
381 
382  for (int i = 0; i < n; i++) {
383  // Search for maximum in this column
384  double max_val = abs(get_value(U[i][i]));
385 
386  int max_row = i;
387  for (int j = i + 1; j < n; j++) {
388  auto U_ji = get_value(U[j][i]);
389  if (abs(U_ji) > max_val) {
390  max_val = abs(U_ji);
391  max_row = j;
392  }
393  }
394 
395  swap(P[max_row], P[i]);
396  swap(U[max_row], U[i]);
397  }
398 
399  for (int i = 0; i < n; i++) {
400  // zero entries below in this column in U
401  // and fill in L entries
402  for (int j = i + 1; j < n; j++) {
403  auto c = U[j][i] / U[i][i];
404  L[j][i] = c;
405  U[j] -= c * U[i];
406  U[j][i] = T{};
407  }
408  }
409 
410  return {P, L, U};
411 }
412 
419 template <typename S, typename T, int n, int... m>
421 {
422  // We want to avoid accumulating the derivative through the
423  // LU factorization, because it is computationally expensive.
424  // Instead, we perform the LU factorization on the values of
425  // A, and then two backsolves: one to compute the primal (x),
426  // and another to compute its derivative (dx).
427  // If A is not dual, the second solve is a no-op.
428 
429  // Strip off derivatives, if any, and compute only x (ie no derivative)
430  auto lu_factors = factorize_lu(get_value(A));
431  auto x = linear_solve(lu_factors, get_value(b));
432 
433  // Compute directional derivative of x.
434  // If both b and A are not dual, the zero type
435  // makes these no-ops.
436  auto r = get_gradient(b) - dot(get_gradient(A), x);
437  auto dx = linear_solve(lu_factors, r);
438 
439  if constexpr (is_zero<decltype(dx)>{}) {
440  return x;
441  } else {
442  return make_dual(x, dx);
443  }
444 }
445 
449 template <typename T, int n>
450 SERAC_HOST_DEVICE constexpr auto make_dual(const tensor<T, n>& x, const tensor<T, n>& dx)
451 {
452  return make_tensor<n>([&](int i) { return dual<T>{x[i], dx[i]}; });
453 }
454 
458 template <typename T, int m, int n>
459 SERAC_HOST_DEVICE constexpr auto make_dual(const tensor<T, m, n>& x, const tensor<T, m, n>& dx)
460 {
461  return make_tensor<m, n>([&](int i, int j) { return dual<T>{x[i][j], dx[i][j]}; });
462 }
463 
473 template <typename gradient_type, int n>
475 {
476  auto invA = inv(get_value(A));
477  return make_tensor<n, n>([&](int i, int j) {
478  auto value = invA[i][j];
479  gradient_type gradient{};
480  for (int k = 0; k < n; k++) {
481  for (int l = 0; l < n; l++) {
482  gradient -= invA[i][k] * A[k][l].gradient * invA[l][j];
483  }
484  }
485  return dual<gradient_type>{value, gradient};
486  });
487 }
488 
493 template <typename T, int... n>
494 SERAC_HOST_DEVICE auto get_value(const tensor<dual<T>, n...>& arg)
495 {
496  tensor<double, n...> value{};
497  for_constexpr<n...>([&](auto... i) { value(i...) = arg(i...).value; });
498  return value;
499 }
500 
505 template <int... n>
506 SERAC_HOST_DEVICE constexpr auto get_gradient(const tensor<dual<double>, n...>& arg)
507 {
508  tensor<double, n...> g{};
509  for_constexpr<n...>([&](auto... i) { g(i...) = arg(i...).gradient; });
510  return g;
511 }
512 
514 template <int... n, int... m>
516 {
517  tensor<double, n..., m...> g{};
518  for_constexpr<n...>([&](auto... i) { g(i...) = arg(i...).gradient; });
519  return g;
520 }
521 
525 struct SolverStatus {
526  bool converged;
527  unsigned int iterations;
528  double residual;
529 };
530 
535  double xtol;
536  double rtol;
537  unsigned int max_iter;
538 };
539 
541 const ScalarSolverOptions default_solver_options{.xtol = 1e-8, .rtol = 0, .max_iter = 25};
542 
570 template <typename function, typename... ParamTypes>
571 auto solve_scalar_equation(function&& f, double x0, double lower_bound, double upper_bound, ScalarSolverOptions options,
572  ParamTypes... params)
573 {
574  double x, df_dx;
575  double fl = f(lower_bound, get_value(params)...);
576  double fh = f(upper_bound, get_value(params)...);
577 
578  SLIC_ERROR_ROOT_IF(fl * fh > 0, "solve_scalar_equation: root not bracketed by input bounds.");
579 
580  unsigned int iterations = 0;
581  bool converged = false;
582 
583  // handle corner cases where one of the brackets is the root
584  if (fl == 0) {
585  x = lower_bound;
586  converged = true;
587  } else if (fh == 0) {
588  x = upper_bound;
589  converged = true;
590  }
591 
592  if (converged) {
593  df_dx = get_gradient(f(make_dual(x), get_value(params)...));
594 
595  } else {
596  // orient search so that f(xl) < 0
597  double xl = lower_bound;
598  double xh = upper_bound;
599  if (fl > 0) {
600  xl = upper_bound;
601  xh = lower_bound;
602  }
603 
604  // move initial guess if it is not between brackets
605  if (x0 < lower_bound || x0 > upper_bound) {
606  x0 = 0.5 * (lower_bound + upper_bound);
607  }
608 
609  x = x0;
610  double delta_x_old = std::abs(upper_bound - lower_bound);
611  double delta_x = delta_x_old;
612  auto R = f(make_dual(x), get_value(params)...);
613  auto fval = get_value(R);
614  df_dx = get_gradient(R);
615 
616  while (!converged) {
617  if (iterations == options.max_iter) {
618  SLIC_WARNING("solve_scalar_equation failed to converge in allotted iterations.");
619  break;
620  }
621 
622  // use bisection if Newton oversteps brackets or is not decreasing sufficiently
623  if ((x - xh) * df_dx - fval > 0 || (x - xl) * df_dx - fval < 0 ||
624  std::abs(2. * fval) > std::abs(delta_x_old * df_dx)) {
625  delta_x_old = delta_x;
626  delta_x = 0.5 * (xh - xl);
627  x = xl + delta_x;
628  converged = (x == xl);
629  } else { // use Newton step
630  delta_x_old = delta_x;
631  delta_x = fval / df_dx;
632  auto temp = x;
633  x -= delta_x;
634  converged = (x == temp);
635  }
636 
637  // function and jacobian evaluation
638  R = f(make_dual(x), get_value(params)...);
639  fval = get_value(R);
640  df_dx = get_gradient(R);
641 
642  // convergence check
643  converged = converged || (std::abs(delta_x) < options.xtol) || (std::abs(fval) < options.rtol);
644 
645  // maintain bracket on root
646  if (fval < 0) {
647  xl = x;
648  } else {
649  xh = x;
650  }
651 
652  ++iterations;
653  }
654  }
655 
656  // Accumulate derivatives so that the user can get derivatives
657  // with respect to parameters, subject to constraing that f(x, p) = 0 for all p
658  // Conceptually, we're doing the following:
659  // [fval, df_dp] = f(get_value(x), p)
660  // df = 0
661  // for p in params:
662  // df += inner(df_dp, dp)
663  // dx = -df / df_dx
664  constexpr bool contains_duals =
666  if constexpr (contains_duals) {
667  auto [fval, df] = f(x, params...);
668  auto dx = -df / df_dx;
669  SolverStatus status{.converged = converged, .iterations = iterations, .residual = fval};
670  return tuple{dual{x, dx}, status};
671  }
672  if constexpr (!contains_duals) {
673  auto fval = f(x, params...);
674  SolverStatus status{.converged = converged, .iterations = iterations, .residual = fval};
675  return tuple{x, status};
676  }
677 }
678 
691 template <typename function, int n>
692 auto find_root(function&& f, tensor<double, n> x0)
693 {
694  static_assert(std::is_same_v<decltype(f(x0)), tensor<double, n>>,
695  "error: f(x) must have the same number of equations as unknowns");
696 
697  double epsilon = 1.0e-8;
698  int max_iterations = 10;
699 
700  auto x = x0;
701 
702  for (int k = 0; k < max_iterations; k++) {
703  auto output = f(make_dual(x));
704  auto r = get_value(output);
705  if (norm(r) < epsilon) break;
706  auto J = get_gradient(output);
707  x -= linear_solve(J, r);
708  }
709 
710  return x;
711 };
712 
721 template <typename T, int size>
723 {
724  // put tensor values in an mfem::DenseMatrix
725  mfem::DenseMatrix matA(size, size);
726  for (int i = 0; i < size; i++) {
727  for (int j = 0; j < size; j++) {
728  if constexpr (is_dual_number<T>::value) {
729  matA(i, j) = A[i][j].value;
730  } else {
731  matA(i, j) = A[i][j];
732  }
733  }
734  }
735 
736  // compute eigendecomposition
737  mfem::DenseMatrixEigensystem eig_sys(matA);
738  eig_sys.Eval();
739 
740  serac::tensor<T, size> output;
741 
742  for (int k = 0; k < size; k++) {
743  // extract eigenvalues
744  output[k] = eig_sys.Eigenvalue(k);
745 
746  // and calculate their derivatives, when appropriate
747  if constexpr (is_dual_number<T>::value) {
748  tensor<double, size> phi = make_tensor<size>([&](int i) { return eig_sys.Eigenvector(k)[i]; });
749  auto dA = make_tensor<size, size>([&](int i, int j) { return A(i, j).gradient; });
750  output[k].gradient = dot(phi, dA, phi);
751  }
752  }
753 
754  return output;
755 }
756 
757 } // namespace serac
#define SERAC_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
Definition: accelerator.hpp:38
This file contains the declaration of a dual number class.
constexpr SERAC_HOST_DEVICE void for_constexpr(lambda &&f)
multidimensional loop tool that evaluates the lambda body inside the innermost loop.
Accelerator functionality.
Definition: serac.cpp:38
constexpr SERAC_HOST_DEVICE auto make_dual(double x)
promote a value to a dual number of the appropriate type
Definition: dual.hpp:412
typename detail::outer_prod< T1, T2 >::type outer_product_t
a type function that returns the tensor type of an outer product of two tensors
Definition: tensor.hpp:1741
constexpr SERAC_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
SERAC_HOST_DEVICE auto apply(lambda f, tuple< T... > &args)
a way of passing an n-tuple to a function that expects n separate arguments
Definition: tuple.hpp:668
constexpr SERAC_HOST_DEVICE auto operator*(const dual< gradient_type > &a, double b)
multiplication of a dual number and a non-dual number
Definition: dual.hpp:109
SERAC_HOST_DEVICE auto promote_each_to_dual_when(const tensor< T, n > &x)
a function that optionally (decided at compile time) converts a list of values to their dual types
auto find_root(function &&f, tensor< double, n > x0)
Finds a root of a vector-valued nonlinear function.
constexpr SERAC_HOST_DEVICE auto make_dual_helper(zero)
This is an overloaded member function, provided for convenience. It differs from the above function o...
constexpr auto make_dual_wrt(const serac::tuple< T... > &args)
take a tuple of values, and promote the nth one to a one-hot dual number of the appropriate type
SERAC_HOST_DEVICE auto promote_to_dual_when(const T &x)
a function that optionally (decided at compile time) converts a value to its dual type
constexpr SERAC_HOST_DEVICE int size(const tensor< T, n... > &)
returns the total number of stored values in a tensor
Definition: tensor.hpp:1851
constexpr SERAC_HOST_DEVICE auto linear_solve(const LuFactorization< S, n > &lu_factors, const tensor< T, n, m... > &b)
Definition: tensor.hpp:1538
SERAC_HOST_DEVICE tuple< T... > make_tuple(const T &... args)
helper function for combining a list of values into a tuple
Definition: tuple.hpp:180
constexpr SERAC_HOST_DEVICE auto get_value(const T &arg)
return the "value" part from a given type. For non-dual types, this is just the identity function
Definition: dual.hpp:416
SERAC_HOST_DEVICE auto abs(dual< gradient_type > x)
Implementation of absolute value function for dual numbers.
Definition: dual.hpp:220
const ScalarSolverOptions default_solver_options
Default options for solve_scalar_equation.
auto solve_scalar_equation(function &&f, double x0, double lower_bound, double upper_bound, ScalarSolverOptions options, ParamTypes... params)
Solves a nonlinear scalar-valued equation and gives derivatives of solution to parameters.
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:274
constexpr SERAC_HOST_DEVICE auto norm(const isotropic_tensor< T, m, m > &I)
compute the Frobenius norm (sqrt(tr(dot(transpose(I), I)))) of an isotropic tensor
constexpr SERAC_HOST_DEVICE auto inv(const isotropic_tensor< T, m, m > &I)
return the inverse of an isotropic tensor
constexpr SERAC_HOST_DEVICE LuFactorization< T, n > factorize_lu(const tensor< T, n, n > &A)
Compute LU factorization of a matrix with partial pivoting.
auto eigenvalues(const serac::tensor< T, size, size > &A)
compute the eigenvalues of a symmetric matrix A
typename one_hot< i, n, T >::type one_hot_t
a tuple type with n entries, all of which are of type serac::zero, except for the i^{th} entry,...
constexpr SERAC_HOST_DEVICE auto get_gradient(dual< gradient_type > arg)
return the "gradient" part from a dual number type
Definition: dual.hpp:430
constexpr SERAC_HOST_DEVICE auto operator/(const dual< gradient_type > &a, double b)
division of a dual number by a non-dual number
Definition: dual.hpp:130
Representation of an LU factorization.
Definition: tensor.hpp:1459
Settings for solve_scalar_equation.
unsigned int max_iter
maximum allowed number of iterations
double xtol
absolute tolerance on Newton correction
double rtol
absolute tolerance on absolute value of residual
Status and diagnostics of nonlinear equation solvers.
bool converged
converged Flag indicating whether solver converged to a solution or aborted.
double residual
Final value of residual.
unsigned int iterations
Number of iterations taken.
Dual number struct (value plus gradient)
Definition: dual.hpp:29
double value
the actual numerical value
Definition: dual.hpp:30
class for checking if a type is a dual number or not
Definition: dual.hpp:437
class for checking if a type is a tensor of dual numbers or not
static constexpr bool value
whether or not type T is a dual number
checks if a type is zero
Definition: tensor.hpp:151
Arbitrary-rank tensor class.
Definition: tensor.hpp:29
Type that mimics std::tuple.
Definition: tuple.hpp:61
Type that mimics std::tuple.
Definition: tuple.hpp:48
This is a class that mimics most of std::tuple's interface, except that it is usable in CUDA kernels ...
Definition: tuple.hpp:28
A sentinel struct for eliding no-op tensor operations.
Definition: tensor.hpp:123
Implementation of the tensor class used by Functional.
Implements a std::tuple-like object that works in CUDA kernels.