Serac  0.1
Serac is an implicit thermal strucural mechanics simulation code.
tensor.hpp
Go to the documentation of this file.
1 // Copyright (c) 2019-2024, Lawrence Livermore National Security, LLC and
2 // other Serac Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
13 #pragma once
14 
16 
18 
19 #include <cmath>
20 
21 namespace serac {
22 
28 template <typename T, int... n>
29 struct tensor;
30 
32 template <typename T, int m, int... n>
33 struct tensor<T, m, n...> {
34  template <typename i_type>
35  SERAC_HOST_DEVICE constexpr auto& operator()(i_type i)
36  {
37  return data[i];
38  }
39  template <typename i_type>
40  SERAC_HOST_DEVICE constexpr auto& operator()(i_type i) const
41  {
42  return data[i];
43  }
44  template <typename i_type, typename... jklm_type>
45  SERAC_HOST_DEVICE constexpr auto& operator()(i_type i, jklm_type... jklm)
46  {
47  return data[i](jklm...);
48  }
49  template <typename i_type, typename... jklm_type>
50  SERAC_HOST_DEVICE constexpr auto& operator()(i_type i, jklm_type... jklm) const
51  {
52  return data[i](jklm...);
53  }
54 
55  SERAC_HOST_DEVICE constexpr auto& operator[](int i) { return data[i]; }
56  SERAC_HOST_DEVICE constexpr const auto& operator[](int i) const { return data[i]; }
57 
58  tensor<T, n...> data[m];
59 };
60 
61 template <typename T, int m>
62 struct tensor<T, m> {
63  template <typename i_type>
64  SERAC_HOST_DEVICE constexpr auto& operator()(i_type i)
65  {
66  return data[i];
67  }
68  template <typename i_type>
69  SERAC_HOST_DEVICE constexpr auto& operator()(i_type i) const
70  {
71  return data[i];
72  }
73  SERAC_HOST_DEVICE constexpr auto& operator[](int i) { return data[i]; }
74  SERAC_HOST_DEVICE constexpr const auto& operator[](int i) const { return data[i]; }
75 
77  SERAC_HOST_DEVICE constexpr operator T()
78  {
79  return data[0];
80  }
81 
83  SERAC_HOST_DEVICE constexpr operator T() const
84  {
85  return data[0];
86  }
87 
88  T data[m];
89 };
91 
100 template <typename T, int n1>
101 tensor(const T (&data)[n1]) -> tensor<T, n1>;
102 
111 template <typename T, int n1, int n2>
112 tensor(const T (&data)[n1][n2]) -> tensor<T, n1, n2>;
113 
116 
119 
123 struct zero {
125  SERAC_HOST_DEVICE operator double() { return 0.0; }
126 
128  template <typename T, int... n>
129  SERAC_HOST_DEVICE operator tensor<T, n...>()
130  {
131  return tensor<T, n...>{};
132  }
133 
135  template <typename... T>
136  SERAC_HOST_DEVICE auto operator()(T...) const
137  {
138  return zero{};
139  }
140 
142  template <typename T>
144  {
145  return zero{};
146  }
147 };
148 
150 template <typename T>
151 struct is_zero : std::false_type {
152 };
153 
155 template <>
156 struct is_zero<zero> : std::true_type {
157 };
158 
160 SERAC_HOST_DEVICE constexpr auto operator+(zero, zero) { return zero{}; }
161 
163 template <typename T>
164 SERAC_HOST_DEVICE constexpr auto operator+(zero, T other)
165 {
166  return other;
167 }
168 
170 template <typename T>
171 SERAC_HOST_DEVICE constexpr auto operator+(T other, zero)
172 {
173  return other;
174 }
175 
177 
179 SERAC_HOST_DEVICE constexpr auto operator-(zero) { return zero{}; }
180 
182 SERAC_HOST_DEVICE constexpr auto operator-(zero, zero) { return zero{}; }
183 
185 template <typename T>
186 SERAC_HOST_DEVICE constexpr auto operator-(zero, T other)
187 {
188  return -other;
189 }
190 
192 template <typename T>
193 SERAC_HOST_DEVICE constexpr auto operator-(T other, zero)
194 {
195  return other;
196 }
197 
199 
201 SERAC_HOST_DEVICE constexpr auto operator*(zero, zero) { return zero{}; }
202 
204 template <typename T>
205 SERAC_HOST_DEVICE constexpr auto operator*(zero, T /*other*/)
206 {
207  return zero{};
208 }
209 
211 template <typename T>
212 SERAC_HOST_DEVICE constexpr auto operator*(T /*other*/, zero)
213 {
214  return zero{};
215 }
216 
218 template <typename T>
219 SERAC_HOST_DEVICE constexpr auto operator/(zero, T /*other*/)
220 {
221  return zero{};
222 }
223 
225 template <typename T>
226 void operator/(T, zero)
227 {
228  static_assert(::detail::always_false<T>{}, "Error: Can't divide by zero!");
229 }
230 
232 SERAC_HOST_DEVICE constexpr auto operator+=(zero, zero) { return zero{}; }
233 
235 SERAC_HOST_DEVICE constexpr auto operator-=(zero, zero) { return zero{}; }
236 
238 template <int i>
240 {
241  return x;
242 }
243 
245 template <int i>
247 {
248  return zero{};
249 }
250 
252 template <typename T>
253 SERAC_HOST_DEVICE constexpr zero dot(const T&, zero)
254 {
255  return zero{};
256 }
257 
259 template <typename T>
260 SERAC_HOST_DEVICE constexpr zero dot(zero, const T&)
261 {
262  return zero{};
263 }
264 
272 template <typename T, int n1, int n2 = 1>
273 using reduced_tensor = std::conditional_t<
274  (n1 == 1 && n2 == 1), double,
275  std::conditional_t<n1 == 1, tensor<T, n2>, std::conditional_t<n2 == 1, tensor<T, n1>, tensor<T, n1, n2>>>>;
276 
282 template <typename T, int... n>
283 SERAC_HOST_DEVICE constexpr auto tensor_with_shape(std::integer_sequence<int, n...>)
284 {
285  return tensor<T, n...>{};
286 }
287 
299 template <typename lambda_type>
300 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
301 {
302  using T = decltype(f());
303  return tensor<T>{f()};
304 }
305 
318 template <int n1, typename lambda_type>
319 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
320 {
321  using T = decltype(f(n1));
322  tensor<T, n1> A{};
323  for (int i = 0; i < n1; i++) {
324  A(i) = f(i);
325  }
326  return A;
327 }
328 
342 template <int n1, int n2, typename lambda_type>
343 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
344 {
345  using T = decltype(f(n1, n2));
346  tensor<T, n1, n2> A{};
347  for (int i = 0; i < n1; i++) {
348  for (int j = 0; j < n2; j++) {
349  A(i, j) = f(i, j);
350  }
351  }
352  return A;
353 }
354 
369 template <int n1, int n2, int n3, typename lambda_type>
370 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
371 {
372  using T = decltype(f(n1, n2, n3));
374  for (int i = 0; i < n1; i++) {
375  for (int j = 0; j < n2; j++) {
376  for (int k = 0; k < n3; k++) {
377  A(i, j, k) = f(i, j, k);
378  }
379  }
380  }
381  return A;
382 }
383 
399 template <int n1, int n2, int n3, int n4, typename lambda_type>
400 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
401 {
402  using T = decltype(f(n1, n2, n3, n4));
404  for (int i = 0; i < n1; i++) {
405  for (int j = 0; j < n2; j++) {
406  for (int k = 0; k < n3; k++) {
407  for (int l = 0; l < n4; l++) {
408  A(i, j, k, l) = f(i, j, k, l);
409  }
410  }
411  }
412  }
413  return A;
414 }
415 
424 template <typename S, typename T, int m, int... n>
426 {
427  tensor<decltype(S{} + T{}), m, n...> C{};
428  for (int i = 0; i < m; i++) {
429  C[i] = A[i] + B[i];
430  }
431  return C;
432 }
433 
440 template <typename T, int m, int... n>
442 {
443  tensor<T, m, n...> B{};
444  for (int i = 0; i < m; i++) {
445  B[i] = -A[i];
446  }
447  return B;
448 }
449 
458 template <typename S, typename T, int m, int... n>
460 {
461  tensor<decltype(S{} + T{}), m, n...> C{};
462  for (int i = 0; i < m; i++) {
463  C[i] = A[i] - B[i];
464  }
465  return C;
466 }
467 
476 template <typename S, typename T, int m, int... n>
478 {
479  for (int i = 0; i < m; i++) {
480  A[i] += B[i];
481  }
482  return A;
483 }
484 
485 #if 0
492 template <typename T>
493 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T>& A, const T& B)
494 {
495  return A.data += B;
496 }
497 #endif
498 
505 template <typename T, int n>
507 {
508  for (int i = 0; i < n; i++) {
509  A.data[i][0] += B[i];
510  }
511  return A;
512 }
513 
520 template <typename T, int n>
522 {
523  for (int i = 0; i < n; i++) {
524  A.data[0][i] += B[i];
525  }
526  return A;
527 }
528 
535 template <typename T>
536 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T, 1>& A, const T& B)
537 {
538  return A.data[0] += B;
539 }
540 
547 template <typename T>
548 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T, 1, 1>& A, const T& B)
549 {
550  return A.data[0][0] += B;
551 }
552 
559 template <typename T, int... n>
561 {
562  return A;
563 }
564 
573 template <typename S, typename T, int m, int... n>
575 {
576  for (int i = 0; i < m; i++) {
577  A[i] -= B[i];
578  }
579  return A;
580 }
581 
588 template <typename T, int... n>
590 {
591  return A;
592 }
593 
598 template <typename T, int n>
599 SERAC_HOST_DEVICE constexpr auto outer(double A, tensor<T, n> B)
600 {
601  tensor<decltype(double{} * T{}), n> AB{};
602  for (int i = 0; i < n; i++) {
603  AB[i] = A * B[i];
604  }
605  return AB;
606 }
607 
612 template <typename T, int m>
613 SERAC_HOST_DEVICE constexpr auto outer(const tensor<T, m>& A, double B)
614 {
615  tensor<decltype(T{} * double{}), m> AB{};
616  for (int i = 0; i < m; i++) {
617  AB[i] = A[i] * B;
618  }
619  return AB;
620 }
621 
626 template <typename T, int n>
627 SERAC_HOST_DEVICE constexpr auto outer(zero, const tensor<T, n>&)
628 {
629  return zero{};
630 }
631 
636 template <typename T, int n>
637 SERAC_HOST_DEVICE constexpr auto outer(const tensor<T, n>&, zero)
638 {
639  return zero{};
640 }
641 
646 template <typename S, typename T, int m, int n>
647 SERAC_HOST_DEVICE constexpr auto outer(const tensor<S, m>& A, const tensor<T, n>& B)
648 {
649  tensor<decltype(S{} * T{}), m, n> AB{};
650  for (int i = 0; i < m; i++) {
651  for (int j = 0; j < n; j++) {
652  AB[i][j] = A[i] * B[j];
653  }
654  }
655  return AB;
656 }
657 
667 template <typename S, typename T, int m, int n>
668 SERAC_HOST_DEVICE constexpr auto inner(const tensor<S, m, n>& A, const tensor<T, m, n>& B)
669 {
670  decltype(S{} * T{}) sum{};
671  for (int i = 0; i < m; i++) {
672  for (int j = 0; j < n; j++) {
673  sum += A[i][j] * B[i][j];
674  }
675  }
676  return sum;
677 }
678 
683 template <typename S, typename T, int m>
684 SERAC_HOST_DEVICE constexpr auto inner(const tensor<S, m>& A, const tensor<T, m>& B)
685 {
686  decltype(S{} * T{}) sum{};
687  for (int i = 0; i < m; i++) {
688  sum += A[i] * B[i];
689  }
690  return sum;
691 }
692 
697 SERAC_HOST_DEVICE constexpr auto inner(double A, double B) { return A * B; }
698 
707 template <typename S, typename T, int m, int n, int p>
708 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p>& B)
709 {
710  tensor<decltype(S{} * T{}), m, p> AB{};
711  for (int i = 0; i < m; i++) {
712  for (int j = 0; j < p; j++) {
713  for (int k = 0; k < n; k++) {
714  AB[i][j] = AB[i][j] + A[i][k] * B[k][j];
715  }
716  }
717  }
718  return AB;
719 }
720 
725 template <typename T, int m>
726 SERAC_HOST_DEVICE constexpr auto dot(const tensor<T, m>& A, double B)
727 {
728  return A * B;
729 }
730 
735 template <typename T, int m>
736 SERAC_HOST_DEVICE constexpr auto dot(double B, const tensor<T, m>& A)
737 {
738  return B * A;
739 }
740 
745 template <typename S, typename T, int m>
746 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m>& B)
747 {
748  decltype(S{} * T{}) AB{};
749  for (int i = 0; i < m; i++) {
750  AB = AB + A[i] * B[i];
751  }
752  return AB;
753 }
754 
759 template <typename S, typename T, int m, int n>
760 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n>& B)
761 {
762  tensor<decltype(S{} * T{}), n> AB{};
763  for (int i = 0; i < n; i++) {
764  for (int j = 0; j < m; j++) {
765  AB[i] = AB[i] + A[j] * B[j][i];
766  }
767  }
768  return AB;
769 }
770 
775 template <typename S, typename T, int m, int n, int p>
776 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n, p>& B)
777 {
778  tensor<decltype(S{} * T{}), n, p> AB{};
779  for (int j = 0; j < m; j++) {
780  AB = AB + A[j] * B[j];
781  }
782  return AB;
783 }
784 
795 template <typename S, typename T, int m, int n, int p, int q>
796 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n, p, q>& B)
797 {
798  tensor<decltype(S{} * T{}), n, p, q> AB{};
799  for (int j = 0; j < m; j++) {
800  AB = AB + A[j] * B[j];
801  }
802  return AB;
803 }
804 
809 template <typename S, typename T, int m, int n>
810 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n>& B)
811 {
812  tensor<decltype(S{} * T{}), m> AB{};
813  for (int i = 0; i < m; i++) {
814  for (int j = 0; j < n; j++) {
815  AB[i] = AB[i] + A[i][j] * B[j];
816  }
817  }
818  return AB;
819 }
820 
825 template <typename S, typename T, int m, int n, int p, int q, int r>
826 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p, q, r>& B)
827 {
828  tensor<decltype(S{} * T{}), m, p, q, r> AB{};
829  for (int i = 0; i < m; i++) {
830  for (int j = 0; j < n; j++) {
831  AB[i] = AB[i] + A[i][j] * B[j];
832  }
833  }
834  return AB;
835 }
836 
841 template <typename S, typename T, int m, int n, int p, int q>
842 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p, q>& B)
843 {
844  tensor<decltype(S{} * T{}), m, p, q> AB{};
845  for (int i = 0; i < m; i++) {
846  for (int j = 0; j < n; j++) {
847  AB[i] = AB[i] + A[i][j] * B[j];
848  }
849  }
850  return AB;
851 }
852 
857 template <typename S, typename T, int m, int n, int p>
858 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n, p>& A, const tensor<T, p>& B)
859 {
860  tensor<decltype(S{} * T{}), m, n> AB{};
861  for (int i = 0; i < m; i++) {
862  for (int j = 0; j < n; j++) {
863  for (int k = 0; k < p; k++) {
864  AB[i][j] += A[i][j][k] * B[k];
865  }
866  }
867  }
868  return AB;
869 }
870 
875 template <typename S, typename T, typename U, int m, int n>
876 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& u, const tensor<T, m, n>& A, const tensor<U, n>& v)
877 {
878  decltype(S{} * T{} * U{}) uAv{};
879  for (int i = 0; i < m; i++) {
880  for (int j = 0; j < n; j++) {
881  uAv += u[i] * A[i][j] * v[j];
882  }
883  }
884  return uAv;
885 }
886 
888 template <typename S, typename T, int m, int n, int p, int q>
889 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n, p, q>& A, const tensor<T, q>& B)
890 {
891  tensor<decltype(S{} * T{}), m, n, p> AB{};
892  for (int i = 0; i < m; i++) {
893  for (int j = 0; j < n; j++) {
894  for (int k = 0; k < p; k++) {
895  for (int l = 0; l < q; l++) {
896  AB[i][j][k] += A[i][j][k][l] * B[l];
897  }
898  }
899  }
900  }
901  return AB;
902 }
903 
905 template <typename T>
906 auto cross(const tensor<T, 3, 2>& A)
907 {
908  return tensor<T, 3>{A(1, 0) * A(2, 1) - A(2, 0) * A(1, 1), A(2, 0) * A(0, 1) - A(0, 0) * A(2, 1),
909  A(0, 0) * A(1, 1) - A(1, 0) * A(0, 1)};
910 }
911 
913 template <typename T>
914 auto cross(const tensor<T, 2, 1>& v)
915 {
916  return tensor<T, 2>{v(1, 0), -v(0, 0)};
917 }
918 
920 template <typename T>
921 auto cross(const tensor<T, 2>& v)
922 {
923  return tensor<T, 2>{v[1], -v[0]};
924 }
925 
927 template <typename S, typename T>
928 auto cross(const tensor<S, 3>& u, const tensor<T, 3>& v)
929 {
930  return tensor<decltype(S{} * T{}), 3>{u(1) * v(2) - u(2) * v(1), u(2) * v(0) - u(0) * v(2),
931  u(0) * v(1) - u(1) * v(0)};
932 }
933 
945 template <typename S, typename T, int m, int n, int p, int q>
947 {
948  tensor<decltype(S{} * T{}), m, n> AB{};
949  for (int i = 0; i < m; i++) {
950  for (int j = 0; j < n; j++) {
951  for (int k = 0; k < p; k++) {
952  for (int l = 0; l < q; l++) {
953  AB[i][j] += A[i][j][k][l] * B[k][l];
954  }
955  }
956  }
957  }
958  return AB;
959 }
960 
965 template <typename S, typename T, int m, int n, int p>
967 {
968  tensor<decltype(S{} * T{}), m> AB{};
969  for (int i = 0; i < m; i++) {
970  for (int j = 0; j < n; j++) {
971  for (int k = 0; k < p; k++) {
972  AB[i] += A[i][j][k] * B[j][k];
973  }
974  }
975  }
976  return AB;
977 }
978 
983 template <typename S, typename T, int m, int n>
984 constexpr auto double_dot(const tensor<S, m, n>& A, const tensor<T, m, n>& B)
985 {
986  decltype(S{} * T{}) AB{};
987  for (int i = 0; i < m; i++) {
988  for (int j = 0; j < n; j++) {
989  AB += A[i][j] * B[i][j];
990  }
991  }
992  return AB;
993 }
994 
998 template <typename S, typename T, int... m, int... n>
999 SERAC_HOST_DEVICE constexpr auto operator*(const tensor<S, m...>& A, const tensor<T, n...>& B)
1000 {
1001  return dot(A, B);
1002 }
1003 
1008 template <typename T, int m>
1010 {
1011  T total{};
1012  for (int i = 0; i < m; i++) {
1013  total += A[i] * A[i];
1014  }
1015  return total;
1016 }
1017 
1019 template <typename T, int m, int n>
1021 {
1022  T total{};
1023  for (int i = 0; i < m; i++) {
1024  for (int j = 0; j < n; j++) {
1025  total += A[i][j] * A[i][j];
1026  }
1027  }
1028  return total;
1029 }
1030 
1032 template <typename T, int... n>
1034 {
1035  T total{};
1036  for_constexpr<n...>([&](auto... i) { total += A(i...) * A(i...); });
1037  return total;
1038 }
1039 
1044 template <typename T, int... n>
1046 {
1047  using std::sqrt;
1048  return sqrt(squared_norm(A));
1049 }
1050 
1054 SERAC_HOST_DEVICE constexpr auto norm(zero) { return zero{}; }
1055 
1061 template <typename T, int... n>
1063 {
1064  return A / norm(A);
1065 }
1066 
1072 template <typename T, int n>
1073 SERAC_HOST_DEVICE constexpr auto tr(const tensor<T, n, n>& A)
1074 {
1075  T trA{};
1076  for (int i = 0; i < n; i++) {
1077  trA = trA + A[i][i];
1078  }
1079  return trA;
1080 }
1081 
1087 template <typename T, int n>
1088 SERAC_HOST_DEVICE constexpr auto sym(const tensor<T, n, n>& A)
1089 {
1090  tensor<T, n, n> symA{};
1091  for (int i = 0; i < n; i++) {
1092  for (int j = 0; j < n; j++) {
1093  symA[i][j] = 0.5 * (A[i][j] + A[j][i]);
1094  }
1095  }
1096  return symA;
1097 }
1098 
1104 template <typename T, int n>
1106 {
1107  tensor<T, n, n> antisymA{};
1108  for (int i = 0; i < n; i++) {
1109  for (int j = 0; j < n; j++) {
1110  antisymA[i][j] = 0.5 * (A[i][j] - A[j][i]);
1111  }
1112  }
1113  return antisymA;
1114 }
1115 
1123 template <typename T, int n>
1124 SERAC_HOST_DEVICE constexpr auto dev(const tensor<T, n, n>& A)
1125 {
1126  auto devA = A;
1127  auto trA = tr(A);
1128  for (int i = 0; i < n; i++) {
1129  devA[i][i] -= trA / n;
1130  }
1131  return devA;
1132 }
1133 
1140 template <typename T, int n>
1142 {
1143  tensor<T, n, n> D{};
1144  for (int i = 0; i < n; i++) {
1145  D[i][i] = A[i][i];
1146  }
1147  return D;
1148 }
1149 
1154 template <typename T, int n>
1156 {
1157  tensor<T, n, n> D{};
1158  for (int i = 0; i < n; i++) {
1159  D[i][i] = d[i];
1160  }
1161  return D;
1162 }
1163 
1168 template <typename T, int n>
1170 {
1171  tensor<T, n> d{};
1172  for (int i = 0; i < n; i++) {
1173  d[i] = D[i][i];
1174  }
1175  return d;
1176 }
1177 
1182 template <int dim>
1184 {
1186  for (int i = 0; i < dim; i++) {
1187  for (int j = 0; j < dim; j++) {
1188  I[i][j] = (i == j);
1189  }
1190  }
1191  return I;
1192 }
1193 
1198 template <typename T, int m, int n>
1200 {
1201  tensor<T, n, m> AT{};
1202  for (int i = 0; i < n; i++) {
1203  for (int j = 0; j < m; j++) {
1204  AT[i][j] = A[j][i];
1205  }
1206  }
1207  return AT;
1208 }
1209 
1214 template <typename T>
1215 SERAC_HOST_DEVICE constexpr auto det(const tensor<T, 2, 2>& A)
1216 {
1217  return A[0][0] * A[1][1] - A[0][1] * A[1][0];
1218 }
1220 template <typename T>
1221 SERAC_HOST_DEVICE constexpr auto det(const tensor<T, 3, 3>& A)
1222 {
1223  return A[0][0] * A[1][1] * A[2][2] + A[0][1] * A[1][2] * A[2][0] + A[0][2] * A[1][0] * A[2][1] -
1224  A[0][0] * A[1][2] * A[2][1] - A[0][1] * A[1][0] * A[2][2] - A[0][2] * A[1][1] * A[2][0];
1225 }
1226 
1237 template <typename T>
1239 {
1240  // From the Cayley-Hamilton theorem, we get that for any N by N matrix A,
1241  // det(A - I) - 1 = I1(A) + I2(A) + ... + IN(A),
1242  // where the In are the principal invariants of A.
1243  // We inline the definitions of the principal invariants to increase computational speed.
1244 
1245  // equivalent to tr(A) + det(A)
1246  return A(0, 0) - A(0, 1) * A(1, 0) + A(1, 1) + A(0, 0) * A(1, 1);
1247 }
1248 
1250 template <typename T>
1252 {
1253  // For notes on the implementation, see the 2x2 version.
1254 
1255  // clang-format off
1256  // equivalent to tr(A) + I2(A) + det(A)
1257  return A(0, 0) + A(1, 1) + A(2, 2)
1258  - A(0, 1) * A(1, 0) * (1 + A(2, 2))
1259  + A(0, 0) * A(1, 1) * (1 + A(2, 2))
1260  - A(0, 2) * A(2, 0) * (1 + A(1, 1))
1261  - A(1, 2) * A(2, 1) * (1 + A(0, 0))
1262  + A(0, 0) * A(2, 2)
1263  + A(1, 1) * A(2, 2)
1264  + A(0, 1) * A(1, 2) * A(2, 0)
1265  + A(0, 2) * A(1, 0) * A(2, 1);
1266  // clang-format on
1267 }
1268 
1277 template <typename T, int dim>
1279 {
1280  auto B = A;
1281  for (int i = 0; i < 15; i++) {
1282  B = 0.5 * (B + dot(A, inv(B)));
1283  }
1284  return B;
1285 }
1286 
1316 template <int i1, int i2, typename S, int m, int... n, typename T, int p, int q>
1318 {
1319  constexpr int Adims[] = {m, n...};
1320  constexpr int Bdims[] = {p, q};
1321  static_assert(sizeof...(n) < 3);
1322  static_assert(Adims[i1] == Bdims[i2], "error: incompatible tensor dimensions");
1323 
1324  // first, we have to figure out the dimensions of the output tensor
1325  constexpr int new_dim = (i2 == 0) ? q : p;
1326  constexpr int d1 = (i1 == 0) ? new_dim : Adims[0];
1327  constexpr int d2 = (i1 == 1) ? new_dim : Adims[1];
1328  constexpr int d3 = sizeof...(n) == 1 ? 0 : ((i1 == 2) ? new_dim : Adims[2]);
1329 
1330  // the type of the output tensor is easier to figure out
1331  using U = decltype(S{} * T{});
1332 
1333  auto C = []() {
1334  if constexpr (d3 == 0) return tensor<U, d1, d2>{};
1335  if constexpr (d3 != 0) return tensor<U, d1, d2, d3>{};
1336  }();
1337 
1338  if constexpr (d3 == 0) {
1339  for (int i = 0; i < d1; i++) {
1340  for (int j = 0; j < d2; j++) {
1341  U sum{};
1342  for (int k = 0; k < Adims[i1]; k++) {
1343  if constexpr (i1 == 0 && i2 == 0) sum += A(k, j) * B(k, i);
1344  if constexpr (i1 == 1 && i2 == 0) sum += A(i, k) * B(k, j);
1345  if constexpr (i1 == 0 && i2 == 1) sum += A(k, j) * B(i, k);
1346  if constexpr (i1 == 1 && i2 == 1) sum += A(i, k) * B(j, k);
1347  }
1348  C(i, j) = sum;
1349  }
1350  }
1351  } else {
1352  for (int i = 0; i < d1; i++) {
1353  for (int j = 0; j < d2; j++) {
1354  for (int k = 0; k < d3; k++) {
1355  U sum{};
1356  for (int l = 0; l < Adims[i1]; l++) {
1357  if constexpr (i1 == 0 && i2 == 0) sum += A(l, j, k) * B(l, i);
1358  if constexpr (i1 == 1 && i2 == 0) sum += A(i, l, k) * B(l, j);
1359  if constexpr (i1 == 2 && i2 == 0) sum += A(i, j, l) * B(l, k);
1360  if constexpr (i1 == 0 && i2 == 1) sum += A(l, j, k) * B(i, l);
1361  if constexpr (i1 == 1 && i2 == 1) sum += A(i, l, k) * B(j, l);
1362  if constexpr (i1 == 2 && i2 == 1) sum += A(i, j, l) * B(k, l);
1363  }
1364  C(i, j, k) = sum;
1365  }
1366  }
1367  }
1368  }
1369 
1370  return C;
1371 }
1372 
1374 template <int i1, int i2, typename T>
1375 SERAC_HOST_DEVICE auto contract(const zero&, const T&)
1376 {
1377  return zero{};
1378 }
1379 
1389 template <typename T, int... n>
1391 {
1392  return norm(A - B) / norm(A);
1393 }
1394 
1403 template <int n>
1404 SERAC_HOST_DEVICE bool is_symmetric(tensor<double, n, n> A, double tolerance = 1.0e-8)
1405 {
1406  for (int i = 0; i < n; ++i) {
1407  for (int j = i + 1; j < n; ++j) {
1408  if (std::abs(A(i, j) - A(j, i)) > tolerance) {
1409  return false;
1410  };
1411  }
1412  }
1413  return true;
1414 }
1415 
1425 {
1426  if (!is_symmetric(A)) {
1427  return false;
1428  }
1429  if (A(0, 0) < 0.0) {
1430  return false;
1431  }
1432  if (det(A) < 0.0) {
1433  return false;
1434  }
1435  return true;
1436 }
1439 {
1440  if (!is_symmetric(A)) {
1441  return false;
1442  }
1443  if (det(A) < 0.0) {
1444  return false;
1445  }
1446  auto subtensor = make_tensor<2, 2>([A](int i, int j) { return A(i, j); });
1447  if (!is_symmetric_and_positive_definite(subtensor)) {
1448  return false;
1449  }
1450  return true;
1451 }
1452 
1458 template <typename T, int n>
1463 };
1464 
1479 template <typename T, int n, int... m>
1481  const tensor<int, n>& P)
1482 {
1483  tensor<T, n, m...> y{};
1484  for (int i = 0; i < n; i++) {
1485  auto c = b[P[i]];
1486  for (int j = 0; j < i; j++) {
1487  c -= L[i][j] * y[j];
1488  }
1489  y[i] = c / L[i][i];
1490  }
1491  return y;
1492 }
1493 
1498 template <typename T, int n, int... m>
1500 {
1501  // no permutation provided, so just map each equation to itself
1502  // TODO make a convienience function for ranges like this
1503  // BT 05/09/2022
1504  tensor<int, n> P(make_tensor<n>([](auto i) { return i; }));
1505 
1506  return solve_lower_triangular(L, b, P);
1507 }
1508 
1519 template <typename T, int n, int... m>
1521 {
1522  tensor<T, n, m...> x{};
1523  for (int i = n - 1; i >= 0; i--) {
1524  auto c = y[i];
1525  for (int j = i + 1; j < n; j++) {
1526  c -= U[i][j] * x[j];
1527  }
1528  x[i] = c / U[i][i];
1529  }
1530  return x;
1531 }
1532 
1537 template <typename S, typename T, int n, int... m>
1538 SERAC_HOST_DEVICE constexpr auto linear_solve(const LuFactorization<S, n>& lu_factors, const tensor<T, n, m...>& b)
1539 {
1540  // Forward substitution
1541  // solve Ly = b
1542  const auto y = solve_lower_triangular(lu_factors.L, b, lu_factors.P);
1543 
1544  // Back substitution
1545  // Solve Ux = y
1546  return solve_upper_triangular(lu_factors.U, y);
1547 }
1548 
1553 template <typename T, int n>
1554 SERAC_HOST_DEVICE constexpr auto linear_solve(const LuFactorization<T, n>& /* lu_factors */, const zero /* b */)
1555 {
1556  return zero{};
1557 }
1558 
1565 {
1566  double inv_detA(1.0 / det(A));
1567 
1568  tensor<double, 2, 2> invA{};
1569 
1570  invA[0][0] = A[1][1] * inv_detA;
1571  invA[0][1] = -A[0][1] * inv_detA;
1572  invA[1][0] = -A[1][0] * inv_detA;
1573  invA[1][1] = A[0][0] * inv_detA;
1574 
1575  return invA;
1576 }
1577 
1583 {
1584  double inv_detA(1.0 / det(A));
1585 
1586  tensor<double, 3, 3> invA{};
1587 
1588  invA[0][0] = (A[1][1] * A[2][2] - A[1][2] * A[2][1]) * inv_detA;
1589  invA[0][1] = (A[0][2] * A[2][1] - A[0][1] * A[2][2]) * inv_detA;
1590  invA[0][2] = (A[0][1] * A[1][2] - A[0][2] * A[1][1]) * inv_detA;
1591  invA[1][0] = (A[1][2] * A[2][0] - A[1][0] * A[2][2]) * inv_detA;
1592  invA[1][1] = (A[0][0] * A[2][2] - A[0][2] * A[2][0]) * inv_detA;
1593  invA[1][2] = (A[0][2] * A[1][0] - A[0][0] * A[1][2]) * inv_detA;
1594  invA[2][0] = (A[1][0] * A[2][1] - A[1][1] * A[2][0]) * inv_detA;
1595  invA[2][1] = (A[0][1] * A[2][0] - A[0][0] * A[2][1]) * inv_detA;
1596  invA[2][2] = (A[0][0] * A[1][1] - A[0][1] * A[1][0]) * inv_detA;
1597 
1598  return invA;
1599 }
1605 template <typename T, int n>
1606 SERAC_HOST_DEVICE constexpr auto inv(const tensor<T, n, n>& A)
1607 {
1608  auto I = DenseIdentity<n>();
1609  return linear_solve(A, I);
1610 }
1611 
1620 template <typename T, int m, int... n>
1621 auto& operator<<(std::ostream& out, const tensor<T, m, n...>& A)
1622 {
1623  out << '{' << A[0];
1624  for (int i = 1; i < m; i++) {
1625  out << ", " << A[i];
1626  }
1627  out << '}';
1628  return out;
1629 }
1630 
1636 inline auto& operator<<(std::ostream& out, zero)
1637 {
1638  out << "zero";
1639  return out;
1640 }
1641 
1647 inline SERAC_HOST_DEVICE void print(double value) { printf("%f", value); }
1648 
1653 template <int m, int... n>
1655 {
1656  printf("{");
1657  print(A[0]);
1658  for (int i = 1; i < m; i++) {
1659  printf(",");
1660  print(A[i]);
1661  }
1662  printf("}");
1663 }
1664 
1669 template <int n>
1670 SERAC_HOST_DEVICE constexpr auto chop(const tensor<double, n>& A)
1671 {
1672  auto copy = A;
1673  for (int i = 0; i < n; i++) {
1674  if (copy[i] * copy[i] < 1.0e-20) {
1675  copy[i] = 0.0;
1676  }
1677  }
1678  return copy;
1679 }
1680 
1682 template <int m, int n>
1684 {
1685  auto copy = A;
1686  for (int i = 0; i < m; i++) {
1687  for (int j = 0; j < n; j++) {
1688  if (copy[i][j] * copy[i][j] < 1.0e-20) {
1689  copy[i][j] = 0.0;
1690  }
1691  }
1692  }
1693  return copy;
1694 }
1695 
1697 namespace detail {
1698 
1699 template <typename T1, typename T2>
1700 struct outer_prod;
1701 
1702 template <int... m, int... n>
1703 struct outer_prod<tensor<double, m...>, tensor<double, n...>> {
1704  using type = tensor<double, m..., n...>;
1705 };
1706 
1707 template <int... n>
1708 struct outer_prod<double, tensor<double, n...>> {
1709  using type = tensor<double, n...>;
1710 };
1711 
1712 template <int... n>
1713 struct outer_prod<tensor<double, n...>, double> {
1714  using type = tensor<double, n...>;
1715 };
1716 
1717 template <>
1718 struct outer_prod<double, double> {
1719  using type = tensor<double>;
1720 };
1721 
1722 template <typename T>
1723 struct outer_prod<zero, T> {
1724  using type = zero;
1725 };
1726 
1727 template <typename T>
1728 struct outer_prod<T, zero> {
1729  using type = zero;
1730 };
1731 
1732 } // namespace detail
1734 
1740 template <typename T1, typename T2>
1742 
1747 inline SERAC_HOST_DEVICE auto get_gradient(double /* arg */) { return zero{}; }
1748 
1754 template <int... n>
1755 SERAC_HOST_DEVICE constexpr auto get_gradient(const tensor<double, n...>& /* arg */)
1756 {
1757  return zero{};
1758 }
1759 
1763 SERAC_HOST_DEVICE constexpr auto chain_rule(const zero /* df_dx */, const zero /* dx */) { return zero{}; }
1764 
1769 template <typename T>
1770 SERAC_HOST_DEVICE constexpr auto chain_rule(const zero /* df_dx */, const T /* dx */)
1771 {
1772  return zero{};
1773 }
1774 
1779 template <typename T>
1780 SERAC_HOST_DEVICE constexpr auto chain_rule(const T /* df_dx */, const zero /* dx */)
1781 {
1782  return zero{};
1783 }
1784 
1789 SERAC_HOST_DEVICE constexpr auto chain_rule(const double df_dx, const double dx) { return df_dx * dx; }
1790 
1795 template <int... n>
1796 SERAC_HOST_DEVICE constexpr auto chain_rule(const tensor<double, n...>& df_dx, const double dx)
1797 {
1798  return df_dx * dx;
1799 }
1800 
1805 template <int... n>
1807 {
1808  double total{};
1809  for_constexpr<n...>([&](auto... i) { total += df_dx(i...) * dx(i...); });
1810  return total;
1811 }
1812 
1817 template <int m, int... n>
1819 {
1820  tensor<double, m> total{};
1821  for (int i = 0; i < m; i++) {
1822  total[i] = chain_rule(df_dx[i], dx);
1823  }
1824  return total;
1825 }
1826 
1831 template <int m, int n, int... p>
1833 {
1834  tensor<double, m, n> total{};
1835  for (int i = 0; i < m; i++) {
1836  for (int j = 0; j < n; j++) {
1837  total[i][j] = chain_rule(df_dx[i][j], dx);
1838  }
1839  }
1840  return total;
1841 }
1842 
1850 template <typename T, int... n>
1852 {
1853  return (n * ... * 1);
1854 }
1855 
1860 SERAC_HOST_DEVICE constexpr int size(const double&) { return 1; }
1861 
1863 SERAC_HOST_DEVICE constexpr int size(zero) { return 0; }
1864 
1873 template <int i, typename T, int... n>
1875 {
1876  constexpr int dimensions[] = {n...};
1877  return dimensions[i];
1878 }
1879 
1888 template <typename T, int m, int... n>
1890 {
1891  return m;
1892 }
1893 
1895 template <typename T, int... n>
1896 bool isnan(const tensor<T, n...>& A)
1897 {
1898  bool found_nan = false;
1899  for_constexpr<n...>([&](auto... i) { found_nan |= std::isnan(A(i...)); });
1900  return found_nan;
1901 }
1902 
1904 inline bool isnan(const zero&) { return false; }
1905 
1906 } // namespace serac
1907 
1908 // todo: port to current tensor class:
1909 #if 0
1910 // eigendecomposition for symmetric A
1911 //
1912 // based on "A robust algorithm for finding the eigenvalues and
1913 // eigenvectors of 3x3 symmetric matrices", by Scherzinger & Dohrmann
1914 __host__ __device__
1915 inline void eig(const r2tensor < 3, 3 > & A,
1916  r1tensor < 3 > & eta,
1917  r2tensor < 3, 3 > & Q) {
1918 
1919  for (int i = 0; i < 3; i++) {
1920  eta(i) = 1.0;
1921  for (int j = 0; j < 3; j++) {
1922  Q(i,j) = (i == j);
1923  }
1924  }
1925 
1926  auto A_dev = dev(A);
1927 
1928  double J2 = I2(A_dev);
1929  double J3 = I3(A_dev);
1930 
1931  if (J2 > 0.0) {
1932 
1933  // angle used to find eigenvalues
1934  double tmp = (0.5 * J3) * pow(3.0 / J2, 1.5);
1935  double alpha = acos(fmin(fmax(tmp, -1.0), 1.0)) / 3.0;
1936 
1937  // consider the most distinct eigenvalue first
1938  if (6.0 * alpha < M_PI) {
1939  eta(0) = 2 * sqrt(J2 / 3.0) * cos(alpha);
1940  } else {
1941  eta(0) = 2 * sqrt(J2 / 3.0) * cos(alpha + 2.0 * M_PI / 3.0);
1942  }
1943 
1944  // find the eigenvector for that eigenvalue
1945  r1tensor < 3 > r[3];
1946 
1947  int imax = -1;
1948  double norm_max = -1.0;
1949 
1950  for (int i = 0; i < 3; i++) {
1951 
1952  for (int j = 0; j < 3; j++) {
1953  r[i](j) = A_dev(j,i) - (i == j) * eta(0);
1954  }
1955 
1956  double norm_r = norm(r[i]);
1957  if (norm_max < norm_r) {
1958  imax = i;
1959  norm_max = norm_r;
1960  }
1961 
1962  }
1963 
1964  r1tensor < 3 > s0, s1, t1, t2, v0, v1, v2, w;
1965 
1966  s0 = normalize(r[imax]);
1967  t1 = r[(imax+1)%3] - dot(r[(imax+1)%3], s0) * s0;
1968  t2 = r[(imax+2)%3] - dot(r[(imax+2)%3], s0) * s0;
1969  s1 = normalize((norm(t1) > norm(t2)) ? t1 : t2);
1970 
1971  // record the first eigenvector
1972  v0 = cross(s0, s1);
1973  for (int i = 0; i < 3; i++) {
1974  Q(i,0) = v0(i);
1975  }
1976 
1977  // get the other two eigenvalues by solving the
1978  // remaining quadratic characteristic polynomial
1979  auto A_dev_s0 = dot(A_dev, s0);
1980  auto A_dev_s1 = dot(A_dev, s1);
1981 
1982  double A11 = dot(s0, A_dev_s0);
1983  double A12 = dot(s0, A_dev_s1);
1984  double A21 = dot(s1, A_dev_s0);
1985  double A22 = dot(s1, A_dev_s1);
1986 
1987  double delta = 0.5 * signum(A11-A22) * sqrt((A11-A22)*(A11-A22) + 4*A12*A21);
1988 
1989  eta(1) = 0.5 * (A11 + A22) - delta;
1990  eta(2) = 0.5 * (A11 + A22) + delta;
1991 
1992  // if the remaining eigenvalues are exactly the same
1993  // then just use the basis for the orthogonal complement
1994  // found earlier
1995  if (fabs(delta) <= 1.0e-15) {
1996 
1997  for (int i = 0; i < 3; i++){
1998  Q(i,1) = s0(i);
1999  Q(i,2) = s1(i);
2000  }
2001 
2002  // otherwise compute the remaining eigenvectors
2003  } else {
2004 
2005  t1 = A_dev_s0 - eta(1) * s0;
2006  t2 = A_dev_s1 - eta(1) * s1;
2007 
2008  w = normalize((norm(t1) > norm(t2)) ? t1 : t2);
2009 
2010  v1 = normalize(cross(w, v0));
2011  for (int i = 0; i < 3; i++) Q(i,1) = v1(i);
2012 
2013  // define the last eigenvector as
2014  // the direction perpendicular to the
2015  // first two directions
2016  v2 = normalize(cross(v0, v1));
2017  for (int i = 0; i < 3; i++) Q(i,2) = v2(i);
2018 
2019  }
2020 
2021  // eta are actually eigenvalues of A_dev, so
2022  // shift them to get eigenvalues of A
2023  eta += tr(A) / 3.0;
2024 
2025  }
2026 
2027 }
2028 
2029 inline float angle_between(const vec < 2 > & a, const vec < 2 > & b) {
2030  return acos(clip(dot(normalize(a), normalize(b)), -1.0f, 1.0f));
2031 }
2032 
2033 inline float angle_between(const vec < 3 > & a, const vec < 3 > & b) {
2034  return acos(clip(dot(normalize(a), normalize(b)), -1.0f, 1.0f));
2035 }
2036 
2037 // angle between proper orthogonal matrices
2038 inline float angle_between(const mat < 3, 3 > & U, const mat < 3, 3 > & V) {
2039  return acos(0.5f * (tr(dot(U, transpose(V))) - 1.0f));
2040 }
2041 
2042 inline mat < 2, 2 > rotation(const float theta) {
2043  return mat< 2, 2 >{
2044  {cos(theta), -sin(theta)},
2045  { sin(theta), cos(theta) }
2046  };
2047 }
2048 
2049 inline mat < 3, 3 > axis_to_rotation(const vec < 3 > & omega) {
2050 
2051  float norm_omega = norm(omega);
2052 
2053  if (fabs(norm_omega) < 0.000001f) {
2054 
2055  return eye< 3 >();
2056 
2057  } else {
2058 
2059  vec3 u = omega / norm_omega;
2060 
2061  float c = cos(norm_omega);
2062  float s = sin(norm_omega);
2063 
2064  return mat < 3, 3 >{
2065  {
2066  u[0]*u[0]*(1.0f - c) + c,
2067  u[0]*u[1]*(1.0f - c) - u[2]*s,
2068  u[0]*u[2]*(1.0f - c) + u[1]*s
2069  },{
2070  u[1]*u[0]*(1.0f - c) + u[2]*s,
2071  u[1]*u[1]*(1.0f - c) + c,
2072  u[1]*u[2]*(1.0f - c) - u[0]*s
2073  },{
2074  u[2]*u[0]*(1.0f - c) - u[1]*s,
2075  u[2]*u[1]*(1.0f - c) + u[0]*s,
2076  u[2]*u[2]*(1.0f - c) + c
2077  }
2078  };
2079 
2080  }
2081 
2082 }
2083 
2084 // assumes R is a proper-orthogonal matrix
2085 inline vec < 3 > rotation_to_axis(const mat < 3, 3 > & R) {
2086 
2087  float theta = acos(clip(0.5f * (tr(R) - 1.0f), -1.0f, 1.0f));
2088 
2089  float scale;
2090 
2091  // for small angles, prefer series expansion to division by sin(theta) ~ 0
2092  if (fabs(theta) < 0.00001f) {
2093  scale = 0.5f + theta * theta / 12.0f;
2094  }
2095  else {
2096  scale = 0.5f * theta / sin(theta);
2097  }
2098 
2099  return vec3{ R(2,1) - R(1,2), R(0,2) - R(2,0), R(1,0) - R(0,1) } *scale;
2100 
2101 }
2102 
2103 inline mat < 3, 3 > look_at(const vec < 3 > & direction, const vec < 3 > & up = vec3{ 0.0f, 0.0f, 1.0f }) {
2104  vec3 f = normalize(direction);
2105  vec3 u = normalize(cross(f, cross(up, f)));
2106  vec3 l = normalize(cross(u, f));
2107 
2108  return mat3{
2109  {f[0], l[0], u[0]},
2110  {f[1], l[1], u[1]},
2111  {f[2], l[2], u[2]}
2112  };
2113 }
2114 
2115 inline mat < 2, 2 > look_at(const vec < 2 > & direction) {
2116  vec2 f = normalize(direction);
2117  vec2 l = cross(f);
2118 
2119  return mat2{
2120  {f[0], l[0]},
2121  {f[1], l[1]},
2122  };
2123 }
2124 
2125 inline mat < 3, 3 > R3_basis(const vec3 & n) {
2126  float sign = (n[2] >= 0.0f) ? 1.0f : -1.0f;
2127  float a = -1.0f / (sign + n[2]);
2128  float b = n[0] * n[1] * a;
2129 
2130  return mat < 3, 3 >{
2131  {
2132  1.0f + sign * n[0] * n[0] * a,
2133  b,
2134  n[0],
2135  },{
2136  sign * b,
2137  sign + n[1] * n[1] * a,
2138  n[1]
2139  },{
2140  -sign * n[0],
2141  -n[1],
2142  n[2]
2143  }
2144  };
2145 }
2146 #endif
2147 
2149 
2150 #include "serac/numerics/functional/tuple_tensor_dual_functions.hpp"
This file contains the interface used for initializing/terminating any hardware accelerator-related f...
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
Macro to turn off specific nvcc warnings.
Definition: accelerator.hpp:50
#define SERAC_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
Definition: accelerator.hpp:38
Implementation of isotropic tensor classes.
Utilities for C++ metaprogramming.
constexpr SERAC_HOST_DEVICE void for_constexpr(lambda &&f)
multidimensional loop tool that evaluates the lambda body inside the innermost loop.
Accelerator functionality.
Definition: serac.cpp:38
constexpr SERAC_HOST_DEVICE auto inv(const tensor< T, n, n > &A)
Definition: tensor.hpp:1606
constexpr SERAC_HOST_DEVICE tensor< double, dim, dim > DenseIdentity()
Obtains the identity matrix of the specified dimension.
Definition: tensor.hpp:1183
typename detail::outer_prod< T1, T2 >::type outer_product_t
a type function that returns the tensor type of an outer product of two tensors
Definition: tensor.hpp:1741
constexpr SERAC_HOST_DEVICE tensor< T, n > diag(const tensor< T, n, n > &D)
Returns an array containing the diagonal entries of a square matrix.
Definition: tensor.hpp:1169
SERAC_HOST_DEVICE auto sin(dual< gradient_type > a)
implementation of sine for dual numbers
Definition: dual.hpp:295
constexpr SERAC_HOST_DEVICE auto operator-(const tensor< S, m, n... > &A, const tensor< T, m, n... > &B)
return the difference of two tensors
Definition: tensor.hpp:459
double relative_error(tensor< T, n... > A, tensor< T, n... > B)
computes the relative error (in the frobenius norm) between two tensors of the same shape
Definition: tensor.hpp:1390
constexpr SERAC_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
SERAC_HOST_DEVICE bool is_symmetric(tensor< double, n, n > A, double tolerance=1.0e-8)
Return whether a square rank 2 tensor is symmetric.
Definition: tensor.hpp:1404
constexpr SERAC_HOST_DEVICE auto chop(const tensor< double, m, n > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1683
constexpr SERAC_HOST_DEVICE auto operator*(const dual< gradient_type > &a, double b)
multiplication of a dual number and a non-dual number
Definition: dual.hpp:109
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.
SERAC_HOST_DEVICE auto cos(dual< gradient_type > a)
implementation of cosine for dual numbers
Definition: dual.hpp:287
Domain operator-(const Domain &a, const Domain &b)
create a new domain that is the set difference of a and b
Definition: domain.cpp:500
constexpr SERAC_HOST_DEVICE auto & operator+=(tensor< T, n... > &A, zero)
compound assignment (+) between a tensor and zero (no-op)
Definition: tensor.hpp:560
constexpr SERAC_HOST_DEVICE int size(zero)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1863
constexpr SERAC_HOST_DEVICE int leading_dimension(tensor< T, m, n... >)
a function for querying the first dimension of a tensor
Definition: tensor.hpp:1889
constexpr T & get(variant< T0, T1 > &v)
Returns the variant member of specified type.
Definition: variant.hpp:338
constexpr SERAC_HOST_DEVICE auto dot(const tensor< S, m, n, p, q > &A, const tensor< T, q > &B)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:889
constexpr SERAC_HOST_DEVICE auto transpose(const tensor< T, m, n > &A)
Returns the transpose of the matrix.
Definition: tensor.hpp:1199
SERAC_HOST_DEVICE auto pow(dual< gradient_type > a, dual< gradient_type > b)
implementation of a (dual) raised to the b (dual) power
Definition: dual.hpp:376
SERAC_HOST_DEVICE bool is_symmetric_and_positive_definite(tensor< double, 3, 3 > A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1438
SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING constexpr SERAC_HOST_DEVICE auto make_tensor(lambda_type f)
Creates a tensor of requested dimension by subsequent calls to a functor Can be thought of as analogo...
Definition: tensor.hpp:300
constexpr SERAC_HOST_DEVICE auto get_gradient(const tensor< double, n... > &)
get the gradient of type tensor (note: since its stored type is not a dual number,...
Definition: tensor.hpp:1755
SERAC_HOST_DEVICE auto chain_rule(const tensor< double, m, n, p... > &df_dx, const tensor< double, p... > &dx)
Definition: tensor.hpp:1832
SERAC_HOST_DEVICE auto contract(const zero &, const T &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1375
constexpr SERAC_HOST_DEVICE auto linear_solve(const LuFactorization< T, n > &, const zero)
Definition: tensor.hpp:1554
SERAC_HOST_DEVICE auto sqrt(dual< gradient_type > x)
implementation of square root for dual numbers
Definition: dual.hpp:279
constexpr SERAC_HOST_DEVICE auto operator+(dual< gradient_type > a, double b)
addition of a dual number and a non-dual number
Definition: dual.hpp:60
constexpr SERAC_HOST_DEVICE auto det(const tensor< T, 3, 3 > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1221
constexpr SERAC_HOST_DEVICE auto dev(const tensor< T, n, n > &A)
Calculates the deviator of a matrix (rank-2 tensor)
Definition: tensor.hpp:1124
tensor< double, 2, 2 > mat2
statically sized 2x2 matrix of doubles
Definition: tensor.hpp:117
auto matrix_sqrt(const tensor< T, dim, dim > &A)
compute the matrix square root of a square, real-valued, symmetric matrix i.e. given A,...
Definition: tensor.hpp:1278
constexpr SERAC_HOST_DEVICE auto solve_upper_triangular(const tensor< T, n, n > &U, const tensor< T, n, m... > &y)
Solves an upper triangular system Ux = y.
Definition: tensor.hpp:1520
constexpr SERAC_HOST_DEVICE auto & operator-=(dual< gradient_type > &a, const dual< gradient_type > &b)
compound assignment (-) for dual numbers
Definition: dual.hpp:192
constexpr SERAC_HOST_DEVICE auto tensor_with_shape(std::integer_sequence< int, n... >)
Creates a tensor given the dimensions in a std::integer_sequence.
Definition: tensor.hpp:283
constexpr SERAC_HOST_DEVICE auto squared_norm(const tensor< T, n... > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1033
tensor< double, 3 > vec3
statically sized vector of 3 doubles
Definition: tensor.hpp:115
bool isnan(const zero &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1904
constexpr SERAC_HOST_DEVICE auto sym(const tensor< T, n, n > &A)
Returns the symmetric part of a square matrix.
Definition: tensor.hpp:1088
constexpr SERAC_HOST_DEVICE auto antisym(const tensor< T, n, n > &A)
Returns the antisymmetric part of a square matrix.
Definition: tensor.hpp:1105
SERAC_HOST_DEVICE auto abs(dual< gradient_type > x)
Implementation of absolute value function for dual numbers.
Definition: dual.hpp:220
constexpr SERAC_HOST_DEVICE auto outer(const tensor< S, m > &A, const tensor< T, n > &B)
Definition: tensor.hpp:647
SERAC_HOST_DEVICE auto acos(dual< gradient_type > a)
implementation of acos for dual numbers
Definition: dual.hpp:344
constexpr SERAC_HOST_DEVICE int dimension(const tensor< T, n... > &)
a function for querying the ith dimension of a tensor
Definition: tensor.hpp:1874
auto & operator<<(std::ostream &out, zero)
Write a zero out to an output stream.
Definition: tensor.hpp:1636
constexpr auto double_dot(const tensor< S, m, n > &A, const tensor< T, m, n > &B)
Definition: tensor.hpp:984
auto cross(const tensor< S, 3 > &u, const tensor< T, 3 > &v)
compute the (right handed) cross product of two 3-vectors
Definition: tensor.hpp:928
constexpr SERAC_HOST_DEVICE auto operator*(const tensor< S, m... > &A, const tensor< T, n... > &B)
this is a shorthand for dot(A, B)
Definition: tensor.hpp:999
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:274
constexpr SERAC_HOST_DEVICE auto & operator-=(tensor< T, n... > &A, zero)
compound assignment (-) between a tensor and zero (no-op)
Definition: tensor.hpp:589
constexpr SERAC_HOST_DEVICE auto & operator+=(dual< gradient_type > &a, const dual< gradient_type > &b)
compound assignment (+) for dual numbers
Definition: dual.hpp:183
constexpr SERAC_HOST_DEVICE auto tr(const tensor< T, n, n > &A)
Returns the trace of a square matrix.
Definition: tensor.hpp:1073
SERAC_HOST_DEVICE auto normalize(const tensor< T, n... > &A)
Normalizes the tensor Each element is divided by the Frobenius norm of the tensor,...
Definition: tensor.hpp:1062
tensor< double, 2 > vec2
statically sized vector of 2 doubles
Definition: tensor.hpp:114
constexpr SERAC_HOST_DEVICE auto detApIm1(const tensor< T, 3, 3 > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1251
constexpr SERAC_HOST_DEVICE auto diagonal_matrix(const tensor< T, n, n > &A)
Returns a square matrix (rank-2 tensor) containing the diagonal entries of the input square matrix wi...
Definition: tensor.hpp:1141
tensor< double, 3, 3 > mat3
statically sized 3x3 matrix of doubles
Definition: tensor.hpp:118
tensor(const T(&data)[n1][n2]) -> tensor< T, n1, n2 >
class template argument deduction guide for type tensor.
SERAC_HOST_DEVICE void print(const tensor< double, m, n... > &A)
print a tensor using printf, so that it is suitable for use inside cuda kernels.
Definition: tensor.hpp:1654
constexpr SERAC_HOST_DEVICE auto operator/(const dual< gradient_type > &a, double b)
division of a dual number by a non-dual number
Definition: dual.hpp:130
constexpr SERAC_HOST_DEVICE auto solve_lower_triangular(const tensor< T, n, n > &L, const tensor< T, n, m... > &b)
Definition: tensor.hpp:1499
std::conditional_t<(n1==1 &&n2==1), double, std::conditional_t< n1==1, tensor< T, n2 >, std::conditional_t< n2==1, tensor< T, n1 >, tensor< T, n1, n2 > >> > reduced_tensor
Removes 1s from tensor dimensions For example, a tensor<T, 1, 10> is equivalent to a tensor<T,...
Definition: tensor.hpp:275
constexpr SERAC_HOST_DEVICE auto inner(double A, double B)
Definition: tensor.hpp:697
constexpr SERAC_HOST_DEVICE auto norm(zero)
overload of Frobenius norm for zero type
Definition: tensor.hpp:1054
Representation of an LU factorization.
Definition: tensor.hpp:1459
tensor< T, n, n > U
Upper triangular factor.
Definition: tensor.hpp:1462
tensor< int, n > P
Row permutation indices due to partial pivoting.
Definition: tensor.hpp:1460
tensor< T, n, n > L
Lower triangular factor. Has ones on diagonal.
Definition: tensor.hpp:1461
checks if a type is zero
Definition: tensor.hpp:151
Arbitrary-rank tensor class.
Definition: tensor.hpp:29
A sentinel struct for eliding no-op tensor operations.
Definition: tensor.hpp:123
SERAC_HOST_DEVICE auto operator=(T)
anything assigned to zero does not change its value and returns zero
Definition: tensor.hpp:143
SERAC_HOST_DEVICE auto operator()(T...) const
zero can be accessed like a multidimensional array
Definition: tensor.hpp:136