28 template <
typename T,
int... n>
32 template <
typename T,
int m,
int... n>
33 struct tensor<T, m, n...> {
34 template <
typename i_type>
39 template <
typename i_type>
44 template <
typename i_type,
typename... jklm_type>
47 return data[i](jklm...);
49 template <
typename i_type,
typename... jklm_type>
52 return data[i](jklm...);
56 SERAC_HOST_DEVICE constexpr
const auto& operator[](
int i)
const {
return data[i]; }
61 template <
typename T,
int m>
63 template <
typename i_type>
68 template <
typename i_type>
74 SERAC_HOST_DEVICE constexpr
const auto& operator[](
int i)
const {
return data[i]; }
100 template <
typename T,
int n1>
111 template <
typename T,
int n1,
int n2>
128 template <
typename T,
int... n>
135 template <
typename... T>
142 template <
typename T>
150 template <
typename T>
163 template <
typename T>
170 template <
typename T>
185 template <
typename T>
192 template <
typename T>
204 template <
typename T>
211 template <
typename T>
218 template <
typename T>
225 template <
typename T>
228 static_assert(::detail::always_false<T>{},
"Error: Can't divide by zero!");
252 template <
typename T>
259 template <
typename T>
272 template <
typename T,
int n1,
int n2 = 1>
274 (n1 == 1 && n2 == 1),
double,
282 template <
typename T,
int... n>
299 template <
typename lambda_type>
302 using T = decltype(f());
318 template <
int n1,
typename lambda_type>
321 using T = decltype(f(n1));
323 for (
int i = 0; i < n1; i++) {
342 template <
int n1,
int n2,
typename lambda_type>
345 using T = decltype(f(n1, n2));
347 for (
int i = 0; i < n1; i++) {
348 for (
int j = 0; j < n2; j++) {
369 template <
int n1,
int n2,
int n3,
typename lambda_type>
372 using T = decltype(f(n1, n2, n3));
374 for (
int i = 0; i < n1; i++) {
375 for (
int j = 0; j < n2; j++) {
376 for (
int k = 0; k < n3; k++) {
377 A(i, j, k) = f(i, j, k);
399 template <
int n1,
int n2,
int n3,
int n4,
typename lambda_type>
402 using T = decltype(f(n1, n2, n3, n4));
404 for (
int i = 0; i < n1; i++) {
405 for (
int j = 0; j < n2; j++) {
406 for (
int k = 0; k < n3; k++) {
407 for (
int l = 0; l < n4; l++) {
408 A(i, j, k, l) = f(i, j, k, l);
424 template <
typename S,
typename T,
int m,
int... n>
427 tensor<decltype(S{} + T{}), m, n...> C{};
428 for (
int i = 0; i < m; i++) {
440 template <
typename T,
int m,
int... n>
444 for (
int i = 0; i < m; i++) {
458 template <
typename S,
typename T,
int m,
int... n>
461 tensor<decltype(S{} + T{}), m, n...> C{};
462 for (
int i = 0; i < m; i++) {
476 template <
typename S,
typename T,
int m,
int... n>
479 for (
int i = 0; i < m; i++) {
492 template <
typename T>
505 template <
typename T,
int n>
508 for (
int i = 0; i < n; i++) {
509 A.data[i][0] += B[i];
520 template <
typename T,
int n>
523 for (
int i = 0; i < n; i++) {
524 A.data[0][i] += B[i];
535 template <
typename T>
538 return A.data[0] += B;
547 template <
typename T>
550 return A.data[0][0] += B;
559 template <
typename T,
int... n>
573 template <
typename S,
typename T,
int m,
int... n>
576 for (
int i = 0; i < m; i++) {
588 template <
typename T,
int... n>
598 template <
typename T,
int n>
601 tensor<decltype(
double{} * T{}), n> AB{};
602 for (
int i = 0; i < n; i++) {
612 template <
typename T,
int m>
615 tensor<decltype(T{} *
double{}), m> AB{};
616 for (
int i = 0; i < m; i++) {
626 template <
typename T,
int n>
636 template <
typename T,
int n>
646 template <
typename S,
typename T,
int m,
int n>
649 tensor<decltype(S{} * T{}), m, n> AB{};
650 for (
int i = 0; i < m; i++) {
651 for (
int j = 0; j < n; j++) {
652 AB[i][j] = A[i] * B[j];
667 template <
typename S,
typename T,
int m,
int n>
670 decltype(S{} * T{}) sum{};
671 for (
int i = 0; i < m; i++) {
672 for (
int j = 0; j < n; j++) {
673 sum += A[i][j] * B[i][j];
683 template <
typename S,
typename T,
int m>
686 decltype(S{} * T{}) sum{};
687 for (
int i = 0; i < m; i++) {
707 template <
typename S,
typename T,
int m,
int n,
int p>
710 tensor<decltype(S{} * T{}), m, p> AB{};
711 for (
int i = 0; i < m; i++) {
712 for (
int j = 0; j < p; j++) {
713 for (
int k = 0; k < n; k++) {
714 AB[i][j] = AB[i][j] + A[i][k] * B[k][j];
725 template <
typename T,
int m>
735 template <
typename T,
int m>
745 template <
typename S,
typename T,
int m>
748 decltype(S{} * T{}) AB{};
749 for (
int i = 0; i < m; i++) {
750 AB = AB + A[i] * B[i];
759 template <
typename S,
typename T,
int m,
int n>
762 tensor<decltype(S{} * T{}), n> AB{};
763 for (
int i = 0; i < n; i++) {
764 for (
int j = 0; j < m; j++) {
765 AB[i] = AB[i] + A[j] * B[j][i];
775 template <
typename S,
typename T,
int m,
int n,
int p>
778 tensor<decltype(S{} * T{}), n, p> AB{};
779 for (
int j = 0; j < m; j++) {
780 AB = AB + A[j] * B[j];
795 template <
typename S,
typename T,
int m,
int n,
int p,
int q>
798 tensor<decltype(S{} * T{}), n, p, q> AB{};
799 for (
int j = 0; j < m; j++) {
800 AB = AB + A[j] * B[j];
809 template <
typename S,
typename T,
int m,
int n>
812 tensor<decltype(S{} * T{}), m> AB{};
813 for (
int i = 0; i < m; i++) {
814 for (
int j = 0; j < n; j++) {
815 AB[i] = AB[i] + A[i][j] * B[j];
825 template <
typename S,
typename T,
int m,
int n,
int p,
int q,
int r>
828 tensor<decltype(S{} * T{}), m, p, q, r> AB{};
829 for (
int i = 0; i < m; i++) {
830 for (
int j = 0; j < n; j++) {
831 AB[i] = AB[i] + A[i][j] * B[j];
841 template <
typename S,
typename T,
int m,
int n,
int p,
int q>
844 tensor<decltype(S{} * T{}), m, p, q> AB{};
845 for (
int i = 0; i < m; i++) {
846 for (
int j = 0; j < n; j++) {
847 AB[i] = AB[i] + A[i][j] * B[j];
857 template <
typename S,
typename T,
int m,
int n,
int p>
860 tensor<decltype(S{} * T{}), m, n> AB{};
861 for (
int i = 0; i < m; i++) {
862 for (
int j = 0; j < n; j++) {
863 for (
int k = 0; k < p; k++) {
864 AB[i][j] += A[i][j][k] * B[k];
875 template <
typename S,
typename T,
typename U,
int m,
int n>
878 decltype(S{} * T{} * U{}) uAv{};
879 for (
int i = 0; i < m; i++) {
880 for (
int j = 0; j < n; j++) {
881 uAv += u[i] * A[i][j] * v[j];
888 template <
typename S,
typename T,
int m,
int n,
int p,
int q>
891 tensor<decltype(S{} * T{}), m, n, p> AB{};
892 for (
int i = 0; i < m; i++) {
893 for (
int j = 0; j < n; j++) {
894 for (
int k = 0; k < p; k++) {
895 for (
int l = 0; l < q; l++) {
896 AB[i][j][k] += A[i][j][k][l] * B[l];
905 template <
typename T>
908 return tensor<T, 3>{A(1, 0) * A(2, 1) - A(2, 0) * A(1, 1), A(2, 0) * A(0, 1) - A(0, 0) * A(2, 1),
909 A(0, 0) * A(1, 1) - A(1, 0) * A(0, 1)};
913 template <
typename T>
920 template <
typename T>
927 template <
typename S,
typename T>
930 return tensor<decltype(S{} * T{}), 3>{u(1) * v(2) - u(2) * v(1), u(2) * v(0) - u(0) * v(2),
931 u(0) * v(1) - u(1) * v(0)};
945 template <
typename S,
typename T,
int m,
int n,
int p,
int q>
948 tensor<decltype(S{} * T{}), m, n> AB{};
949 for (
int i = 0; i < m; i++) {
950 for (
int j = 0; j < n; j++) {
951 for (
int k = 0; k < p; k++) {
952 for (
int l = 0; l < q; l++) {
953 AB[i][j] += A[i][j][k][l] * B[k][l];
965 template <
typename S,
typename T,
int m,
int n,
int p>
968 tensor<decltype(S{} * T{}), m> AB{};
969 for (
int i = 0; i < m; i++) {
970 for (
int j = 0; j < n; j++) {
971 for (
int k = 0; k < p; k++) {
972 AB[i] += A[i][j][k] * B[j][k];
983 template <
typename S,
typename T,
int m,
int n>
986 decltype(S{} * T{}) AB{};
987 for (
int i = 0; i < m; i++) {
988 for (
int j = 0; j < n; j++) {
989 AB += A[i][j] * B[i][j];
998 template <
typename S,
typename T,
int... m,
int... n>
1008 template <
typename T,
int m>
1012 for (
int i = 0; i < m; i++) {
1013 total += A[i] * A[i];
1019 template <
typename T,
int m,
int n>
1023 for (
int i = 0; i < m; i++) {
1024 for (
int j = 0; j < n; j++) {
1025 total += A[i][j] * A[i][j];
1032 template <
typename T,
int... n>
1036 for_constexpr<n...>([&](
auto... i) { total += A(i...) * A(i...); });
1044 template <
typename T,
int... n>
1061 template <
typename T,
int... n>
1072 template <
typename T,
int n>
1076 for (
int i = 0; i < n; i++) {
1077 trA = trA + A[i][i];
1087 template <
typename T,
int n>
1091 for (
int i = 0; i < n; i++) {
1092 for (
int j = 0; j < n; j++) {
1093 symA[i][j] = 0.5 * (A[i][j] + A[j][i]);
1104 template <
typename T,
int n>
1108 for (
int i = 0; i < n; i++) {
1109 for (
int j = 0; j < n; j++) {
1110 antisymA[i][j] = 0.5 * (A[i][j] - A[j][i]);
1123 template <
typename T,
int n>
1128 for (
int i = 0; i < n; i++) {
1129 devA[i][i] -= trA / n;
1140 template <
typename T,
int n>
1144 for (
int i = 0; i < n; i++) {
1154 template <
typename T,
int n>
1158 for (
int i = 0; i < n; i++) {
1168 template <
typename T,
int n>
1172 for (
int i = 0; i < n; i++) {
1186 for (
int i = 0; i < dim; i++) {
1187 for (
int j = 0; j < dim; j++) {
1198 template <
typename T,
int m,
int n>
1202 for (
int i = 0; i < n; i++) {
1203 for (
int j = 0; j < m; j++) {
1214 template <
typename T>
1217 return A[0][0] * A[1][1] - A[0][1] * A[1][0];
1220 template <
typename T>
1223 return A[0][0] * A[1][1] * A[2][2] + A[0][1] * A[1][2] * A[2][0] + A[0][2] * A[1][0] * A[2][1] -
1224 A[0][0] * A[1][2] * A[2][1] - A[0][1] * A[1][0] * A[2][2] - A[0][2] * A[1][1] * A[2][0];
1237 template <
typename T>
1246 return A(0, 0) - A(0, 1) * A(1, 0) + A(1, 1) + A(0, 0) * A(1, 1);
1250 template <
typename T>
1257 return A(0, 0) + A(1, 1) + A(2, 2)
1258 - A(0, 1) * A(1, 0) * (1 + A(2, 2))
1259 + A(0, 0) * A(1, 1) * (1 + A(2, 2))
1260 - A(0, 2) * A(2, 0) * (1 + A(1, 1))
1261 - A(1, 2) * A(2, 1) * (1 + A(0, 0))
1264 + A(0, 1) * A(1, 2) * A(2, 0)
1265 + A(0, 2) * A(1, 0) * A(2, 1);
1277 template <
typename T,
int dim>
1281 for (
int i = 0; i < 15; i++) {
1282 B = 0.5 * (B +
dot(A,
inv(B)));
1316 template <
int i1,
int i2,
typename S,
int m,
int... n,
typename T,
int p,
int q>
1319 constexpr
int Adims[] = {m, n...};
1320 constexpr
int Bdims[] = {p, q};
1321 static_assert(
sizeof...(n) < 3);
1322 static_assert(Adims[i1] == Bdims[i2],
"error: incompatible tensor dimensions");
1325 constexpr
int new_dim = (i2 == 0) ? q : p;
1326 constexpr
int d1 = (i1 == 0) ? new_dim : Adims[0];
1327 constexpr
int d2 = (i1 == 1) ? new_dim : Adims[1];
1328 constexpr
int d3 =
sizeof...(n) == 1 ? 0 : ((i1 == 2) ? new_dim : Adims[2]);
1331 using U = decltype(S{} * T{});
1338 if constexpr (d3 == 0) {
1339 for (
int i = 0; i < d1; i++) {
1340 for (
int j = 0; j < d2; j++) {
1342 for (
int k = 0; k < Adims[i1]; k++) {
1343 if constexpr (i1 == 0 && i2 == 0) sum += A(k, j) * B(k, i);
1344 if constexpr (i1 == 1 && i2 == 0) sum += A(i, k) * B(k, j);
1345 if constexpr (i1 == 0 && i2 == 1) sum += A(k, j) * B(i, k);
1346 if constexpr (i1 == 1 && i2 == 1) sum += A(i, k) * B(j, k);
1352 for (
int i = 0; i < d1; i++) {
1353 for (
int j = 0; j < d2; j++) {
1354 for (
int k = 0; k < d3; k++) {
1356 for (
int l = 0; l < Adims[i1]; l++) {
1357 if constexpr (i1 == 0 && i2 == 0) sum += A(l, j, k) * B(l, i);
1358 if constexpr (i1 == 1 && i2 == 0) sum += A(i, l, k) * B(l, j);
1359 if constexpr (i1 == 2 && i2 == 0) sum += A(i, j, l) * B(l, k);
1360 if constexpr (i1 == 0 && i2 == 1) sum += A(l, j, k) * B(i, l);
1361 if constexpr (i1 == 1 && i2 == 1) sum += A(i, l, k) * B(j, l);
1362 if constexpr (i1 == 2 && i2 == 1) sum += A(i, j, l) * B(k, l);
1374 template <
int i1,
int i2,
typename T>
1389 template <
typename T,
int... n>
1406 for (
int i = 0; i < n; ++i) {
1407 for (
int j = i + 1; j < n; ++j) {
1408 if (
std::abs(A(i, j) - A(j, i)) > tolerance) {
1429 if (A(0, 0) < 0.0) {
1446 auto subtensor = make_tensor<2, 2>([A](
int i,
int j) {
return A(i, j); });
1458 template <
typename T,
int n>
1479 template <
typename T,
int n,
int... m>
1484 for (
int i = 0; i < n; i++) {
1486 for (
int j = 0; j < i; j++) {
1487 c -= L[i][j] * y[j];
1498 template <
typename T,
int n,
int... m>
1519 template <
typename T,
int n,
int... m>
1523 for (
int i = n - 1; i >= 0; i--) {
1525 for (
int j = i + 1; j < n; j++) {
1526 c -= U[i][j] * x[j];
1537 template <
typename S,
typename T,
int n,
int... m>
1553 template <
typename T,
int n>
1566 double inv_detA(1.0 /
det(A));
1570 invA[0][0] = A[1][1] * inv_detA;
1571 invA[0][1] = -A[0][1] * inv_detA;
1572 invA[1][0] = -A[1][0] * inv_detA;
1573 invA[1][1] = A[0][0] * inv_detA;
1584 double inv_detA(1.0 /
det(A));
1588 invA[0][0] = (A[1][1] * A[2][2] - A[1][2] * A[2][1]) * inv_detA;
1589 invA[0][1] = (A[0][2] * A[2][1] - A[0][1] * A[2][2]) * inv_detA;
1590 invA[0][2] = (A[0][1] * A[1][2] - A[0][2] * A[1][1]) * inv_detA;
1591 invA[1][0] = (A[1][2] * A[2][0] - A[1][0] * A[2][2]) * inv_detA;
1592 invA[1][1] = (A[0][0] * A[2][2] - A[0][2] * A[2][0]) * inv_detA;
1593 invA[1][2] = (A[0][2] * A[1][0] - A[0][0] * A[1][2]) * inv_detA;
1594 invA[2][0] = (A[1][0] * A[2][1] - A[1][1] * A[2][0]) * inv_detA;
1595 invA[2][1] = (A[0][1] * A[2][0] - A[0][0] * A[2][1]) * inv_detA;
1596 invA[2][2] = (A[0][0] * A[1][1] - A[0][1] * A[1][0]) * inv_detA;
1605 template <
typename T,
int n>
1608 auto I = DenseIdentity<n>();
1620 template <
typename T,
int m,
int... n>
1624 for (
int i = 1; i < m; i++) {
1625 out <<
", " << A[i];
1653 template <
int m,
int... n>
1658 for (
int i = 1; i < m; i++) {
1673 for (
int i = 0; i < n; i++) {
1674 if (copy[i] * copy[i] < 1.0e-20) {
1682 template <
int m,
int n>
1686 for (
int i = 0; i < m; i++) {
1687 for (
int j = 0; j < n; j++) {
1688 if (copy[i][j] * copy[i][j] < 1.0e-20) {
1699 template <
typename T1,
typename T2>
1702 template <
int... m,
int... n>
1703 struct outer_prod<
tensor<double, m...>,
tensor<double, n...>> {
1708 struct outer_prod<double,
tensor<double, n...>> {
1713 struct outer_prod<
tensor<double, n...>, double> {
1718 struct outer_prod<double, double> {
1719 using type = tensor<double>;
1722 template <
typename T>
1723 struct outer_prod<zero, T> {
1727 template <
typename T>
1728 struct outer_prod<T, zero> {
1740 template <
typename T1,
typename T2>
1769 template <
typename T>
1779 template <
typename T>
1809 for_constexpr<n...>([&](
auto... i) { total += df_dx(i...) * dx(i...); });
1817 template <
int m,
int... n>
1821 for (
int i = 0; i < m; i++) {
1831 template <
int m,
int n,
int... p>
1835 for (
int i = 0; i < m; i++) {
1836 for (
int j = 0; j < n; j++) {
1850 template <
typename T,
int... n>
1853 return (n * ... * 1);
1873 template <
int i,
typename T,
int... n>
1876 constexpr
int dimensions[] = {n...};
1877 return dimensions[i];
1888 template <
typename T,
int m,
int... n>
1895 template <
typename T,
int... n>
1898 bool found_nan =
false;
1915 inline void eig(
const r2tensor < 3, 3 > & A,
1916 r1tensor < 3 > & eta,
1917 r2tensor < 3, 3 > & Q) {
1919 for (
int i = 0; i < 3; i++) {
1921 for (
int j = 0; j < 3; j++) {
1926 auto A_dev =
dev(A);
1928 double J2 = I2(A_dev);
1929 double J3 = I3(A_dev);
1934 double tmp = (0.5 * J3) *
pow(3.0 / J2, 1.5);
1935 double alpha =
acos(fmin(fmax(tmp, -1.0), 1.0)) / 3.0;
1938 if (6.0 * alpha < M_PI) {
1939 eta(0) = 2 *
sqrt(J2 / 3.0) *
cos(alpha);
1941 eta(0) = 2 *
sqrt(J2 / 3.0) *
cos(alpha + 2.0 * M_PI / 3.0);
1945 r1tensor < 3 > r[3];
1948 double norm_max = -1.0;
1950 for (
int i = 0; i < 3; i++) {
1952 for (
int j = 0; j < 3; j++) {
1953 r[i](j) = A_dev(j,i) - (i == j) * eta(0);
1956 double norm_r =
norm(r[i]);
1957 if (norm_max < norm_r) {
1964 r1tensor < 3 > s0, s1, t1, t2, v0, v1, v2, w;
1967 t1 = r[(imax+1)%3] -
dot(r[(imax+1)%3], s0) * s0;
1968 t2 = r[(imax+2)%3] -
dot(r[(imax+2)%3], s0) * s0;
1973 for (
int i = 0; i < 3; i++) {
1979 auto A_dev_s0 =
dot(A_dev, s0);
1980 auto A_dev_s1 =
dot(A_dev, s1);
1982 double A11 =
dot(s0, A_dev_s0);
1983 double A12 =
dot(s0, A_dev_s1);
1984 double A21 =
dot(s1, A_dev_s0);
1985 double A22 =
dot(s1, A_dev_s1);
1987 double delta = 0.5 * signum(A11-A22) *
sqrt((A11-A22)*(A11-A22) + 4*A12*A21);
1989 eta(1) = 0.5 * (A11 + A22) - delta;
1990 eta(2) = 0.5 * (A11 + A22) + delta;
1995 if (fabs(delta) <= 1.0e-15) {
1997 for (
int i = 0; i < 3; i++){
2005 t1 = A_dev_s0 - eta(1) * s0;
2006 t2 = A_dev_s1 - eta(1) * s1;
2011 for (
int i = 0; i < 3; i++) Q(i,1) = v1(i);
2017 for (
int i = 0; i < 3; i++) Q(i,2) = v2(i);
2029 inline float angle_between(
const vec < 2 > & a,
const vec < 2 > & b) {
2033 inline float angle_between(
const vec < 3 > & a,
const vec < 3 > & b) {
2038 inline float angle_between(
const mat < 3, 3 > & U,
const mat < 3, 3 > & V) {
2042 inline mat < 2, 2 > rotation(
const float theta) {
2044 {
cos(theta), -
sin(theta)},
2045 {
sin(theta),
cos(theta) }
2049 inline mat < 3, 3 > axis_to_rotation(
const vec < 3 > & omega) {
2051 float norm_omega =
norm(omega);
2053 if (fabs(norm_omega) < 0.000001f) {
2059 vec3 u = omega / norm_omega;
2061 float c =
cos(norm_omega);
2062 float s =
sin(norm_omega);
2064 return mat < 3, 3 >{
2066 u[0]*u[0]*(1.0f - c) + c,
2067 u[0]*u[1]*(1.0f - c) - u[2]*s,
2068 u[0]*u[2]*(1.0f - c) + u[1]*s
2070 u[1]*u[0]*(1.0f - c) + u[2]*s,
2071 u[1]*u[1]*(1.0f - c) + c,
2072 u[1]*u[2]*(1.0f - c) - u[0]*s
2074 u[2]*u[0]*(1.0f - c) - u[1]*s,
2075 u[2]*u[1]*(1.0f - c) + u[0]*s,
2076 u[2]*u[2]*(1.0f - c) + c
2085 inline vec < 3 > rotation_to_axis(
const mat < 3, 3 > & R) {
2087 float theta =
acos(clip(0.5f * (
tr(R) - 1.0f), -1.0f, 1.0f));
2092 if (fabs(theta) < 0.00001f) {
2093 scale = 0.5f + theta * theta / 12.0f;
2096 scale = 0.5f * theta /
sin(theta);
2099 return vec3{ R(2,1) - R(1,2), R(0,2) - R(2,0), R(1,0) - R(0,1) } *scale;
2103 inline mat < 3, 3 > look_at(
const vec < 3 > & direction,
const vec < 3 > & up =
vec3{ 0.0f, 0.0f, 1.0f }) {
2115 inline mat < 2, 2 > look_at(
const vec < 2 > & direction) {
2125 inline mat < 3, 3 > R3_basis(
const vec3 & n) {
2126 float sign = (n[2] >= 0.0f) ? 1.0f : -1.0f;
2127 float a = -1.0f / (sign + n[2]);
2128 float b = n[0] * n[1] * a;
2130 return mat < 3, 3 >{
2132 1.0f + sign * n[0] * n[0] * a,
2137 sign + n[1] * n[1] * a,
2150 #include "serac/numerics/functional/tuple_tensor_dual_functions.hpp"
This file contains the interface used for initializing/terminating any hardware accelerator-related f...
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
Macro to turn off specific nvcc warnings.
#define SERAC_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
Implementation of isotropic tensor classes.
Accelerator functionality.
constexpr SERAC_HOST_DEVICE auto inv(const tensor< T, n, n > &A)
constexpr SERAC_HOST_DEVICE tensor< double, dim, dim > DenseIdentity()
Obtains the identity matrix of the specified dimension.
typename detail::outer_prod< T1, T2 >::type outer_product_t
a type function that returns the tensor type of an outer product of two tensors
constexpr SERAC_HOST_DEVICE tensor< T, n > diag(const tensor< T, n, n > &D)
Returns an array containing the diagonal entries of a square matrix.
SERAC_HOST_DEVICE auto sin(dual< gradient_type > a)
implementation of sine for dual numbers
constexpr SERAC_HOST_DEVICE auto operator-(const tensor< S, m, n... > &A, const tensor< T, m, n... > &B)
return the difference of two tensors
double relative_error(tensor< T, n... > A, tensor< T, n... > B)
computes the relative error (in the frobenius norm) between two tensors of the same shape
constexpr SERAC_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
SERAC_HOST_DEVICE bool is_symmetric(tensor< double, n, n > A, double tolerance=1.0e-8)
Return whether a square rank 2 tensor is symmetric.
constexpr SERAC_HOST_DEVICE auto chop(const tensor< double, m, n > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
constexpr SERAC_HOST_DEVICE auto operator*(const dual< gradient_type > &a, double b)
multiplication of a dual number and a non-dual number
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.
SERAC_HOST_DEVICE auto cos(dual< gradient_type > a)
implementation of cosine for dual numbers
Domain operator-(const Domain &a, const Domain &b)
create a new domain that is the set difference of a and b
constexpr SERAC_HOST_DEVICE auto & operator+=(tensor< T, n... > &A, zero)
compound assignment (+) between a tensor and zero (no-op)
constexpr SERAC_HOST_DEVICE int size(zero)
This is an overloaded member function, provided for convenience. It differs from the above function o...
constexpr SERAC_HOST_DEVICE int leading_dimension(tensor< T, m, n... >)
a function for querying the first dimension of a tensor
constexpr T & get(variant< T0, T1 > &v)
Returns the variant member of specified type.
constexpr SERAC_HOST_DEVICE auto dot(const tensor< S, m, n, p, q > &A, const tensor< T, q > &B)
This is an overloaded member function, provided for convenience. It differs from the above function o...
constexpr SERAC_HOST_DEVICE auto transpose(const tensor< T, m, n > &A)
Returns the transpose of the matrix.
SERAC_HOST_DEVICE auto pow(dual< gradient_type > a, dual< gradient_type > b)
implementation of a (dual) raised to the b (dual) power
SERAC_HOST_DEVICE bool is_symmetric_and_positive_definite(tensor< double, 3, 3 > A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING constexpr SERAC_HOST_DEVICE auto make_tensor(lambda_type f)
Creates a tensor of requested dimension by subsequent calls to a functor Can be thought of as analogo...
constexpr SERAC_HOST_DEVICE auto get_gradient(const tensor< double, n... > &)
get the gradient of type tensor (note: since its stored type is not a dual number,...
SERAC_HOST_DEVICE auto chain_rule(const tensor< double, m, n, p... > &df_dx, const tensor< double, p... > &dx)
SERAC_HOST_DEVICE auto contract(const zero &, const T &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
constexpr SERAC_HOST_DEVICE auto linear_solve(const LuFactorization< T, n > &, const zero)
SERAC_HOST_DEVICE auto sqrt(dual< gradient_type > x)
implementation of square root for dual numbers
constexpr SERAC_HOST_DEVICE auto operator+(dual< gradient_type > a, double b)
addition of a dual number and a non-dual number
constexpr SERAC_HOST_DEVICE auto det(const tensor< T, 3, 3 > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
constexpr SERAC_HOST_DEVICE auto dev(const tensor< T, n, n > &A)
Calculates the deviator of a matrix (rank-2 tensor)
tensor< double, 2, 2 > mat2
statically sized 2x2 matrix of doubles
auto matrix_sqrt(const tensor< T, dim, dim > &A)
compute the matrix square root of a square, real-valued, symmetric matrix i.e. given A,...
constexpr SERAC_HOST_DEVICE auto solve_upper_triangular(const tensor< T, n, n > &U, const tensor< T, n, m... > &y)
Solves an upper triangular system Ux = y.
constexpr SERAC_HOST_DEVICE auto & operator-=(dual< gradient_type > &a, const dual< gradient_type > &b)
compound assignment (-) for dual numbers
constexpr SERAC_HOST_DEVICE auto tensor_with_shape(std::integer_sequence< int, n... >)
Creates a tensor given the dimensions in a std::integer_sequence.
constexpr SERAC_HOST_DEVICE auto squared_norm(const tensor< T, n... > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
tensor< double, 3 > vec3
statically sized vector of 3 doubles
bool isnan(const zero &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
constexpr SERAC_HOST_DEVICE auto sym(const tensor< T, n, n > &A)
Returns the symmetric part of a square matrix.
constexpr SERAC_HOST_DEVICE auto antisym(const tensor< T, n, n > &A)
Returns the antisymmetric part of a square matrix.
SERAC_HOST_DEVICE auto abs(dual< gradient_type > x)
Implementation of absolute value function for dual numbers.
constexpr SERAC_HOST_DEVICE auto outer(const tensor< S, m > &A, const tensor< T, n > &B)
SERAC_HOST_DEVICE auto acos(dual< gradient_type > a)
implementation of acos for dual numbers
constexpr SERAC_HOST_DEVICE int dimension(const tensor< T, n... > &)
a function for querying the ith dimension of a tensor
auto & operator<<(std::ostream &out, zero)
Write a zero out to an output stream.
constexpr auto double_dot(const tensor< S, m, n > &A, const tensor< T, m, n > &B)
auto cross(const tensor< S, 3 > &u, const tensor< T, 3 > &v)
compute the (right handed) cross product of two 3-vectors
constexpr SERAC_HOST_DEVICE auto operator*(const tensor< S, m... > &A, const tensor< T, n... > &B)
this is a shorthand for dot(A, B)
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
constexpr SERAC_HOST_DEVICE auto & operator-=(tensor< T, n... > &A, zero)
compound assignment (-) between a tensor and zero (no-op)
constexpr SERAC_HOST_DEVICE auto & operator+=(dual< gradient_type > &a, const dual< gradient_type > &b)
compound assignment (+) for dual numbers
constexpr SERAC_HOST_DEVICE auto tr(const tensor< T, n, n > &A)
Returns the trace of a square matrix.
SERAC_HOST_DEVICE auto normalize(const tensor< T, n... > &A)
Normalizes the tensor Each element is divided by the Frobenius norm of the tensor,...
tensor< double, 2 > vec2
statically sized vector of 2 doubles
constexpr SERAC_HOST_DEVICE auto detApIm1(const tensor< T, 3, 3 > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
constexpr SERAC_HOST_DEVICE auto diagonal_matrix(const tensor< T, n, n > &A)
Returns a square matrix (rank-2 tensor) containing the diagonal entries of the input square matrix wi...
tensor< double, 3, 3 > mat3
statically sized 3x3 matrix of doubles
tensor(const T(&data)[n1][n2]) -> tensor< T, n1, n2 >
class template argument deduction guide for type tensor.
SERAC_HOST_DEVICE void print(const tensor< double, m, n... > &A)
print a tensor using printf, so that it is suitable for use inside cuda kernels.
constexpr SERAC_HOST_DEVICE auto operator/(const dual< gradient_type > &a, double b)
division of a dual number by a non-dual number
constexpr SERAC_HOST_DEVICE auto solve_lower_triangular(const tensor< T, n, n > &L, const tensor< T, n, m... > &b)
std::conditional_t<(n1==1 &&n2==1), double, std::conditional_t< n1==1, tensor< T, n2 >, std::conditional_t< n2==1, tensor< T, n1 >, tensor< T, n1, n2 > >> > reduced_tensor
Removes 1s from tensor dimensions For example, a tensor<T, 1, 10> is equivalent to a tensor<T,...
constexpr SERAC_HOST_DEVICE auto inner(double A, double B)
constexpr SERAC_HOST_DEVICE auto norm(zero)
overload of Frobenius norm for zero type
Representation of an LU factorization.
tensor< T, n, n > U
Upper triangular factor.
tensor< int, n > P
Row permutation indices due to partial pivoting.
tensor< T, n, n > L
Lower triangular factor. Has ones on diagonal.
Arbitrary-rank tensor class.
A sentinel struct for eliding no-op tensor operations.
SERAC_HOST_DEVICE auto operator=(T)
anything assigned to zero does not change its value and returns zero
SERAC_HOST_DEVICE auto operator()(T...) const
zero can be accessed like a multidimensional array