// Extracted from doxygen/html/tensor_8hpp_source.html

 // Copyright (c) 2019-2024, Lawrence Livermore National Security, LLC and

 // other Serac Project Developers. See the top-level LICENSE file for

 // details.

 //

 // SPDX-License-Identifier: (BSD-3-Clause)


 #pragma once


 #include "serac/infrastructure/accelerator.hpp"


 #include "detail/metaprogramming.hpp"


 #include <cmath>


 namespace serac {


 /**
  * @brief Forward declaration of the fixed-size tensor class template.
  *
  * @tparam T the type stored at each index
  * @tparam n the extents of the tensor, one integer per dimension
  */
 template <typename T, int... n>

 struct tensor;


 /**
  * @brief Partial specialization for tensors with two or more dimensions.
  *
  * The storage is an array of `m` tensors of shape `n...`, so every index
  * operation peels off the leading dimension and forwards the rest.
  *
  * @tparam T the type stored at each index
  * @tparam m the leading extent
  * @tparam n the remaining extents
  */
 template <typename T, int m, int... n>
 struct tensor<T, m, n...> {
   /// @brief Mutable access to the `i`-th slice along the leading dimension.
   template <typename i_type>
   SERAC_HOST_DEVICE constexpr tensor<T, n...>& operator()(i_type i)
   {
     return data[i];
   }

   /// @brief Read-only access to the `i`-th slice along the leading dimension.
   template <typename i_type>
   SERAC_HOST_DEVICE constexpr const tensor<T, n...>& operator()(i_type i) const
   {
     return data[i];
   }

   /// @brief Mutable multi-index access: `i` selects the slice, `jklm...` index into it.
   template <typename i_type, typename... jklm_type>
   SERAC_HOST_DEVICE constexpr auto& operator()(i_type i, jklm_type... jklm)
   {
     return data[i](jklm...);
   }

   /// @brief Read-only multi-index access: `i` selects the slice, `jklm...` index into it.
   template <typename i_type, typename... jklm_type>
   SERAC_HOST_DEVICE constexpr auto& operator()(i_type i, jklm_type... jklm) const
   {
     return data[i](jklm...);
   }

   /// @brief Mutable subscript access to the `i`-th slice.
   SERAC_HOST_DEVICE constexpr tensor<T, n...>& operator[](int i) { return data[i]; }

   /// @brief Read-only subscript access to the `i`-th slice.
   SERAC_HOST_DEVICE constexpr const tensor<T, n...>& operator[](int i) const { return data[i]; }

   /// The `m` sub-tensors, each of shape `n...`
   tensor<T, n...> data[m];
 };


 /**
  * @brief Partial specialization for one-dimensional tensors.
  *
  * Holds `m` values of type `T` directly. A tensor with a single entry
  * (`m == 1`) is additionally implicitly convertible to `T`.
  *
  * @tparam T the type stored at each index
  * @tparam m the number of entries
  */
 template <typename T, int m>
 struct tensor<T, m> {
   /// @brief Mutable access to the `i`-th entry.
   template <typename i_type>
   SERAC_HOST_DEVICE constexpr T& operator()(i_type i)
   {
     return data[i];
   }

   /// @brief Read-only access to the `i`-th entry.
   template <typename i_type>
   SERAC_HOST_DEVICE constexpr const T& operator()(i_type i) const
   {
     return data[i];
   }

   /// @brief Mutable subscript access to the `i`-th entry.
   SERAC_HOST_DEVICE constexpr T& operator[](int i) { return data[i]; }

   /// @brief Read-only subscript access to the `i`-th entry.
   SERAC_HOST_DEVICE constexpr const T& operator[](int i) const { return data[i]; }

   /// @brief Conversion to `T`, only available when the tensor has exactly one entry.
   template <int last_dimension = m, typename = std::enable_if_t<last_dimension == 1>>
   SERAC_HOST_DEVICE constexpr operator T()
   {
     return data[0];
   }

   /// @brief Conversion to `T` for const tensors, only available when the tensor has exactly one entry.
   template <int last_dimension = m, typename = std::enable_if_t<last_dimension == 1>>
   SERAC_HOST_DEVICE constexpr operator T() const
   {
     return data[0];
   }

   /// The `m` stored values
   T data[m];
 };


 /// @brief Deduction guide mapping a one-dimensional C array of `n1` values of `T` to `tensor<T, n1>`.
 template <typename T, int n1>

 tensor(const T (&data)[n1]) -> tensor<T, n1>;


 /// @brief Deduction guide mapping a two-dimensional `n1` by `n2` C array of `T` to `tensor<T, n1, n2>`.
 template <typename T, int n1, int n2>

 tensor(const T (&data)[n1][n2]) -> tensor<T, n1, n2>;


 /// Alias for a one-dimensional tensor of 2 doubles
 using vec2 = tensor<double, 2>;

 /// Alias for a one-dimensional tensor of 3 doubles
 using vec3 = tensor<double, 3>;


 /// Alias for a 2 by 2 tensor of doubles
 using mat2 = tensor<double, 2, 2>;

 /// Alias for a 3 by 3 tensor of doubles
 using mat3 = tensor<double, 3, 3>;


 /**
  * @brief An empty tag type that stands for "a value that is identically zero".
  *
  * It converts implicitly to `0.0` and to a value-initialized tensor of any
  * shape, swallows indexing, and ignores assignment. The conversion operators
  * are const-qualified (and constexpr) so that `const zero` objects can be
  * converted as well.
  */
 struct zero {
   /// @brief `zero` converts to the double value 0.0
   SERAC_HOST_DEVICE constexpr operator double() const { return 0.0; }

   /// @brief `zero` converts to a value-initialized tensor of any element type and shape
   template <typename T, int... n>
   SERAC_HOST_DEVICE constexpr operator tensor<T, n...>() const
   {
     return tensor<T, n...>{};
   }

   /// @brief indexing into `zero` with any arguments yields `zero` again
   template <typename... T>
   SERAC_HOST_DEVICE constexpr auto operator()(T...) const
   {
     return zero{};
   }

   /// @brief assigning anything to `zero` discards the value and yields `zero`
   template <typename T>
   SERAC_HOST_DEVICE constexpr auto operator=(T)
   {
     return zero{};
   }
 };


 /// @brief Type trait that reports whether `T` is the `zero` type; false for every other type.
 template <typename T>

 struct is_zero : std::false_type {

 };


 /// @brief Specialization of the trait for `zero` itself, which is the only type reported as true.
 template <>

 struct is_zero<zero> : std::true_type {

 };


 /// @brief the sum of two `zero`s is `zero`
 SERAC_HOST_DEVICE constexpr zero operator+(zero, zero) { return zero{}; }

 /// @brief `zero` on the left is the additive identity: the right operand is returned unchanged
 template <typename T>
 SERAC_HOST_DEVICE constexpr T operator+(zero, T other)
 {
   return other;
 }

 /// @brief `zero` on the right is the additive identity: the left operand is returned unchanged
 template <typename T>
 SERAC_HOST_DEVICE constexpr T operator+(T other, zero)
 {
   return other;
 }


 /// @brief the negation of `zero` is `zero`
 SERAC_HOST_DEVICE constexpr zero operator-(zero) { return zero{}; }

 /// @brief the difference of two `zero`s is `zero`
 SERAC_HOST_DEVICE constexpr zero operator-(zero, zero) { return zero{}; }

 /// @brief subtracting a value from `zero` yields the negated value
 template <typename T>
 SERAC_HOST_DEVICE constexpr auto operator-(zero, T other)
 {
   return -other;
 }

 /// @brief subtracting `zero` from a value returns the value unchanged
 template <typename T>
 SERAC_HOST_DEVICE constexpr T operator-(T other, zero)
 {
   return other;
 }


 /// @brief the product of two `zero`s is `zero`
 SERAC_HOST_DEVICE constexpr zero operator*(zero, zero) { return zero{}; }

 /// @brief `zero` times anything is `zero`; the right operand is ignored
 template <typename T>
 SERAC_HOST_DEVICE constexpr zero operator*(zero, T /*other*/)
 {
   return zero{};
 }

 /// @brief anything times `zero` is `zero`; the left operand is ignored
 template <typename T>
 SERAC_HOST_DEVICE constexpr zero operator*(T /*other*/, zero)
 {
   return zero{};
 }


 /// @brief `zero` divided by anything is `zero`; the divisor is ignored
 template <typename T>
 SERAC_HOST_DEVICE constexpr zero operator/(zero, T /*other*/)
 {
   return zero{};
 }

 /// @brief dividing by `zero` is rejected at compile time as soon as this overload is instantiated
 template <typename T>
 void operator/(T, zero)
 {
   // the condition depends on T so that the assertion only fires on instantiation
   static_assert(::detail::always_false<T>{}, "Error: Can't divide by zero!");
 }


 /// @brief accumulating `zero` into `zero` yields `zero`
 SERAC_HOST_DEVICE constexpr zero operator+=(zero, zero) { return zero{}; }

 /// @brief subtracting `zero` from `zero` in place yields `zero`
 SERAC_HOST_DEVICE constexpr zero operator-=(zero, zero) { return zero{}; }


 /// @brief `get<i>` on a mutable `zero` hands back a reference to that same object, for any `i`
 template <int i>
 SERAC_HOST_DEVICE zero& get(zero& x)
 {
   return x;
 }

 /// @brief `get<i>` on a const `zero` returns a fresh `zero` by value, for any `i`
 template <int i>
 SERAC_HOST_DEVICE zero get(const zero&)
 {
   return zero{};
 }


 /// @brief the dot product of anything with `zero` (on the right) is `zero`
 template <typename T>
 SERAC_HOST_DEVICE constexpr zero dot(const T&, zero)
 {
   return {};
 }

 /// @brief the dot product of `zero` (on the left) with anything is `zero`
 template <typename T>
 SERAC_HOST_DEVICE constexpr zero dot(zero, const T&)
 {
   return {};
 }


 /**
  * @brief Drops extents equal to 1 from a (at most) two-dimensional tensor type.
  *
  * - `n1 == 1 && n2 == 1` gives the scalar type `T` (previously hard-coded to
  *   `double`, which silently changed the element type for any other `T`)
  * - `n1 == 1` gives `tensor<T, n2>`
  * - `n2 == 1` gives `tensor<T, n1>`
  * - otherwise `tensor<T, n1, n2>`
  *
  * @tparam T the type stored at each index
  * @tparam n1 the first extent
  * @tparam n2 the second extent (defaults to 1)
  */
 template <typename T, int n1, int n2 = 1>
 using reduced_tensor = std::conditional_t<
     (n1 == 1 && n2 == 1), T,
     std::conditional_t<n1 == 1, tensor<T, n2>, std::conditional_t<n2 == 1, tensor<T, n1>, tensor<T, n1, n2>>>>;


 /**
  * @brief Builds a value-initialized tensor whose extents are taken from an integer sequence.
  * @tparam T the type stored at each index
  * @tparam n the extents, deduced from the `std::integer_sequence` argument
  */
 template <typename T, int... n>
 SERAC_HOST_DEVICE constexpr tensor<T, n...> tensor_with_shape(std::integer_sequence<int, n...>)
 {
   return {};
 }


 /**
  * @brief Creates a tensor with no extents from a nullary callable.
  * @param[in] f callable invoked with no arguments; its result type becomes the element type
  * @note `f` is invoked at run time exactly once; the `decltype` use does not evaluate it.
  */
 SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <typename lambda_type>
 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   return tensor<decltype(f())>{f()};
 }


 /**
  * @brief Creates a one-dimensional tensor by evaluating a callable at every index.
  * @tparam n1 the number of entries
  * @param[in] f callable taking one `int` index; its result type becomes the element type
  */
 SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, typename lambda_type>
 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using value_type = decltype(f(n1));
   tensor<value_type, n1> result{};
   for (int idx = 0; idx < n1; ++idx) {
     result[idx] = f(idx);
   }
   return result;
 }


 /**
  * @brief Creates a two-dimensional tensor by evaluating a callable at every index pair.
  * @tparam n1 the first extent
  * @tparam n2 the second extent
  * @param[in] f callable taking two `int` indices; its result type becomes the element type
  */
 SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, typename lambda_type>
 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using value_type = decltype(f(n1, n2));
   tensor<value_type, n1, n2> result{};
   for (int row = 0; row < n1; ++row) {
     for (int col = 0; col < n2; ++col) {
       result[row][col] = f(row, col);
     }
   }
   return result;
 }


 /**
  * @brief Creates a three-dimensional tensor by evaluating a callable at every index triple.
  * @tparam n1 the first extent
  * @tparam n2 the second extent
  * @tparam n3 the third extent
  * @param[in] f callable taking three `int` indices; its result type becomes the element type
  */
 SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, int n3, typename lambda_type>
 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using value_type = decltype(f(n1, n2, n3));
   tensor<value_type, n1, n2, n3> result{};
   for (int a = 0; a < n1; ++a) {
     for (int b = 0; b < n2; ++b) {
       for (int c = 0; c < n3; ++c) {
         result[a][b][c] = f(a, b, c);
       }
     }
   }
   return result;
 }


 /**
  * @brief Creates a four-dimensional tensor by evaluating a callable at every index quadruple.
  * @tparam n1 the first extent
  * @tparam n2 the second extent
  * @tparam n3 the third extent
  * @tparam n4 the fourth extent
  * @param[in] f callable taking four `int` indices; its result type becomes the element type
  */
 SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, int n3, int n4, typename lambda_type>
 SERAC_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using value_type = decltype(f(n1, n2, n3, n4));
   tensor<value_type, n1, n2, n3, n4> result{};
   for (int a = 0; a < n1; ++a) {
     for (int b = 0; b < n2; ++b) {
       for (int c = 0; c < n3; ++c) {
         for (int d = 0; d < n4; ++d) {
           result[a][b][c][d] = f(a, b, c, d);
         }
       }
     }
   }
   return result;
 }


 /**
  * @brief Adds two tensors of identical shape, slice by slice along the leading dimension.
  * @param[in] A the left operand
  * @param[in] B the right operand
  * @return a tensor whose element type is that of `S{} + T{}`
  */
 template <typename S, typename T, int m, int... n>
 SERAC_HOST_DEVICE constexpr auto operator+(const tensor<S, m, n...>& A, const tensor<T, m, n...>& B)
 {
   using value_type = decltype(S{} + T{});
   tensor<value_type, m, n...> total{};
   for (int idx = 0; idx < m; ++idx) {
     total[idx] = A[idx] + B[idx];
   }
   return total;
 }


 /**
  * @brief Negates a tensor, slice by slice along the leading dimension.
  * @param[in] A the tensor to negate
  * @return a tensor of the same type holding `-A[i]` in every slice
  */
 template <typename T, int m, int... n>
 SERAC_HOST_DEVICE constexpr auto operator-(const tensor<T, m, n...>& A)
 {
   tensor<T, m, n...> negated{};
   for (int idx = 0; idx < m; ++idx) {
     negated[idx] = -A[idx];
   }
   return negated;
 }


 /**
  * @brief Subtracts two tensors of identical shape, slice by slice along the leading dimension.
  * @param[in] A the left operand
  * @param[in] B the right operand
  * @return a tensor whose element type is that of `S{} - T{}`
  * @note The result type is derived from the subtraction that is actually
  * performed, rather than from `S{} + T{}`.
  */
 template <typename S, typename T, int m, int... n>
 SERAC_HOST_DEVICE constexpr auto operator-(const tensor<S, m, n...>& A, const tensor<T, m, n...>& B)
 {
   tensor<decltype(S{} - T{}), m, n...> C{};
   for (int i = 0; i < m; i++) {
     C[i] = A[i] - B[i];
   }
   return C;
 }


 /**
  * @brief Adds `B` into `A` in place, slice by slice along the leading dimension.
  * @param[in,out] A the tensor that is updated
  * @param[in] B the tensor that is added
  * @return a reference to `A`
  */
 template <typename S, typename T, int m, int... n>
 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<S, m, n...>& A, const tensor<T, m, n...>& B)
 {
   for (int idx = 0; idx < m; ++idx) {
     A[idx] += B[idx];
   }
   return A;
 }


 #if 0

 template <typename T>

 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T>& A, const T& B)

 {

   return A.data += B;

 }

 #endif


 /**
  * @brief Adds a length-`n` tensor into an `n` by 1 tensor in place.
  * @param[in,out] A the `n` by 1 tensor that is updated
  * @param[in] B the one-dimensional tensor that is added
  * @return a reference to `A`
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T, n, 1>& A, const tensor<T, n>& B)
 {
   for (int row = 0; row < n; ++row) {
     A.data[row][0] += B[row];
   }
   return A;
 }

 /**
  * @brief Adds a length-`n` tensor into a 1 by `n` tensor in place.
  * @param[in,out] A the 1 by `n` tensor that is updated
  * @param[in] B the one-dimensional tensor that is added
  * @return a reference to `A`
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T, 1, n>& A, const tensor<T, n>& B)
 {
   for (int col = 0; col < n; ++col) {
     A.data[0][col] += B[col];
   }
   return A;
 }


 /**
  * @brief Adds a scalar into the only entry of a length-1 tensor.
  * @return the result of the compound assignment on that entry (not the tensor itself)
  */
 template <typename T>
 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T, 1>& A, const T& B)
 {
   return A[0] += B;
 }

 /**
  * @brief Adds a scalar into the only entry of a 1 by 1 tensor.
  * @return the result of the compound assignment on that entry (not the tensor itself)
  */
 template <typename T>
 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T, 1, 1>& A, const T& B)
 {
   return A[0][0] += B;
 }

 /// @brief adding `zero` to a tensor in place leaves it untouched and returns a reference to it
 template <typename T, int... n>
 SERAC_HOST_DEVICE constexpr auto& operator+=(tensor<T, n...>& A, zero)
 {
   return A;
 }


 /**
  * @brief Subtracts `B` from `A` in place, slice by slice along the leading dimension.
  * @param[in,out] A the tensor that is updated
  * @param[in] B the tensor that is subtracted
  * @return a reference to `A`
  */
 template <typename S, typename T, int m, int... n>
 SERAC_HOST_DEVICE constexpr auto& operator-=(tensor<S, m, n...>& A, const tensor<T, m, n...>& B)
 {
   for (int idx = 0; idx < m; ++idx) {
     A[idx] -= B[idx];
   }
   return A;
 }

 /// @brief subtracting `zero` from a tensor in place leaves it untouched and returns a reference to it
 template <typename T, int... n>
 SERAC_HOST_DEVICE constexpr auto& operator-=(tensor<T, n...>& A, zero)
 {
   return A;
 }


 /**
  * @brief Outer product of a scalar with a one-dimensional tensor, i.e. the scaled tensor.
  * @param[in] A the scalar factor
  * @param[in] B the tensor factor; taken by const reference, like every other
  * tensor argument of the `outer` overloads, to avoid copying it on each call
  * @return a tensor with entries `A * B[i]`
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto outer(double A, const tensor<T, n>& B)
 {
   tensor<decltype(double{} * T{}), n> AB{};
   for (int i = 0; i < n; i++) {
     AB[i] = A * B[i];
   }
   return AB;
 }


 /**
  * @brief Outer product of a one-dimensional tensor with a scalar, i.e. the scaled tensor.
  * @param[in] A the tensor factor
  * @param[in] B the scalar factor
  * @return a tensor with entries `A[i] * B`
  */
 template <typename T, int m>
 SERAC_HOST_DEVICE constexpr auto outer(const tensor<T, m>& A, double B)
 {
   using value_type = decltype(T{} * double{});
   tensor<value_type, m> scaled{};
   for (int idx = 0; idx < m; ++idx) {
     scaled[idx] = A[idx] * B;
   }
   return scaled;
 }


 /// @brief the outer product of `zero` with a one-dimensional tensor is `zero`
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr zero outer(zero, const tensor<T, n>&)
 {
   return {};
 }

 /// @brief the outer product of a one-dimensional tensor with `zero` is `zero`
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr zero outer(const tensor<T, n>&, zero)
 {
   return {};
 }


 /**
  * @brief Outer product of two one-dimensional tensors.
  * @param[in] A the left factor, of length `m`
  * @param[in] B the right factor, of length `n`
  * @return an `m` by `n` tensor with entries `A[i] * B[j]`
  */
 template <typename S, typename T, int m, int n>
 SERAC_HOST_DEVICE constexpr auto outer(const tensor<S, m>& A, const tensor<T, n>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m, n> product{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       product[row][col] = A[row] * B[col];
     }
   }
   return product;
 }


 /**
  * @brief Inner product of two `m` by `n` tensors: the sum over all entries of `A[i][j] * B[i][j]`.
  * @param[in] A the left operand
  * @param[in] B the right operand
  */
 template <typename S, typename T, int m, int n>
 SERAC_HOST_DEVICE constexpr auto inner(const tensor<S, m, n>& A, const tensor<T, m, n>& B)
 {
   using value_type = decltype(S{} * T{});
   value_type accumulated{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       accumulated += A[row][col] * B[row][col];
     }
   }
   return accumulated;
 }

 /**
  * @brief Inner product of two length-`m` tensors: the sum over all entries of `A[i] * B[i]`.
  * @param[in] A the left operand
  * @param[in] B the right operand
  */
 template <typename S, typename T, int m>
 SERAC_HOST_DEVICE constexpr auto inner(const tensor<S, m>& A, const tensor<T, m>& B)
 {
   using value_type = decltype(S{} * T{});
   value_type accumulated{};
   for (int idx = 0; idx < m; ++idx) {
     accumulated += A[idx] * B[idx];
   }
   return accumulated;
 }

 /// @brief the inner product of two scalars is their ordinary product
 SERAC_HOST_DEVICE constexpr auto inner(double A, double B) { return A * B; }


 /**
  * @brief Matrix-matrix product: contracts the last index of `A` with the first index of `B`.
  * @param[in] A an `m` by `n` tensor
  * @param[in] B an `n` by `p` tensor
  * @return an `m` by `p` tensor
  */
 template <typename S, typename T, int m, int n, int p>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m, p> product{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < p; ++col) {
       for (int k = 0; k < n; ++k) {
         product[row][col] = product[row][col] + A[row][k] * B[k][col];
       }
     }
   }
   return product;
 }


 /// @brief `dot` of a one-dimensional tensor with a scalar forwards to `A * B`
 template <typename T, int m>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<T, m>& A, double B)
 {
   return A * B;
 }

 /// @brief `dot` of a scalar with a one-dimensional tensor forwards to `B * A`
 template <typename T, int m>
 SERAC_HOST_DEVICE constexpr auto dot(double B, const tensor<T, m>& A)
 {
   return B * A;
 }


 /**
  * @brief Dot product of two length-`m` tensors.
  * @param[in] A the left operand
  * @param[in] B the right operand
  * @return the sum of `A[i] * B[i]`
  */
 template <typename S, typename T, int m>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m>& B)
 {
   using value_type = decltype(S{} * T{});
   value_type accumulated{};
   for (int idx = 0; idx < m; ++idx) {
     accumulated = accumulated + A[idx] * B[idx];
   }
   return accumulated;
 }

 /**
  * @brief Vector-matrix product: contracts `A` with the first index of `B`.
  * @param[in] A a length-`m` tensor
  * @param[in] B an `m` by `n` tensor
  * @return a length-`n` tensor
  */
 template <typename S, typename T, int m, int n>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, n> product{};
   for (int col = 0; col < n; ++col) {
     for (int k = 0; k < m; ++k) {
       product[col] = product[col] + A[k] * B[k][col];
     }
   }
   return product;
 }


 /**
  * @brief Contracts a length-`m` tensor with the first index of an `m` by `n` by `p` tensor.
  * @param[in] A a length-`m` tensor
  * @param[in] B an `m` by `n` by `p` tensor
  * @return an `n` by `p` tensor
  */
 template <typename S, typename T, int m, int n, int p>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n, p>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, n, p> product{};
   for (int k = 0; k < m; ++k) {
     product = product + A[k] * B[k];
   }
   return product;
 }

 /**
  * @brief Contracts a length-`m` tensor with the first index of an `m` by `n` by `p` by `q` tensor.
  * @param[in] A a length-`m` tensor
  * @param[in] B an `m` by `n` by `p` by `q` tensor
  * @return an `n` by `p` by `q` tensor
  */
 template <typename S, typename T, int m, int n, int p, int q>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n, p, q>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, n, p, q> product{};
   for (int k = 0; k < m; ++k) {
     product = product + A[k] * B[k];
   }
   return product;
 }


 /**
  * @brief Matrix-vector product: contracts the last index of `A` with `B`.
  * @param[in] A an `m` by `n` tensor
  * @param[in] B a length-`n` tensor
  * @return a length-`m` tensor
  */
 template <typename S, typename T, int m, int n>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m> product{};
   for (int row = 0; row < m; ++row) {
     for (int k = 0; k < n; ++k) {
       product[row] = product[row] + A[row][k] * B[k];
     }
   }
   return product;
 }

 /**
  * @brief Contracts the last index of an `m` by `n` tensor with the first index of a four-dimensional tensor.
  * @param[in] A an `m` by `n` tensor
  * @param[in] B an `n` by `p` by `q` by `r` tensor
  * @return an `m` by `p` by `q` by `r` tensor
  */
 template <typename S, typename T, int m, int n, int p, int q, int r>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p, q, r>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m, p, q, r> product{};
   for (int row = 0; row < m; ++row) {
     for (int k = 0; k < n; ++k) {
       product[row] = product[row] + A[row][k] * B[k];
     }
   }
   return product;
 }

 /**
  * @brief Contracts the last index of an `m` by `n` tensor with the first index of a three-dimensional tensor.
  * @param[in] A an `m` by `n` tensor
  * @param[in] B an `n` by `p` by `q` tensor
  * @return an `m` by `p` by `q` tensor
  */
 template <typename S, typename T, int m, int n, int p, int q>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p, q>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m, p, q> product{};
   for (int row = 0; row < m; ++row) {
     for (int k = 0; k < n; ++k) {
       product[row] = product[row] + A[row][k] * B[k];
     }
   }
   return product;
 }


 /**
  * @brief Contracts the last index of a three-dimensional tensor with a one-dimensional tensor.
  * @param[in] A an `m` by `n` by `p` tensor
  * @param[in] B a length-`p` tensor
  * @return an `m` by `n` tensor
  */
 template <typename S, typename T, int m, int n, int p>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n, p>& A, const tensor<T, p>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m, n> product{};
   for (int a = 0; a < m; ++a) {
     for (int b = 0; b < n; ++b) {
       for (int c = 0; c < p; ++c) {
         product[a][b] += A[a][b][c] * B[c];
       }
     }
   }
   return product;
 }

 /**
  * @brief Evaluates the scalar `u[i] * A[i][j] * v[j]` summed over `i` and `j`.
  * @param[in] u a length-`m` tensor contracted with the first index of `A`
  * @param[in] A an `m` by `n` tensor
  * @param[in] v a length-`n` tensor contracted with the second index of `A`
  */
 template <typename S, typename T, typename U, int m, int n>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m>& u, const tensor<T, m, n>& A, const tensor<U, n>& v)
 {
   using value_type = decltype(S{} * T{} * U{});
   value_type accumulated{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       accumulated += u[row] * A[row][col] * v[col];
     }
   }
   return accumulated;
 }

 /**
  * @brief Contracts the last index of a four-dimensional tensor with a one-dimensional tensor.
  * @param[in] A an `m` by `n` by `p` by `q` tensor
  * @param[in] B a length-`q` tensor
  * @return an `m` by `n` by `p` tensor
  */
 template <typename S, typename T, int m, int n, int p, int q>
 SERAC_HOST_DEVICE constexpr auto dot(const tensor<S, m, n, p, q>& A, const tensor<T, q>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m, n, p> product{};
   for (int a = 0; a < m; ++a) {
     for (int b = 0; b < n; ++b) {
       for (int c = 0; c < p; ++c) {
         for (int d = 0; d < q; ++d) {
           product[a][b][c] += A[a][b][c][d] * B[d];
         }
       }
     }
   }
   return product;
 }


 template <typename T>

 auto cross(const tensor<T, 3, 2>& A)

 {

   return tensor<T, 3>{A(1, 0) * A(2, 1) - A(2, 0) * A(1, 1), A(2, 0) * A(0, 1) - A(0, 0) * A(2, 1),

                       A(0, 0) * A(1, 1) - A(1, 0) * A(0, 1)};

 }


 template <typename T>

 auto cross(const tensor<T, 2, 1>& v)

 {

   return tensor<T, 2>{v(1, 0), -v(0, 0)};

 }


 template <typename T>

 auto cross(const tensor<T, 2>& v)

 {

   return tensor<T, 2>{v[1], -v[0]};

 }


 template <typename S, typename T>

 auto cross(const tensor<S, 3>& u, const tensor<T, 3>& v)

 {

   return tensor<decltype(S{} * T{}), 3>{u(1) * v(2) - u(2) * v(1), u(2) * v(0) - u(0) * v(2),

                                         u(0) * v(1) - u(1) * v(0)};

 }


 /**
  * @brief Contracts the last two indices of a four-dimensional tensor with both indices of a two-dimensional tensor.
  * @param[in] A an `m` by `n` by `p` by `q` tensor
  * @param[in] B a `p` by `q` tensor
  * @return an `m` by `n` tensor
  */
 template <typename S, typename T, int m, int n, int p, int q>
 SERAC_HOST_DEVICE constexpr auto double_dot(const tensor<S, m, n, p, q>& A, const tensor<T, p, q>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m, n> contracted{};
   for (int a = 0; a < m; ++a) {
     for (int b = 0; b < n; ++b) {
       for (int c = 0; c < p; ++c) {
         for (int d = 0; d < q; ++d) {
           contracted[a][b] += A[a][b][c][d] * B[c][d];
         }
       }
     }
   }
   return contracted;
 }

 /**
  * @brief Contracts the last two indices of a three-dimensional tensor with both indices of a two-dimensional tensor.
  * @param[in] A an `m` by `n` by `p` tensor
  * @param[in] B an `n` by `p` tensor
  * @return a length-`m` tensor
  */
 template <typename S, typename T, int m, int n, int p>
 SERAC_HOST_DEVICE constexpr auto double_dot(const tensor<S, m, n, p>& A, const tensor<T, n, p>& B)
 {
   using value_type = decltype(S{} * T{});
   tensor<value_type, m> contracted{};
   for (int a = 0; a < m; ++a) {
     for (int b = 0; b < n; ++b) {
       for (int c = 0; c < p; ++c) {
         contracted[a] += A[a][b][c] * B[b][c];
       }
     }
   }
   return contracted;
 }


 template <typename S, typename T, int m, int n>

 constexpr auto double_dot(const tensor<S, m, n>& A, const tensor<T, m, n>& B)

 {

   decltype(S{} * T{}) AB{};

   for (int i = 0; i < m; i++) {

     for (int j = 0; j < n; j++) {

       AB += A[i][j] * B[i][j];

     }

   }

   return AB;

 }


 /**
  * @brief Multiplying two tensors is shorthand for `dot(A, B)`.
  * @param[in] A the left operand
  * @param[in] B the right operand
  */
 template <typename S, typename T, int... m, int... n>
 SERAC_HOST_DEVICE constexpr auto operator*(const tensor<S, m...>& A, const tensor<T, n...>& B)
 {
   return dot(A, B);
 }


 /**
  * @brief Sum of the squares of the entries of a one-dimensional tensor.
  * @param[in] A the tensor
  */
 template <typename T, int m>
 SERAC_HOST_DEVICE constexpr auto squared_norm(const tensor<T, m>& A)
 {
   T accumulated{};
   for (int idx = 0; idx < m; ++idx) {
     accumulated += A[idx] * A[idx];
   }
   return accumulated;
 }

 /**
  * @brief Sum of the squares of the entries of a two-dimensional tensor.
  * @param[in] A the tensor
  */
 template <typename T, int m, int n>
 SERAC_HOST_DEVICE constexpr auto squared_norm(const tensor<T, m, n>& A)
 {
   T accumulated{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       accumulated += A[row][col] * A[row][col];
     }
   }
   return accumulated;
 }

 /**
  * @brief Sum of the squares of the entries of a tensor of any shape.
  * @param[in] A the tensor
  * @note The entries are visited through `for_constexpr<n...>`.
  */
 template <typename T, int... n>
 SERAC_HOST_DEVICE constexpr auto squared_norm(const tensor<T, n...>& A)
 {
   T accumulated{};
   for_constexpr<n...>([&](auto... idx) { accumulated += A(idx...) * A(idx...); });
   return accumulated;
 }


 /**
  * @brief Square root of `squared_norm(A)`.
  * @param[in] A the tensor
  * @note `sqrt` is called unqualified after `using std::sqrt`, so an overload
  * found by argument-dependent lookup for `T` is used when one exists.
  */
 template <typename T, int... n>
 SERAC_HOST_DEVICE auto norm(const tensor<T, n...>& A)
 {
   using std::sqrt;
   const auto sum_of_squares = squared_norm(A);
   return sqrt(sum_of_squares);
 }

 /// @brief the norm of `zero` is `zero`
 SERAC_HOST_DEVICE constexpr zero norm(zero) { return zero{}; }


 /**
  * @brief Divides a tensor by its norm.
  * @param[in] A the tensor to rescale
  * @note No check is made for a vanishing norm.
  */
 template <typename T, int... n>
 SERAC_HOST_DEVICE auto normalize(const tensor<T, n...>& A)
 {
   return A / norm(A);
 }


 /**
  * @brief Trace of a square tensor: the sum of its diagonal entries.
  * @param[in] A the `n` by `n` tensor
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto tr(const tensor<T, n, n>& A)
 {
   T trace{};
   for (int k = 0; k < n; ++k) {
     trace = trace + A[k][k];
   }
   return trace;
 }


 /**
  * @brief Symmetric part of a square tensor, with entries `0.5 * (A[i][j] + A[j][i])`.
  * @param[in] A the `n` by `n` tensor
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto sym(const tensor<T, n, n>& A)
 {
   tensor<T, n, n> result{};
   for (int row = 0; row < n; ++row) {
     for (int col = 0; col < n; ++col) {
       result[row][col] = 0.5 * (A[row][col] + A[col][row]);
     }
   }
   return result;
 }

 /**
  * @brief Antisymmetric part of a square tensor, with entries `0.5 * (A[i][j] - A[j][i])`.
  * @param[in] A the `n` by `n` tensor
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto antisym(const tensor<T, n, n>& A)
 {
   tensor<T, n, n> result{};
   for (int row = 0; row < n; ++row) {
     for (int col = 0; col < n; ++col) {
       result[row][col] = 0.5 * (A[row][col] - A[col][row]);
     }
   }
   return result;
 }


 /**
  * @brief Deviatoric part of a square tensor: `A` with `tr(A) / n` subtracted from every diagonal entry.
  * @param[in] A the `n` by `n` tensor
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto dev(const tensor<T, n, n>& A)
 {
   auto deviator = A;
   // the same value is removed from each diagonal entry, so compute it once
   const auto diagonal_shift = tr(A) / n;
   for (int k = 0; k < n; ++k) {
     deviator[k][k] -= diagonal_shift;
   }
   return deviator;
 }


 /**
  * @brief Copies the diagonal of a square tensor into an otherwise value-initialized square tensor.
  * @param[in] A the `n` by `n` tensor whose diagonal is kept
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto diagonal_matrix(const tensor<T, n, n>& A)
 {
   tensor<T, n, n> result{};
   for (int k = 0; k < n; ++k) {
     result[k][k] = A[k][k];
   }
   return result;
 }


 /**
  * @brief Builds a square tensor whose diagonal holds the entries of `d`; all other entries are value-initialized.
  * @param[in] d the length-`n` tensor of diagonal entries
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr tensor<T, n, n> diag(const tensor<T, n>& d)
 {
   tensor<T, n, n> square{};
   for (int k = 0; k < n; ++k) {
     square[k][k] = d[k];
   }
   return square;
 }

 /**
  * @brief Extracts the diagonal of a square tensor as a one-dimensional tensor.
  * @param[in] D the `n` by `n` tensor
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr tensor<T, n> diag(const tensor<T, n, n>& D)
 {
   tensor<T, n> entries{};
   for (int k = 0; k < n; ++k) {
     entries[k] = D[k][k];
   }
   return entries;
 }


 /**
  * @brief Builds the `dim` by `dim` identity as a fully populated tensor of doubles.
  * @tparam dim the number of rows and columns
  */
 template <int dim>
 SERAC_HOST_DEVICE constexpr tensor<double, dim, dim> DenseIdentity()
 {
   tensor<double, dim, dim> identity{};
   for (int row = 0; row < dim; ++row) {
     for (int col = 0; col < dim; ++col) {
       // the comparison yields a bool, which converts to 1.0 on the diagonal and 0.0 elsewhere
       identity[row][col] = (row == col);
     }
   }
   return identity;
 }


 /**
  * @brief Transpose of a two-dimensional tensor.
  * @param[in] A the `m` by `n` tensor
  * @return an `n` by `m` tensor with entries `A[j][i]`
  */
 template <typename T, int m, int n>
 SERAC_HOST_DEVICE constexpr auto transpose(const tensor<T, m, n>& A)
 {
   tensor<T, n, m> flipped{};
   for (int row = 0; row < n; ++row) {
     for (int col = 0; col < m; ++col) {
       flipped[row][col] = A[col][row];
     }
   }
   return flipped;
 }


 /**
  * @brief Determinant of a 2 by 2 tensor, evaluated with the closed-form expression.
  * @param[in] A the 2 by 2 tensor
  */
 template <typename T>

 SERAC_HOST_DEVICE constexpr auto det(const tensor<T, 2, 2>& A)

 {

   return A[0][0] * A[1][1] - A[0][1] * A[1][0];

 }

 /**
  * @brief Determinant of a 3 by 3 tensor, evaluated with the closed-form expression.
  * @param[in] A the 3 by 3 tensor
  */
 template <typename T>

 SERAC_HOST_DEVICE constexpr auto det(const tensor<T, 3, 3>& A)

 {

   // the three positive products first, then the three negative ones
   return A[0][0] * A[1][1] * A[2][2] + A[0][1] * A[1][2] * A[2][0] + A[0][2] * A[1][0] * A[2][1] -

          A[0][0] * A[1][2] * A[2][1] - A[0][1] * A[1][0] * A[2][2] - A[0][2] * A[1][1] * A[2][0];

 }


 /**
  * @brief Computes det(A + I) - 1 for a 2 by 2 tensor, where I is the identity.
  * @param[in] A the 2 by 2 tensor
  * @note The value is assembled directly from the entries of A instead of
  * forming A + I, taking its determinant, and subtracting 1.
  */
 template <typename T>

 SERAC_HOST_DEVICE constexpr auto detApIm1(const tensor<T, 2, 2>& A)

 {

   // Expanding the characteristic polynomial of A (cf. the Cayley-Hamilton theorem)

   // gives, for any N by N matrix A: det(A + I) - 1 = I1(A) + I2(A) + ... + IN(A),

   // where the In are the principal invariants of A.

   // We inline the definitions of the principal invariants to increase computational speed.


   // equivalent to tr(A) + det(A)

   return A(0, 0) - A(0, 1) * A(1, 0) + A(1, 1) + A(0, 0) * A(1, 1);

 }


 /**
  * @brief Computes det(A + I) - 1 for a 3 by 3 tensor, where I is the identity.
  * @param[in] A the 3 by 3 tensor
  * @note The value is the sum of the three principal invariants of A
  * (trace, second invariant, determinant), written out in terms of the entries.
  */
 template <typename T>

 SERAC_HOST_DEVICE constexpr auto detApIm1(const tensor<T, 3, 3>& A)

 {

   // For notes on the implementation, see the 2x2 version.


   // clang-format off

   // equivalent to tr(A) + I2(A) + det(A)

   // (terms of I2(A) and det(A) that share a factor are grouped as `x * (1 + y)`)
   return A(0, 0) + A(1, 1) + A(2, 2)

        - A(0, 1) * A(1, 0) * (1 + A(2, 2))

        + A(0, 0) * A(1, 1) * (1 + A(2, 2))

        - A(0, 2) * A(2, 0) * (1 + A(1, 1))

        - A(1, 2) * A(2, 1) * (1 + A(0, 0))

        + A(0, 0) * A(2, 2)

        + A(1, 1) * A(2, 2)

        + A(0, 1) * A(1, 2) * A(2, 0)

        + A(0, 2) * A(1, 0) * A(2, 1);

   // clang-format on

 }


 template <typename T, int dim>

 auto matrix_sqrt(const tensor<T, dim, dim>& A)

 {

   auto B = A;

   for (int i = 0; i < 15; i++) {

     B = 0.5 * (B + dot(A, inv(B)));

   }

   return B;

 }


 /**
  * @brief Contracts index `i1` of a tensor `A` (two or three dimensions) with index `i2` of a two-dimensional tensor `B`.
  *
  * The result has the shape of `A`, except that the extent at position `i1`
  * is replaced by the extent of the index of `B` that is not contracted.
  *
  * @tparam i1 the index of `A` that is summed over
  * @tparam i2 the index of `B` that is summed over (0 or 1)
  * @param[in] A the first tensor
  * @param[in] B the second tensor
  * @return a tensor whose element type is that of `S{} * T{}`
  */
 template <int i1, int i2, typename S, int m, int... n, typename T, int p, int q>

 SERAC_HOST_DEVICE auto contract(const tensor<S, m, n...>& A, const tensor<T, p, q>& B)

 {

   // extents of both operands, so they can be looked up by index at compile time
   constexpr int Adims[] = {m, n...};

   constexpr int Bdims[] = {p, q};

   // A may have at most three dimensions
   static_assert(sizeof...(n) < 3);

   static_assert(Adims[i1] == Bdims[i2], "error: incompatible tensor dimensions");


   // first, we have to figure out the dimensions of the output tensor

   // new_dim is the extent of the index of B that survives the contraction
   constexpr int new_dim = (i2 == 0) ? q : p;

   constexpr int d1      = (i1 == 0) ? new_dim : Adims[0];

   constexpr int d2      = (i1 == 1) ? new_dim : Adims[1];

   // d3 == 0 is used as the marker for "A is two-dimensional"
   constexpr int d3      = sizeof...(n) == 1 ? 0 : ((i1 == 2) ? new_dim : Adims[2]);


   // the type of the output tensor is easier to figure out

   using U = decltype(S{} * T{});


   // value-initialized output of the shape computed above
   auto C = []() {

     if constexpr (d3 == 0) return tensor<U, d1, d2>{};

     if constexpr (d3 != 0) return tensor<U, d1, d2, d3>{};

   }();


   if constexpr (d3 == 0) {

     // two-dimensional A: exactly one of the four (i1, i2) cases is compiled in
     for (int i = 0; i < d1; i++) {

       for (int j = 0; j < d2; j++) {

         U sum{};

         for (int k = 0; k < Adims[i1]; k++) {

           if constexpr (i1 == 0 && i2 == 0) sum += A(k, j) * B(k, i);

           if constexpr (i1 == 1 && i2 == 0) sum += A(i, k) * B(k, j);

           if constexpr (i1 == 0 && i2 == 1) sum += A(k, j) * B(i, k);

           if constexpr (i1 == 1 && i2 == 1) sum += A(i, k) * B(j, k);

         }

         C(i, j) = sum;

       }

     }

   } else {

     // three-dimensional A: exactly one of the six (i1, i2) cases is compiled in
     for (int i = 0; i < d1; i++) {

       for (int j = 0; j < d2; j++) {

         for (int k = 0; k < d3; k++) {

           U sum{};

           for (int l = 0; l < Adims[i1]; l++) {

             if constexpr (i1 == 0 && i2 == 0) sum += A(l, j, k) * B(l, i);

             if constexpr (i1 == 1 && i2 == 0) sum += A(i, l, k) * B(l, j);

             if constexpr (i1 == 2 && i2 == 0) sum += A(i, j, l) * B(l, k);

             if constexpr (i1 == 0 && i2 == 1) sum += A(l, j, k) * B(i, l);

             if constexpr (i1 == 1 && i2 == 1) sum += A(i, l, k) * B(j, l);

             if constexpr (i1 == 2 && i2 == 1) sum += A(i, j, l) * B(k, l);

           }

           C(i, j, k) = sum;

         }

       }

     }

   }


   return C;

 }


 /// @brief contracting `zero` with anything is `zero`, whichever indices are requested
 template <int i1, int i2, typename T>
 SERAC_HOST_DEVICE auto contract(const zero&, const T&)
 {
   return zero{};
 }


 /**
  * @brief Norm of the difference `A - B`, divided by the norm of `A`.
  * @param[in] A the tensor that also serves as the reference for the scaling
  * @param[in] B the tensor compared against `A`
  */
 template <typename T, int... n>
 double relative_error(tensor<T, n...> A, tensor<T, n...> B)
 {
   const auto difference = A - B;
   return norm(difference) / norm(A);
 }


 /**
  * @brief Checks whether a square tensor equals its transpose up to an absolute tolerance.
  * @param[in] A the `n` by `n` tensor to test
  * @param[in] tolerance largest allowed value of `|A(i, j) - A(j, i)|`
  * @return false as soon as one pair of mirrored entries differs by more than `tolerance`, true otherwise
  */
 template <int n>
 SERAC_HOST_DEVICE bool is_symmetric(tensor<double, n, n> A, double tolerance = 1.0e-8)
 {
   // only the strict upper triangle needs to be compared against its mirror image
   for (int row = 0; row < n; ++row) {
     for (int col = row + 1; col < n; ++col) {
       const double mismatch = std::abs(A(row, col) - A(col, row));
       if (mismatch > tolerance) {
         return false;
       }
     }
   }
   return true;
 }


 /**
  * @brief Checks whether a 2 by 2 tensor is symmetric and positive definite.
  *
  * Positive definiteness is tested with Sylvester's criterion: both leading
  * principal minors, `A(0, 0)` and `det(A)`, must be strictly positive.
  *
  * @param[in] A the tensor to test
  * @return true only if `A` passes `is_symmetric` and both minors are > 0
  * @note The comparisons are written as `!(x > 0.0)` so that semi-definite
  * tensors (for example the zero tensor) and NaN values are rejected.
  */
 inline SERAC_HOST_DEVICE bool is_symmetric_and_positive_definite(tensor<double, 2, 2> A)
 {
   if (!is_symmetric(A)) {
     return false;
   }
   if (!(A(0, 0) > 0.0)) {
     return false;
   }
   if (!(det(A) > 0.0)) {
     return false;
   }
   return true;
 }

 /**
  * @brief Checks whether a 3 by 3 tensor is symmetric and positive definite.
  *
  * Positive definiteness is tested with Sylvester's criterion: `det(A)` must
  * be strictly positive and the leading 2 by 2 block must itself pass the
  * 2 by 2 overload of this function.
  *
  * @param[in] A the tensor to test
  * @return true only if `A` passes `is_symmetric` and the checks above
  * @note The determinant test is written as `!(x > 0.0)` so that singular
  * tensors and NaN values are rejected.
  */
 inline SERAC_HOST_DEVICE bool is_symmetric_and_positive_definite(tensor<double, 3, 3> A)
 {
   if (!is_symmetric(A)) {
     return false;
   }
   if (!(det(A) > 0.0)) {
     return false;
   }
   // leading 2 by 2 block of A
   auto subtensor = make_tensor<2, 2>([A](int i, int j) { return A(i, j); });
   if (!is_symmetric_and_positive_definite(subtensor)) {
     return false;
   }
   return true;
 }


 /**
  * @brief Holds the three pieces of an LU factorization of an `n` by `n` tensor.
  * @tparam T the element type of the factors
  * @tparam n the number of rows and columns
  */
 template <typename T, int n>

 struct LuFactorization {

   /// Row permutation: `solve_lower_triangular` reads the right-hand side as `b[P[i]]`
   tensor<int, n>  P;

   /// Factor passed to `solve_lower_triangular` by `linear_solve`
   tensor<T, n, n> L;

   /// Factor passed to `solve_upper_triangular` by `linear_solve`
   tensor<T, n, n> U;

 };


 /**
  * @brief Solves `L y = b` by forward substitution, reading the rows of `b` in the order given by `P`.
  * @param[in] L the lower triangular tensor; only entries on or below the diagonal are read
  * @param[in] b the right-hand side, whose leading extent is `n`
  * @param[in] P the permutation: row `i` of the system uses `b[P[i]]`
  * @return the solution `y`, with the same shape as `b`
  */
 template <typename T, int n, int... m>
 SERAC_HOST_DEVICE constexpr auto solve_lower_triangular(const tensor<T, n, n>& L, const tensor<T, n, m...>& b,
                                                         const tensor<int, n>& P)
 {
   tensor<T, n, m...> solution{};
   for (int row = 0; row < n; ++row) {
     auto residual = b[P[row]];
     // remove the contributions of the unknowns that are already solved
     for (int col = 0; col < row; ++col) {
       residual -= L[row][col] * solution[col];
     }
     solution[row] = residual / L[row][row];
   }
   return solution;
 }


 /**
  * @brief Solves `L y = b` by forward substitution without any row permutation.
  * @param[in] L the lower triangular tensor
  * @param[in] b the right-hand side, whose leading extent is `n`
  * @return the solution `y`, with the same shape as `b`
  */
 template <typename T, int n, int... m>
 SERAC_HOST_DEVICE constexpr auto solve_lower_triangular(const tensor<T, n, n>& L, const tensor<T, n, m...>& b)
 {
   // No permutation was supplied, so build the identity permutation {0, 1, ..., n-1}.
   // TODO make a convenience function for ranges like this
   // BT 05/09/2022
   tensor<int, n> identity_permutation(make_tensor<n>([](auto i) { return i; }));
   return solve_lower_triangular(L, b, identity_permutation);
 }


 /**
  * @brief Solves `U x = y` by back substitution.
  * @param[in] U the upper triangular tensor; only entries on or above the diagonal are read
  * @param[in] y the right-hand side, whose leading extent is `n`
  * @return the solution `x`, with the same shape as `y`
  */
 template <typename T, int n, int... m>
 SERAC_HOST_DEVICE constexpr auto solve_upper_triangular(const tensor<T, n, n>& U, const tensor<T, n, m...>& y)
 {
   tensor<T, n, m...> solution{};
   // walk the rows from the last one up to the first
   for (int row = n - 1; row >= 0; --row) {
     auto residual = y[row];
     for (int col = row + 1; col < n; ++col) {
       residual -= U[row][col] * solution[col];
     }
     solution[row] = residual / U[row][row];
   }
   return solution;
 }


 /**
  * @brief Solves a linear system from an existing LU factorization.
  * @param[in] lu_factors the factorization (permutation `P`, factors `L` and `U`)
  * @param[in] b the right-hand side, whose leading extent is `n`
  * @return the solution, with the same shape as `b`
  */
 template <typename S, typename T, int n, int... m>
 SERAC_HOST_DEVICE constexpr auto linear_solve(const LuFactorization<S, n>& lu_factors, const tensor<T, n, m...>& b)
 {
   // forward substitution: L y = b, with the rows of b permuted by P
   const auto intermediate = solve_lower_triangular(lu_factors.L, b, lu_factors.P);

   // back substitution: U x = y
   return solve_upper_triangular(lu_factors.U, intermediate);
 }

 /// @brief with a `zero` right-hand side the solution is `zero`; the factorization is not used
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto linear_solve(const LuFactorization<T, n>& /* lu_factors */, const zero /* b */)
 {
   return zero{};
 }


 /**
  * @brief Inverse of a 2 by 2 tensor of doubles, from the closed-form expression.
  * @param[in] A the tensor to invert
  * @note No check is made for a vanishing determinant.
  */
 SERAC_HOST_DEVICE constexpr tensor<double, 2, 2> inv(const tensor<double, 2, 2>& A)
 {
   const double scale = 1.0 / det(A);

   tensor<double, 2, 2> inverse{};
   inverse(0, 0) = A(1, 1) * scale;
   inverse(0, 1) = -A(0, 1) * scale;
   inverse(1, 0) = -A(1, 0) * scale;
   inverse(1, 1) = A(0, 0) * scale;
   return inverse;
 }


 /**
  * @brief Inverse of a 3 by 3 tensor of doubles, from the closed-form expression.
  * @param[in] A the tensor to invert
  * @note No check is made for a vanishing determinant.
  */
 SERAC_HOST_DEVICE constexpr tensor<double, 3, 3> inv(const tensor<double, 3, 3>& A)
 {
   const double scale = 1.0 / det(A);

   tensor<double, 3, 3> inverse{};

   // first row
   inverse(0, 0) = (A(1, 1) * A(2, 2) - A(1, 2) * A(2, 1)) * scale;
   inverse(0, 1) = (A(0, 2) * A(2, 1) - A(0, 1) * A(2, 2)) * scale;
   inverse(0, 2) = (A(0, 1) * A(1, 2) - A(0, 2) * A(1, 1)) * scale;

   // second row
   inverse(1, 0) = (A(1, 2) * A(2, 0) - A(1, 0) * A(2, 2)) * scale;
   inverse(1, 1) = (A(0, 0) * A(2, 2) - A(0, 2) * A(2, 0)) * scale;
   inverse(1, 2) = (A(0, 2) * A(1, 0) - A(0, 0) * A(1, 2)) * scale;

   // third row
   inverse(2, 0) = (A(1, 0) * A(2, 1) - A(1, 1) * A(2, 0)) * scale;
   inverse(2, 1) = (A(0, 1) * A(2, 0) - A(0, 0) * A(2, 1)) * scale;
   inverse(2, 2) = (A(0, 0) * A(1, 1) - A(0, 1) * A(1, 0)) * scale;

   return inverse;
 }

 /**
  * @brief Inverse of a general square tensor, obtained by solving against the dense identity.
  * @param[in] A the `n` by `n` tensor to invert
  * @return the result of `linear_solve(A, I)` with `I = DenseIdentity<n>()`
  */
 template <typename T, int n>
 SERAC_HOST_DEVICE constexpr auto inv(const tensor<T, n, n>& A)
 {
   auto identity = DenseIdentity<n>();
   return linear_solve(A, identity);
 }


 /**
  * @brief Writes a tensor to a stream as `{a, b, ...}`, nesting one pair of braces per dimension.
  * @param[in,out] out the stream to write to
  * @param[in] A the tensor to write
  * @return a reference to `out`
  */
 template <typename T, int m, int... n>
 auto& operator<<(std::ostream& out, const tensor<T, m, n...>& A)
 {
   out << '{';
   for (int idx = 0; idx < m; ++idx) {
     // a separator goes in front of every slice except the first
     if (idx > 0) {
       out << ", ";
     }
     out << A[idx];
   }
   out << '}';
   return out;
 }


 /// @brief writes the literal text `zero` to the stream and returns the stream
 inline auto& operator<<(std::ostream& out, zero)
 {
   return out << "zero";
 }


 /// @brief prints a double with `printf` using the `%f` format
 inline SERAC_HOST_DEVICE void print(double value) { printf("%f", value); }

 /**
  * @brief Prints a tensor of doubles with `printf` as `{a,b,...}`, nesting one pair of braces per dimension.
  * @param[in] A the tensor to print
  */
 template <int m, int... n>
 SERAC_HOST_DEVICE void print(const tensor<double, m, n...>& A)
 {
   printf("{");
   for (int idx = 0; idx < m; ++idx) {
     // a separator goes in front of every slice except the first
     if (idx > 0) {
       printf(",");
     }
     print(A[idx]);
   }
   printf("}");
 }


 /**
  * @brief Returns a copy of a one-dimensional tensor in which every entry whose square is below 1.0e-20 is set to 0.0.
  * @param[in] A the tensor to filter
  */
 template <int n>
 SERAC_HOST_DEVICE constexpr auto chop(const tensor<double, n>& A)
 {
   auto filtered = A;
   for (int idx = 0; idx < n; ++idx) {
     const double squared = filtered[idx] * filtered[idx];
     if (squared < 1.0e-20) {
       filtered[idx] = 0.0;
     }
   }
   return filtered;
 }

 /**
  * @brief Returns a copy of a two-dimensional tensor in which every entry whose square is below 1.0e-20 is set to 0.0.
  * @param[in] A the tensor to filter
  */
 template <int m, int n>
 SERAC_HOST_DEVICE constexpr auto chop(const tensor<double, m, n>& A)
 {
   auto filtered = A;
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       const double squared = filtered[row][col] * filtered[row][col];
       if (squared < 1.0e-20) {
         filtered[row][col] = 0.0;
       }
     }
   }
   return filtered;
 }


 /// Implementation helpers for computing the type of an outer product
 namespace detail {

 /// @brief Maps a pair of operand types to the type of their outer product (see the specializations).
 template <typename T1, typename T2>
 struct outer_prod;

 /// tensor with tensor: the extents of both operands are concatenated
 template <int... m, int... n>
 struct outer_prod<tensor<double, m...>, tensor<double, n...>> {
   using type = tensor<double, m..., n...>;
 };

 /// scalar with tensor: the tensor shape is kept
 template <int... n>
 struct outer_prod<double, tensor<double, n...>> {
   using type = tensor<double, n...>;
 };

 /// tensor with scalar: the tensor shape is kept
 template <int... n>
 struct outer_prod<tensor<double, n...>, double> {
   using type = tensor<double, n...>;
 };

 /// scalar with scalar: a tensor of doubles with no extents
 template <>
 struct outer_prod<double, double> {
   using type = tensor<double>;
 };

 /// `zero` on the left: the result is `zero`
 template <typename T>
 struct outer_prod<zero, T> {
   using type = zero;
 };

 /// `zero` on the right: the result is `zero`
 template <typename T>
 struct outer_prod<T, zero> {
   using type = zero;
 };

 }  // namespace detail


 /**
  * @brief The type of the outer product of a `T1` with a `T2`, as computed by `detail::outer_prod`.
  * @tparam T1 the type of the left operand
  * @tparam T2 the type of the right operand
  */
 template <typename T1, typename T2>

 using outer_product_t = typename detail::outer_prod<T1, T2>::type;


 /// @brief a plain double carries no derivative information, so its gradient is `zero`
 inline SERAC_HOST_DEVICE auto get_gradient(double /* arg */) { return zero{}; }

 /// @brief a tensor of plain doubles carries no derivative information, so its gradient is `zero`
 template <int... n>
 SERAC_HOST_DEVICE constexpr auto get_gradient(const tensor<double, n...>& /* arg */)
 {
   return zero{};
 }


 /// @brief `zero` derivative applied to a `zero` perturbation gives `zero`
 SERAC_HOST_DEVICE constexpr auto chain_rule(const zero /* df_dx */, const zero /* dx */) { return zero{}; }

 /// @brief a `zero` derivative gives `zero`, whatever the perturbation
 template <typename T>
 SERAC_HOST_DEVICE constexpr auto chain_rule(const zero /* df_dx */, const T /* dx */)
 {
   return zero{};
 }

 /// @brief a `zero` perturbation gives `zero`, whatever the derivative
 template <typename T>
 SERAC_HOST_DEVICE constexpr auto chain_rule(const T /* df_dx */, const zero /* dx */)
 {
   return zero{};
 }

 /// @brief for scalars the chain rule is the product `df_dx * dx`
 SERAC_HOST_DEVICE constexpr auto chain_rule(const double df_dx, const double dx) { return df_dx * dx; }


 /**
  * @brief Chain rule for a tensor-valued derivative and a scalar perturbation: the product `df_dx * dx`.
  * @param[in] df_dx the derivative
  * @param[in] dx the scalar perturbation
  */
 template <int... n>
 SERAC_HOST_DEVICE constexpr auto chain_rule(const tensor<double, n...>& df_dx, const double dx)
 {
   return df_dx * dx;
 }

 /**
  * @brief Chain rule for a derivative and a perturbation of identical shape: the sum of the entrywise products.
  * @param[in] df_dx the derivative
  * @param[in] dx the perturbation
  * @return a double
  */
 template <int... n>
 SERAC_HOST_DEVICE constexpr auto chain_rule(const tensor<double, n...>& df_dx, const tensor<double, n...>& dx)
 {
   double accumulated{};
   for_constexpr<n...>([&](auto... idx) { accumulated += df_dx(idx...) * dx(idx...); });
   return accumulated;
 }


 /**
  * @brief chain rule contraction that leaves the leading index of the derivative free
  * @tparam m the leading extent of df_dx (and the length of the result)
  * @tparam n the trailing extents of df_dx, which match the shape of dx
  * @param df_dx the derivative
  * @param dx the perturbation
  * @return a tensor<double, m> whose entry `k` is `chain_rule(df_dx[k], dx)`
  */
 template <int m, int... n>
 SERAC_HOST_DEVICE constexpr auto chain_rule(const tensor<double, m, n...>& df_dx, const tensor<double, n...>& dx)
 {
   tensor<double, m> df{};
   for (int k = 0; k < m; ++k) {
     df[k] = chain_rule(df_dx[k], dx);
   }
   return df;
 }


 /**
  * @brief chain rule contraction that leaves the two leading indices of the derivative free
  * @tparam m the first extent of df_dx (rows of the result)
  * @tparam n the second extent of df_dx (columns of the result)
  * @tparam p the trailing extents of df_dx, which match the shape of dx
  * @param df_dx the derivative
  * @param dx the perturbation
  * @return a tensor<double, m, n> whose entry `(i, j)` is `chain_rule(df_dx[i][j], dx)`
  *
  * Declared `constexpr`, like the other chain_rule overloads: the body only uses
  * constexpr tensor indexing and the constexpr chain_rule overloads.
  */
 template <int m, int n, int... p>
 SERAC_HOST_DEVICE constexpr auto chain_rule(const tensor<double, m, n, p...>& df_dx, const tensor<double, p...>& dx)
 {
   tensor<double, m, n> total{};
   for (int i = 0; i < m; i++) {
     for (int j = 0; j < n; j++) {
       // df_dx[i][j] is a tensor<double, p...>, i.e. it has the same shape as dx
       total[i][j] = chain_rule(df_dx[i][j], dx);
     }
   }
   return total;
 }


 /**
  * @brief total number of entries of a tensor
  * @tparam T the type stored in the tensor
  * @tparam n the extents of the tensor
  * @return the product of all extents (1 when the list of extents is empty)
  */
 template <typename T, int... n>
 SERAC_HOST_DEVICE constexpr int size(const tensor<T, n...>&)
 {
   // binary fold seeded with 1, so an empty pack still yields a value
   return (1 * ... * n);
 }


 /**
  * @brief overload of size() for a plain double
  * @return always 1
  */
 SERAC_HOST_DEVICE constexpr int size(const double&)
 {
   return 1;
 }


 /**
  * @brief overload of size() for the `zero` sentinel
  * @return always 0
  */
 SERAC_HOST_DEVICE constexpr int size(zero)
 {
   return 0;
 }


 /**
  * @brief a function for querying the ith dimension of a tensor
  * @tparam i which extent to return (zero-based)
  * @tparam T the type stored in the tensor
  * @tparam n the extents of the tensor
  * @return the ith entry of `n...`
  *
  * An index outside `[0, sizeof...(n))` is rejected at compile time instead of
  * reading past the end of the local array of extents.
  */
 template <int i, typename T, int... n>
 SERAC_HOST_DEVICE constexpr int dimension(const tensor<T, n...>&)
 {
   static_assert(0 <= i && i < static_cast<int>(sizeof...(n)),
                 "dimension<i>: index i is out of range for the rank of this tensor");
   constexpr int dimensions[] = {n...};
   return dimensions[i];
 }


 /**
  * @brief a function for querying the first dimension of a tensor
  * @tparam T the type stored in the tensor
  * @tparam m the leading extent of the tensor
  * @tparam n the remaining extents of the tensor
  * @return m
  *
  * The argument is taken by const reference (as size() and dimension() do):
  * only its type is needed, so there is no reason to copy the tensor.
  */
 template <typename T, int m, int... n>
 SERAC_HOST_DEVICE constexpr int leading_dimension(const tensor<T, m, n...>&)
 {
   return m;
 }


 /**
  * @brief check whether any entry of a tensor is NaN
  * @tparam T the type stored in the tensor
  * @tparam n the extents of the tensor
  * @param A the tensor to inspect
  * @return true if `std::isnan` reports true for at least one entry of A
  */
 template <typename T, int... n>
 bool isnan(const tensor<T, n...>& A)
 {
   bool any_nan = false;
   // every entry is visited; there is no early exit once a NaN is found
   for_constexpr<n...>([&any_nan, &A](auto... idx) {
     if (std::isnan(A(idx...))) {
       any_nan = true;
     }
   });
   return any_nan;
 }


 /**
  * @brief overload of isnan() for the `zero` sentinel
  * @return always false
  */
 inline bool isnan(const zero&)
 {
   return false;
 }


 }  // namespace serac


 // todo: port to current tensor class:

 #if 0

 // eigendecomposition for symmetric A

 //

 // based on "A robust algorithm for finding the eigenvalues and

 // eigenvectors of 3x3 symmetric matrices", by Scherzinger & Dohrmann
 //
 // arguments:
 //   A   - input 3x3 matrix (symmetry is assumed, not checked)
 //   eta - output: the three eigenvalues
 //   Q   - output: 3x3 matrix whose columns are filled with the eigenvectors
 //         (column 0 is filled from v0, "the first eigenvector" below)
 //
 // The work is done on the deviator of A; the eigenvalues are shifted back by
 // tr(A) / 3 at the end of the `J2 > 0` branch.
 //
 // NOTE(review): when J2 <= 0 the branch is skipped entirely, so eta keeps its
 // initial value of 1.0 in every entry and is not shifted by tr(A) / 3, and Q
 // stays the identity. Confirm that this is the intended result for an input
 // with vanishing deviator before porting this code.

 __host__ __device__

 inline void eig(const r2tensor < 3, 3 > & A,

                       r1tensor < 3 >    & eta,

                       r2tensor < 3, 3 > & Q) {


   // default outputs: eta = (1, 1, 1), Q = identity
   for (int i = 0; i < 3; i++) {

     eta(i) = 1.0;

     for (int j = 0; j < 3; j++) {

       Q(i,j) = (i == j);

     }

   }


   auto A_dev = dev(A);


   // invariants of the deviator, as computed by the I2 / I3 helpers
   double J2 = I2(A_dev);

   double J3 = I3(A_dev);


   if (J2 > 0.0) {


     // angle used to find eigenvalues
     // (tmp is clamped to [-1, 1] before acos, so round-off cannot produce NaN)

     double tmp = (0.5 * J3) * pow(3.0 / J2, 1.5);

     double alpha = acos(fmin(fmax(tmp, -1.0), 1.0)) / 3.0;


     // consider the most distinct eigenvalue first

     if (6.0 * alpha < M_PI) {

       eta(0) = 2 * sqrt(J2 / 3.0) * cos(alpha);

     } else {

       eta(0) = 2 * sqrt(J2 / 3.0) * cos(alpha + 2.0 * M_PI / 3.0);

     }


     // find the eigenvector for that eigenvalue
     // r[i] holds column i of (A_dev - eta(0) * I); imax tracks the column of largest norm

     r1tensor < 3 > r[3];


     int imax = -1;

     double norm_max = -1.0;


     for (int i = 0; i < 3; i++) {


       for (int j = 0; j < 3; j++) {

         r[i](j) = A_dev(j,i) - (i == j) * eta(0);

       }


       double norm_r = norm(r[i]);

       if (norm_max < norm_r) {

         imax = i;

         norm_max = norm_r;

       }


     }


     r1tensor < 3 > s0, s1, t1, t2, v0, v1, v2, w;


     // s0: unit vector along the largest column; t1, t2: the other two columns
     // with their s0 component removed; s1: the larger of t1 / t2, normalized
     s0 = normalize(r[imax]);

     t1 = r[(imax+1)%3] - dot(r[(imax+1)%3], s0) * s0;

     t2 = r[(imax+2)%3] - dot(r[(imax+2)%3], s0) * s0;

     s1 = normalize((norm(t1) > norm(t2)) ? t1 : t2);


     // record the first eigenvector

     v0 = cross(s0, s1);

     for (int i = 0; i < 3; i++) {

       Q(i,0) = v0(i);

     }


     // get the other two eigenvalues by solving the

     // remaining quadratic characteristic polynomial
     // (A11 .. A22 are the components of A_dev in the {s0, s1} basis)

     auto A_dev_s0 = dot(A_dev, s0);

     auto A_dev_s1 = dot(A_dev, s1);


     double A11 = dot(s0, A_dev_s0);

     double A12 = dot(s0, A_dev_s1);

     double A21 = dot(s1, A_dev_s0);

     double A22 = dot(s1, A_dev_s1);


     // NOTE(review): if signum(0) returns 0, delta vanishes whenever A11 == A22,
     // even when A12 * A21 != 0 -- verify the convention used by signum
     double delta = 0.5 * signum(A11-A22) * sqrt((A11-A22)*(A11-A22) + 4*A12*A21);


     eta(1) = 0.5 * (A11 + A22) - delta;

     eta(2) = 0.5 * (A11 + A22) + delta;


     // if the remaining eigenvalues are exactly the same

     // then just use the basis for the orthogonal complement

     // found earlier

     if (fabs(delta) <= 1.0e-15) {


       for (int i = 0; i < 3; i++){

         Q(i,1) = s0(i);

         Q(i,2) = s1(i);

       }


     // otherwise compute the remaining eigenvectors

     } else {


       // t1, t2 are reused here: (A_dev - eta(1) * I) applied to s0 and s1
       t1 = A_dev_s0 - eta(1) * s0;

       t2 = A_dev_s1 - eta(1) * s1;


       w = normalize((norm(t1) > norm(t2)) ? t1 : t2);


       v1 = normalize(cross(w, v0));

       for (int i = 0; i < 3; i++) Q(i,1) = v1(i);


       // define the last eigenvector as

       // the direction perpendicular to the

       // first two directions

       v2 = normalize(cross(v0, v1));

       for (int i = 0; i < 3; i++) Q(i,2) = v2(i);


     }


     // eta are actually eigenvalues of A_dev, so

     // shift them to get eigenvalues of A

     eta += tr(A) / 3.0;


   }


 }


 // angle between two 2D vectors, from the dot product of their normalized directions
 inline float angle_between(const vec<2>& a, const vec<2>& b)
 {
   // clip presumably clamps the cosine to [-1, 1] so that acos stays in its domain -- confirm
   auto cosine = clip(dot(normalize(a), normalize(b)), -1.0f, 1.0f);
   return acos(cosine);
 }


 // angle between two 3D vectors, from the dot product of their normalized directions
 inline float angle_between(const vec<3>& a, const vec<3>& b)
 {
   // clip presumably clamps the cosine to [-1, 1] so that acos stays in its domain -- confirm
   auto cosine = clip(dot(normalize(a), normalize(b)), -1.0f, 1.0f);
   return acos(cosine);
 }


 // angle between proper orthogonal matrices

 inline float angle_between(const mat < 3, 3 > & U, const mat < 3, 3 > & V) {

   return acos(0.5f * (tr(dot(U, transpose(V))) - 1.0f));

 }


 // 2x2 matrix {{cos, -sin}, {sin, cos}} of the angle theta
 inline mat<2, 2> rotation(const float theta)
 {
   auto c = cos(theta);
   auto s = sin(theta);
   return mat<2, 2>{{c, -s}, {s, c}};
 }


 // rotation matrix for the axis-angle vector omega:
 // the axis is omega / |omega| and the angle is |omega|
 inline mat<3, 3> axis_to_rotation(const vec<3>& omega)
 {
   float angle = norm(omega);

   // negligible rotation: the axis is ill-defined, return eye<3>() instead
   if (fabs(angle) < 0.000001f) {
     return eye<3>();
   }

   vec3 axis = omega / angle;

   float c = cos(angle);
   float s = sin(angle);
   float k = 1.0f - c;

   // entry (i, j) = axis[i] * axis[j] * (1 - c) + c * (i == j) +/- s * (remaining axis component)
   return mat<3, 3>{
     {axis[0] * axis[0] * k + c, axis[0] * axis[1] * k - axis[2] * s, axis[0] * axis[2] * k + axis[1] * s},
     {axis[1] * axis[0] * k + axis[2] * s, axis[1] * axis[1] * k + c, axis[1] * axis[2] * k - axis[0] * s},
     {axis[2] * axis[0] * k - axis[1] * s, axis[2] * axis[1] * k + axis[0] * s, axis[2] * axis[2] * k + c}
   };
 }


 // assumes R is a proper-orthogonal matrix
 //
 // returns the vector built from the antisymmetric part of R,
 // scaled by theta / (2 * sin(theta)), where cos(theta) = (tr(R) - 1) / 2
 inline vec<3> rotation_to_axis(const mat<3, 3>& R)
 {
   float theta = acos(clip(0.5f * (tr(R) - 1.0f), -1.0f, 1.0f));

   // for small angles, prefer series expansion to division by sin(theta) ~ 0
   const bool small_angle = fabs(theta) < 0.00001f;
   float scale = small_angle ? (0.5f + theta * theta / 12.0f) : (0.5f * theta / sin(theta));

   return vec3{R(2, 1) - R(1, 2), R(0, 2) - R(2, 0), R(1, 0) - R(0, 1)} * scale;
 }


 // 3x3 matrix whose columns are (forward, left, upward):
 //   forward - the normalized direction
 //   upward  - the normalized part of `up` that is perpendicular to forward
 //   left    - normalize(cross(upward, forward))
 inline mat<3, 3> look_at(const vec<3>& direction, const vec<3>& up = vec3{0.0f, 0.0f, 1.0f})
 {
   vec3 forward = normalize(direction);
   vec3 upward = normalize(cross(forward, cross(up, forward)));
   vec3 left = normalize(cross(upward, forward));

   return mat3{
     {forward[0], left[0], upward[0]},
     {forward[1], left[1], upward[1]},
     {forward[2], left[2], upward[2]}
   };
 }


 // 2x2 matrix whose columns are (forward, left):
 //   forward - the normalized direction
 //   left    - the result of the single-argument cross() applied to forward
 inline mat<2, 2> look_at(const vec<2>& direction)
 {
   vec2 forward = normalize(direction);
   vec2 left = cross(forward);

   return mat2{
     {forward[0], left[0]},
     {forward[1], left[1]}
   };
 }


 // 3x3 matrix whose third column is n; the first two columns are built from
 // the components of n without any square roots or normalization
 // (presumably n is expected to be a unit vector -- confirm against callers)
 inline mat<3, 3> R3_basis(const vec3& n)
 {
   // the sign of n[2] selects the branch that keeps the denominator (sign + n[2]) away from zero
   float sign = -1.0f;
   if (n[2] >= 0.0f) {
     sign = 1.0f;
   }

   float a = -1.0f / (sign + n[2]);
   float b = n[0] * n[1] * a;

   return mat<3, 3>{
     {1.0f + sign * n[0] * n[0] * a, b, n[0]},
     {sign * b, sign + n[1] * n[1] * a, n[1]},
     {-sign * n[0], -n[1], n[2]}
   };
 }

 #endif


 #include "serac/numerics/functional/isotropic_tensor.hpp"


 #include "serac/numerics/functional/tuple_tensor_dual_functions.hpp"

accelerator.hpp
This file contains the interface used for initializing/terminating any hardware accelerator-related f...

SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
Macro to turn off specific nvcc warnings.
Definition: accelerator.hpp:50

SERAC_HOST_DEVICE
#define SERAC_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
Definition: accelerator.hpp:38

isotropic_tensor.hpp
Implementation of isotropic tensor classes.

metaprogramming.hpp
Utilities for C++ metaprogramming.

for_constexpr
constexpr SERAC_HOST_DEVICE void for_constexpr(lambda &&f)
multidimensional loop tool that evaluates the lambda body inside the innermost loop.
Definition: metaprogramming.hpp:97

serac
Accelerator functionality.
Definition: serac.cpp:38

serac::inv
constexpr SERAC_HOST_DEVICE auto inv(const tensor< T, n, n > &A)
Definition: tensor.hpp:1606

serac::DenseIdentity
constexpr SERAC_HOST_DEVICE tensor< double, dim, dim > DenseIdentity()
Obtains the identity matrix of the specified dimension.
Definition: tensor.hpp:1183

serac::outer_product_t
typename detail::outer_prod< T1, T2 >::type outer_product_t
a type function that returns the tensor type of an outer product of two tensors
Definition: tensor.hpp:1741

serac::diag
constexpr SERAC_HOST_DEVICE tensor< T, n > diag(const tensor< T, n, n > &D)
Returns an array containing the diagonal entries of a square matrix.
Definition: tensor.hpp:1169

serac::sin
SERAC_HOST_DEVICE auto sin(dual< gradient_type > a)
implementation of sine for dual numbers
Definition: dual.hpp:295

serac::operator-
constexpr SERAC_HOST_DEVICE auto operator-(const tensor< S, m, n... > &A, const tensor< T, m, n... > &B)
return the difference of two tensors
Definition: tensor.hpp:459

serac::relative_error
double relative_error(tensor< T, n... > A, tensor< T, n... > B)
computes the relative error (in the frobenius norm) between two tensors of the same shape
Definition: tensor.hpp:1390

serac::dot
constexpr SERAC_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
Definition: isotropic_tensor.hpp:203

serac::is_symmetric
SERAC_HOST_DEVICE bool is_symmetric(tensor< double, n, n > A, double tolerance=1.0e-8)
Return whether a square rank 2 tensor is symmetric.
Definition: tensor.hpp:1404

serac::chop
constexpr SERAC_HOST_DEVICE auto chop(const tensor< double, m, n > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1683

serac::operator*
constexpr SERAC_HOST_DEVICE auto operator*(const dual< gradient_type > &a, double b)
multiplication of a dual number and a non-dual number
Definition: dual.hpp:109

serac::tensor
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.

serac::cos
SERAC_HOST_DEVICE auto cos(dual< gradient_type > a)
implementation of cosine for dual numbers
Definition: dual.hpp:287

serac::operator-
Domain operator-(const Domain &a, const Domain &b)
create a new domain that is the set difference of a and b
Definition: domain.cpp:500

serac::operator+=
constexpr SERAC_HOST_DEVICE auto & operator+=(tensor< T, n... > &A, zero)
compound assignment (+) between a tensor and zero (no-op)
Definition: tensor.hpp:560

serac::size
constexpr SERAC_HOST_DEVICE int size(zero)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1863

serac::leading_dimension
constexpr SERAC_HOST_DEVICE int leading_dimension(tensor< T, m, n... >)
a function for querying the first dimension of a tensor
Definition: tensor.hpp:1889

serac::get
constexpr T & get(variant< T0, T1 > &v)
Returns the variant member of specified type.
Definition: variant.hpp:338

serac::dot
constexpr SERAC_HOST_DEVICE auto dot(const tensor< S, m, n, p, q > &A, const tensor< T, q > &B)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:889

serac::transpose
constexpr SERAC_HOST_DEVICE auto transpose(const tensor< T, m, n > &A)
Returns the transpose of the matrix.
Definition: tensor.hpp:1199

serac::pow
SERAC_HOST_DEVICE auto pow(dual< gradient_type > a, dual< gradient_type > b)
implementation of a (dual) raised to the b (dual) power
Definition: dual.hpp:376

serac::is_symmetric_and_positive_definite
SERAC_HOST_DEVICE bool is_symmetric_and_positive_definite(tensor< double, 3, 3 > A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1438

serac::make_tensor
SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING constexpr SERAC_HOST_DEVICE auto make_tensor(lambda_type f)
Creates a tensor of requested dimension by subsequent calls to a functor. Can be thought of as analogo...
Definition: tensor.hpp:300

serac::get_gradient
constexpr SERAC_HOST_DEVICE auto get_gradient(const tensor< double, n... > &)
get the gradient of type tensor (note: since its stored type is not a dual number,...
Definition: tensor.hpp:1755

serac::chain_rule
SERAC_HOST_DEVICE auto chain_rule(const tensor< double, m, n, p... > &df_dx, const tensor< double, p... > &dx)
Definition: tensor.hpp:1832

serac::contract
SERAC_HOST_DEVICE auto contract(const zero &, const T &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1375

serac::linear_solve
constexpr SERAC_HOST_DEVICE auto linear_solve(const LuFactorization< T, n > &, const zero)
Definition: tensor.hpp:1554

serac::sqrt
SERAC_HOST_DEVICE auto sqrt(dual< gradient_type > x)
implementation of square root for dual numbers
Definition: dual.hpp:279

serac::operator+
constexpr SERAC_HOST_DEVICE auto operator+(dual< gradient_type > a, double b)
addition of a dual number and a non-dual number
Definition: dual.hpp:60

serac::det
constexpr SERAC_HOST_DEVICE auto det(const tensor< T, 3, 3 > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1221

serac::dev
constexpr SERAC_HOST_DEVICE auto dev(const tensor< T, n, n > &A)
Calculates the deviator of a matrix (rank-2 tensor)
Definition: tensor.hpp:1124

serac::mat2
tensor< double, 2, 2 > mat2
statically sized 2x2 matrix of doubles
Definition: tensor.hpp:117

serac::matrix_sqrt
auto matrix_sqrt(const tensor< T, dim, dim > &A)
compute the matrix square root of a square, real-valued, symmetric matrix i.e. given A,...
Definition: tensor.hpp:1278

serac::solve_upper_triangular
constexpr SERAC_HOST_DEVICE auto solve_upper_triangular(const tensor< T, n, n > &U, const tensor< T, n, m... > &y)
Solves an upper triangular system Ux = y.
Definition: tensor.hpp:1520

serac::operator-=
constexpr SERAC_HOST_DEVICE auto & operator-=(dual< gradient_type > &a, const dual< gradient_type > &b)
compound assignment (-) for dual numbers
Definition: dual.hpp:192

serac::tensor_with_shape
constexpr SERAC_HOST_DEVICE auto tensor_with_shape(std::integer_sequence< int, n... >)
Creates a tensor given the dimensions in a std::integer_sequence.
Definition: tensor.hpp:283

serac::squared_norm
constexpr SERAC_HOST_DEVICE auto squared_norm(const tensor< T, n... > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1033

serac::vec3
tensor< double, 3 > vec3
statically sized vector of 3 doubles
Definition: tensor.hpp:115

serac::isnan
bool isnan(const zero &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1904

serac::sym
constexpr SERAC_HOST_DEVICE auto sym(const tensor< T, n, n > &A)
Returns the symmetric part of a square matrix.
Definition: tensor.hpp:1088

serac::antisym
constexpr SERAC_HOST_DEVICE auto antisym(const tensor< T, n, n > &A)
Returns the antisymmetric part of a square matrix.
Definition: tensor.hpp:1105

serac::abs
SERAC_HOST_DEVICE auto abs(dual< gradient_type > x)
Implementation of absolute value function for dual numbers.
Definition: dual.hpp:220

serac::outer
constexpr SERAC_HOST_DEVICE auto outer(const tensor< S, m > &A, const tensor< T, n > &B)
Definition: tensor.hpp:647

serac::acos
SERAC_HOST_DEVICE auto acos(dual< gradient_type > a)
implementation of acos for dual numbers
Definition: dual.hpp:344

serac::dimension
constexpr SERAC_HOST_DEVICE int dimension(const tensor< T, n... > &)
a function for querying the ith dimension of a tensor
Definition: tensor.hpp:1874

serac::operator<<
auto & operator<<(std::ostream &out, zero)
Write a zero out to an output stream.
Definition: tensor.hpp:1636

serac::double_dot
constexpr auto double_dot(const tensor< S, m, n > &A, const tensor< T, m, n > &B)
Definition: tensor.hpp:984

serac::cross
auto cross(const tensor< S, 3 > &u, const tensor< T, 3 > &v)
compute the (right handed) cross product of two 3-vectors
Definition: tensor.hpp:928

serac::operator*
constexpr SERAC_HOST_DEVICE auto operator*(const tensor< S, m... > &A, const tensor< T, n... > &B)
this is a shorthand for dot(A, B)
Definition: tensor.hpp:999

serac::type
constexpr SERAC_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:274

serac::operator-=
constexpr SERAC_HOST_DEVICE auto & operator-=(tensor< T, n... > &A, zero)
compound assignment (-) between a tensor and zero (no-op)
Definition: tensor.hpp:589

serac::operator+=
constexpr SERAC_HOST_DEVICE auto & operator+=(dual< gradient_type > &a, const dual< gradient_type > &b)
compound assignment (+) for dual numbers
Definition: dual.hpp:183

serac::tr
constexpr SERAC_HOST_DEVICE auto tr(const tensor< T, n, n > &A)
Returns the trace of a square matrix.
Definition: tensor.hpp:1073

serac::normalize
SERAC_HOST_DEVICE auto normalize(const tensor< T, n... > &A)
Normalizes the tensor. Each element is divided by the Frobenius norm of the tensor,...
Definition: tensor.hpp:1062

serac::vec2
tensor< double, 2 > vec2
statically sized vector of 2 doubles
Definition: tensor.hpp:114

serac::detApIm1
constexpr SERAC_HOST_DEVICE auto detApIm1(const tensor< T, 3, 3 > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1251

serac::diagonal_matrix
constexpr SERAC_HOST_DEVICE auto diagonal_matrix(const tensor< T, n, n > &A)
Returns a square matrix (rank-2 tensor) containing the diagonal entries of the input square matrix wi...
Definition: tensor.hpp:1141

serac::mat3
tensor< double, 3, 3 > mat3
statically sized 3x3 matrix of doubles
Definition: tensor.hpp:118

serac::tensor
tensor(const T(&data)[n1][n2]) -> tensor< T, n1, n2 >
class template argument deduction guide for type tensor.

serac::print
SERAC_HOST_DEVICE void print(const tensor< double, m, n... > &A)
print a tensor using printf, so that it is suitable for use inside cuda kernels.
Definition: tensor.hpp:1654

serac::operator/
constexpr SERAC_HOST_DEVICE auto operator/(const dual< gradient_type > &a, double b)
division of a dual number by a non-dual number
Definition: dual.hpp:130

serac::solve_lower_triangular
constexpr SERAC_HOST_DEVICE auto solve_lower_triangular(const tensor< T, n, n > &L, const tensor< T, n, m... > &b)
Definition: tensor.hpp:1499

serac::reduced_tensor
std::conditional_t<(n1==1 &&n2==1), double, std::conditional_t< n1==1, tensor< T, n2 >, std::conditional_t< n2==1, tensor< T, n1 >, tensor< T, n1, n2 > >> > reduced_tensor
Removes 1s from tensor dimensions. For example, a tensor<T, 1, 10> is equivalent to a tensor<T,...
Definition: tensor.hpp:275

serac::inner
constexpr SERAC_HOST_DEVICE auto inner(double A, double B)
Definition: tensor.hpp:697

serac::norm
constexpr SERAC_HOST_DEVICE auto norm(zero)
overload of Frobenius norm for zero type
Definition: tensor.hpp:1054

serac::LuFactorization
Representation of an LU factorization.
Definition: tensor.hpp:1459

serac::LuFactorization::U
tensor< T, n, n > U
Upper triangular factor.
Definition: tensor.hpp:1462

serac::LuFactorization::P
tensor< int, n > P
Row permutation indices due to partial pivoting.
Definition: tensor.hpp:1460

serac::LuFactorization::L
tensor< T, n, n > L
Lower triangular factor. Has ones on diagonal.
Definition: tensor.hpp:1461

serac::is_zero
checks if a type is zero
Definition: tensor.hpp:151

serac::tensor
Arbitrary-rank tensor class.
Definition: tensor.hpp:29

serac::zero
A sentinel struct for eliding no-op tensor operations.
Definition: tensor.hpp:123

serac::zero::operator=
SERAC_HOST_DEVICE auto operator=(T)
anything assigned to zero does not change its value and returns zero
Definition: tensor.hpp:143

serac::zero::operator()
SERAC_HOST_DEVICE auto operator()(T...) const
zero can be accessed like a multidimensional array
Definition: tensor.hpp:136