doxygen/html/tensor_8hpp_source.html

 // Copyright (c) Lawrence Livermore National Security, LLC and

 // other Smith Project Developers. See the top-level LICENSE file for

 // details.

 //

 // SPDX-License-Identifier: (BSD-3-Clause)


 #pragma once


 #include <cmath>


 #include "smith/infrastructure/accelerator.hpp"

 #include "detail/metaprogramming.hpp"


 namespace smith {


 /**
  * @brief Fixed-shape tensor whose extents are template arguments.
  * @tparam T the type stored at each index
  * @tparam n the extent of each dimension
  */
 template <typename T, int... n>
 struct tensor;

 /**
  * @brief Tensor with leading extent `m`, stored as `m` sub-tensors of shape `n...`.
  *
  * Indexing with `operator()` peels off one index per level; `operator[]`
  * returns the sub-tensor at the given leading index.
  */
 template <typename T, int m, int... n>
 struct tensor<T, m, n...> {
   /// Mutable access to the sub-tensor at leading index `i`.
   template <typename index_t>
   SMITH_HOST_DEVICE constexpr auto& operator()(index_t i)
   {
     return data[i];
   }

   /// Read-only access to the sub-tensor at leading index `i`.
   template <typename index_t>
   SMITH_HOST_DEVICE constexpr auto& operator()(index_t i) const
   {
     return data[i];
   }

   /// Mutable multi-index access: selects sub-tensor `i`, then forwards the remaining indices to it.
   template <typename index_t, typename... rest_t>
   SMITH_HOST_DEVICE constexpr auto& operator()(index_t i, rest_t... jklm)
   {
     return data[i](jklm...);
   }

   /// Read-only multi-index access: selects sub-tensor `i`, then forwards the remaining indices to it.
   template <typename index_t, typename... rest_t>
   SMITH_HOST_DEVICE constexpr auto& operator()(index_t i, rest_t... jklm) const
   {
     return data[i](jklm...);
   }

   /// Mutable access to the sub-tensor at leading index `i`.
   SMITH_HOST_DEVICE constexpr auto& operator[](int i) { return data[i]; }

   /// Read-only access to the sub-tensor at leading index `i`.
   SMITH_HOST_DEVICE constexpr const auto& operator[](int i) const { return data[i]; }

   /// Storage: `m` sub-tensors, each of shape `n...`.
   tensor<T, n...> data[m];
 };


 /**
  * @brief Rank-1 tensor: `m` values of type `T` stored contiguously in `data`.
  *
  * When `m == 1` the tensor is additionally convertible to `T` (its only entry).
  */
 template <typename T, int m>
 struct tensor<T, m> {
   /// Mutable access to entry `i`.
   template <typename index_t>
   SMITH_HOST_DEVICE constexpr auto& operator()(index_t i)
   {
     return data[i];
   }

   /// Read-only access to entry `i`.
   template <typename index_t>
   SMITH_HOST_DEVICE constexpr auto& operator()(index_t i) const
   {
     return data[i];
   }

   /// Mutable access to entry `i`.
   SMITH_HOST_DEVICE constexpr auto& operator[](int i) { return data[i]; }

   /// Read-only access to entry `i`.
   SMITH_HOST_DEVICE constexpr const auto& operator[](int i) const { return data[i]; }

   /// Conversion to `T`, only available when the tensor has exactly one entry.
   template <int last_dimension = m, typename = typename std::enable_if<last_dimension == 1>::type>
   SMITH_HOST_DEVICE constexpr operator T()
   {
     return data[0];
   }

   /// Const-qualified conversion to `T`, only available when the tensor has exactly one entry.
   template <int last_dimension = m, typename = typename std::enable_if<last_dimension == 1>::type>
   SMITH_HOST_DEVICE constexpr operator T() const
   {
     return data[0];
   }

   /// Storage for the `m` entries.
   T data[m];
 };


 // Deduction guide: a 1D C array of n1 values deduces tensor<T, n1>.
 template <typename T, int n1>

 tensor(const T (&data)[n1]) -> tensor<T, n1>;


 // Deduction guide: a 2D C array of n1 x n2 values deduces tensor<T, n1, n2>.
 template <typename T, int n1, int n2>

 tensor(const T (&data)[n1][n2]) -> tensor<T, n1, n2>;


 // Shorthand for rank-1 tensors of doubles with 2 and 3 entries.
 using vec2 = tensor<double, 2>;

 using vec3 = tensor<double, 3>;


 // Shorthand for square rank-2 tensors of doubles (2x2 and 3x3).
 using mat2 = tensor<double, 2, 2>;

 using mat3 = tensor<double, 3, 3>;


 /**
  * @brief Empty tag type standing in for a value that is identically zero.
  *
  * It converts to `0.0` or to a value-initialized tensor, and absorbs
  * indexing and assignment (both simply yield another `zero`).
  */
 struct zero {
   /// Conversion to the scalar 0.0. Const-qualified so that `const zero` objects convert too.
   SMITH_HOST_DEVICE constexpr operator double() const { return 0.0; }

   /// Conversion to a value-initialized tensor of any shape. Const-qualified for the same reason.
   template <typename T, int... n>
   SMITH_HOST_DEVICE constexpr operator tensor<T, n...>() const
   {
     return tensor<T, n...>{};
   }

   /// Indexing with any arguments yields `zero` again.
   template <typename... T>
   SMITH_HOST_DEVICE auto operator()(T...) const
   {
     return zero{};
   }

   /// Assigning any value is a no-op; the result is a fresh `zero` (by value).
   template <typename T>
   SMITH_HOST_DEVICE auto operator=(T)
   {
     return zero{};
   }
 };


 // Type trait: false for every type ...
 template <typename T>

 struct is_zero : std::false_type {};


 // ... except `zero` itself, for which it is true.
 template <>

 struct is_zero<zero> : std::true_type {};


 /// @brief zero + zero is zero.
 SMITH_HOST_DEVICE constexpr auto operator+(zero, zero) { return zero{}; }

 /// @brief zero + other returns `other` unchanged.
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto operator+(zero, T other) { return other; }

 /// @brief other + zero returns `other` unchanged.
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto operator+(T other, zero) { return other; }


 /// @brief Negating zero gives zero.
 SMITH_HOST_DEVICE constexpr auto operator-(zero) { return zero{}; }

 /// @brief zero - zero is zero.
 SMITH_HOST_DEVICE constexpr auto operator-(zero, zero) { return zero{}; }

 /// @brief zero - other returns the negation of `other`.
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto operator-(zero, T other) { return -other; }

 /// @brief other - zero returns `other` unchanged.
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto operator-(T other, zero) { return other; }


 /// @brief zero * zero is zero.
 SMITH_HOST_DEVICE constexpr auto operator*(zero, zero) { return zero{}; }

 /// @brief zero times anything is zero; the other operand is ignored.
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto operator*(zero, T /*other*/) { return zero{}; }

 /// @brief Anything times zero is zero; the other operand is ignored.
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto operator*(T /*other*/, zero) { return zero{}; }


 /// @brief zero divided by anything is zero; the divisor is ignored.
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto operator/(zero, T /*other*/) { return zero{}; }

 /// @brief Dividing by `zero` is rejected at compile time: instantiating this overload trips the static_assert.
 template <typename T>
 void operator/(T, zero)
 {
   static_assert(::detail::always_false<T>{}, "Error: Can't divide by zero!");
 }


 /// @brief Compound addition of two zeros; yields a `zero` by value.
 SMITH_HOST_DEVICE constexpr auto operator+=(zero, zero)
 {
   return zero{};
 }

 /// @brief Compound subtraction of two zeros; yields a `zero` by value.
 SMITH_HOST_DEVICE constexpr auto operator-=(zero, zero)
 {
   return zero{};
 }


 /// @brief Tuple-style `get` on a mutable zero: every index `i` refers to the same object `x`.
 template <int i>
 SMITH_HOST_DEVICE zero& get(zero& x) { return x; }

 /// @brief Tuple-style `get` on a const zero: every index `i` yields a new `zero` by value.
 template <int i>
 SMITH_HOST_DEVICE zero get(const zero&) { return zero{}; }


 /// @brief Dot product with `zero` on the right is zero; the left operand is ignored.
 template <typename T>
 SMITH_HOST_DEVICE constexpr zero dot(const T&, zero) { return zero{}; }

 /// @brief Dot product with `zero` on the left is zero; the right operand is ignored.
 template <typename T>
 SMITH_HOST_DEVICE constexpr zero dot(zero, const T&) { return zero{}; }


 // Picks the smallest type able to hold an n1-by-n2 block of values by dropping extents equal to 1:
 //   n1 == 1 and n2 == 1  ->  double
 //   n1 == 1              ->  tensor<T, n2>
 //   n2 == 1              ->  tensor<T, n1>
 //   otherwise            ->  tensor<T, n1, n2>
 // NOTE(review): the fully reduced case is `double` regardless of T — confirm this is intended
 // for non-double element types.
 template <typename T, int n1, int n2 = 1>

 using reduced_tensor = std::conditional_t<

     (n1 == 1 && n2 == 1), double,

     std::conditional_t<n1 == 1, tensor<T, n2>, std::conditional_t<n2 == 1, tensor<T, n1>, tensor<T, n1, n2>>>>;


 /**
  * @brief Build a value-initialized tensor whose extents are taken from an integer sequence.
  * @tparam T the element type of the tensor
  * @tparam n the extents, deduced from the (unnamed) `std::integer_sequence` argument
  */
 template <typename T, int... n>
 SMITH_HOST_DEVICE constexpr auto tensor_with_shape(std::integer_sequence<int, n...>)
 {
   using shaped_t = tensor<T, n...>;
   return shaped_t{};
 }


 /**
  * @brief Create a tensor with no extents from the value returned by `f()`.
  * @param f a callable taking no arguments
  */
 SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <typename lambda_type>
 SMITH_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using T = decltype(f());
   return tensor<T>{f()};
 }

 /**
  * @brief Create a rank-1 tensor whose i-th entry is `f(i)`.
  * @tparam n1 the number of entries
  * @param f a callable taking one integer index
  */
 SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, typename lambda_type>
 SMITH_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using T = decltype(f(n1));
   tensor<T, n1> result{};
   for (int a = 0; a < n1; ++a) {
     result(a) = f(a);
   }
   return result;
 }

 /**
  * @brief Create a rank-2 tensor whose (i, j) entry is `f(i, j)`.
  * @tparam n1 the first extent
  * @tparam n2 the second extent
  * @param f a callable taking two integer indices
  */
 SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, typename lambda_type>
 SMITH_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using T = decltype(f(n1, n2));
   tensor<T, n1, n2> result{};
   for (int a = 0; a < n1; ++a) {
     for (int b = 0; b < n2; ++b) {
       result(a, b) = f(a, b);
     }
   }
   return result;
 }

 /**
  * @brief Create a rank-3 tensor whose (i, j, k) entry is `f(i, j, k)`.
  * @tparam n1 the first extent
  * @tparam n2 the second extent
  * @tparam n3 the third extent
  * @param f a callable taking three integer indices
  */
 SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, int n3, typename lambda_type>
 SMITH_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using T = decltype(f(n1, n2, n3));
   tensor<T, n1, n2, n3> result{};
   for (int a = 0; a < n1; ++a) {
     for (int b = 0; b < n2; ++b) {
       for (int c = 0; c < n3; ++c) {
         result(a, b, c) = f(a, b, c);
       }
     }
   }
   return result;
 }

 /**
  * @brief Create a rank-4 tensor whose (i, j, k, l) entry is `f(i, j, k, l)`.
  * @tparam n1 the first extent
  * @tparam n2 the second extent
  * @tparam n3 the third extent
  * @tparam n4 the fourth extent
  * @param f a callable taking four integer indices
  */
 SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, int n3, int n4, typename lambda_type>
 SMITH_HOST_DEVICE constexpr auto make_tensor(lambda_type f)
 {
   using T = decltype(f(n1, n2, n3, n4));
   tensor<T, n1, n2, n3, n4> result{};
   for (int a = 0; a < n1; ++a) {
     for (int b = 0; b < n2; ++b) {
       for (int c = 0; c < n3; ++c) {
         for (int d = 0; d < n4; ++d) {
           result(a, b, c, d) = f(a, b, c, d);
         }
       }
     }
   }
   return result;
 }


 /**
  * @brief Entry-wise sum of two tensors of identical shape.
  * @param A the left operand
  * @param B the right operand
  * @return a tensor whose element type is that of `S{} + T{}`
  */
 template <typename S, typename T, int m, int... n>
 SMITH_HOST_DEVICE constexpr auto operator+(const tensor<S, m, n...>& A, const tensor<T, m, n...>& B)
 {
   using sum_t = decltype(S{} + T{});
   tensor<sum_t, m, n...> total{};
   // recurse over the leading index; each slice is added with the matching lower-rank operator+
   for (int slice = 0; slice < m; ++slice) {
     total[slice] = A[slice] + B[slice];
   }
   return total;
 }


 /**
  * @brief Entry-wise negation of a tensor.
  * @param A the tensor to negate
  * @return a tensor of the same type and shape holding `-A[i]` in each leading slice
  */
 template <typename T, int m, int... n>
 SMITH_HOST_DEVICE constexpr auto operator-(const tensor<T, m, n...>& A)
 {
   tensor<T, m, n...> negated{};
   for (int slice = 0; slice < m; ++slice) {
     negated[slice] = -A[slice];
   }
   return negated;
 }


 /**
  * @brief Entry-wise difference of two tensors of identical shape.
  * @param A the left operand
  * @param B the right operand
  * @return a tensor whose element type is that of `S{} - T{}`
  */
 template <typename S, typename T, int m, int... n>
 SMITH_HOST_DEVICE constexpr auto operator-(const tensor<S, m, n...>& A, const tensor<T, m, n...>& B)
 {
   // The element type comes from the subtraction actually performed below
   // (it was previously deduced from `S{} + T{}`).
   tensor<decltype(S{} - T{}), m, n...> C{};
   for (int i = 0; i < m; i++) {
     C[i] = A[i] - B[i];
   }
   return C;
 }


 /**
  * @brief Compound addition of two tensors of identical shape.
  * @param A the tensor updated in place
  * @param B the tensor added to `A`
  * @return a reference to `A`
  */
 template <typename S, typename T, int m, int... n>
 SMITH_HOST_DEVICE constexpr auto& operator+=(tensor<S, m, n...>& A, const tensor<T, m, n...>& B)
 {
   for (int slice = 0; slice < m; ++slice) {
     A[slice] += B[slice];
   }
   return A;
 }

 #if 0
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto& operator+=(tensor<T>& A, const T& B)
 {
   return A.data += B;
 }
 #endif

 /**
  * @brief Add a vector of length n to the single column of an n-by-1 tensor.
  * @return a reference to `A`
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto& operator+=(tensor<T, n, 1>& A, const tensor<T, n>& B)
 {
   for (int row = 0; row < n; ++row) {
     A.data[row][0] += B[row];
   }
   return A;
 }

 /**
  * @brief Add a vector of length n to the single row of a 1-by-n tensor.
  * @return a reference to `A`
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto& operator+=(tensor<T, 1, n>& A, const tensor<T, n>& B)
 {
   for (int col = 0; col < n; ++col) {
     A.data[0][col] += B[col];
   }
   return A;
 }

 /**
  * @brief Add a scalar to the only entry of a length-1 tensor.
  * @return the result of the compound assignment on that entry (not the tensor `A`)
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto& operator+=(tensor<T, 1>& A, const T& B)
 {
   return A.data[0] += B;
 }

 /**
  * @brief Add a scalar to the only entry of a 1-by-1 tensor.
  * @return the result of the compound assignment on that entry (not the tensor `A`)
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto& operator+=(tensor<T, 1, 1>& A, const T& B)
 {
   return A.data[0][0] += B;
 }

 /**
  * @brief Adding `zero` leaves the tensor untouched.
  * @return a reference to `A`
  */
 template <typename T, int... n>
 SMITH_HOST_DEVICE constexpr auto& operator+=(tensor<T, n...>& A, zero)
 {
   return A;
 }


 /**
  * @brief Compound subtraction of two tensors of identical shape.
  * @param A the tensor updated in place
  * @param B the tensor subtracted from `A`
  * @return a reference to `A`
  */
 template <typename S, typename T, int m, int... n>
 SMITH_HOST_DEVICE constexpr auto& operator-=(tensor<S, m, n...>& A, const tensor<T, m, n...>& B)
 {
   for (int slice = 0; slice < m; ++slice) {
     A[slice] -= B[slice];
   }
   return A;
 }

 /**
  * @brief Subtracting `zero` leaves the tensor untouched.
  * @return a reference to `A`
  */
 template <typename T, int... n>
 SMITH_HOST_DEVICE constexpr auto& operator-=(tensor<T, n...>& A, zero)
 {
   return A;
 }


 /**
  * @brief Outer product of a scalar with a vector, i.e. the vector scaled by `A`.
  * @param A the scalar factor
  * @param B the vector; taken by const reference like the sibling `outer` overloads,
  *          which avoids copying it on every call
  * @return a vector with entries `A * B[i]`
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto outer(double A, const tensor<T, n>& B)
 {
   tensor<decltype(double{} * T{}), n> AB{};
   for (int i = 0; i < n; i++) {
     AB[i] = A * B[i];
   }
   return AB;
 }


 /**
  * @brief Outer product of a vector with a scalar, i.e. the vector scaled by `B`.
  * @return a vector with entries `A[i] * B`
  */
 template <typename T, int m>
 SMITH_HOST_DEVICE constexpr auto outer(const tensor<T, m>& A, double B)
 {
   using entry_t = decltype(T{} * double{});
   tensor<entry_t, m> scaled{};
   for (int row = 0; row < m; ++row) {
     scaled[row] = A[row] * B;
   }
   return scaled;
 }

 /// @brief Outer product with `zero` on the left is zero.
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto outer(zero, const tensor<T, n>&) { return zero{}; }

 /// @brief Outer product with `zero` on the right is zero.
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto outer(const tensor<T, n>&, zero) { return zero{}; }

 /**
  * @brief Outer product of two vectors.
  * @param A vector of length m
  * @param B vector of length n
  * @return an m-by-n tensor with entries `A[i] * B[j]`
  */
 template <typename S, typename T, int m, int n>
 SMITH_HOST_DEVICE constexpr auto outer(const tensor<S, m>& A, const tensor<T, n>& B)
 {
   using entry_t = decltype(S{} * T{});
   tensor<entry_t, m, n> product{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       product[row][col] = A[row] * B[col];
     }
   }
   return product;
 }


 /**
  * @brief Inner product of two m-by-n tensors: the sum over all (i, j) of `A[i][j] * B[i][j]`.
  */
 template <typename S, typename T, int m, int n>
 SMITH_HOST_DEVICE constexpr auto inner(const tensor<S, m, n>& A, const tensor<T, m, n>& B)
 {
   decltype(S{} * T{}) acc{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       acc += A[row][col] * B[row][col];
     }
   }
   return acc;
 }

 /**
  * @brief Inner product of two vectors of length m: the sum over i of `A[i] * B[i]`.
  */
 template <typename S, typename T, int m>
 SMITH_HOST_DEVICE constexpr auto inner(const tensor<S, m>& A, const tensor<T, m>& B)
 {
   decltype(S{} * T{}) acc{};
   for (int k = 0; k < m; ++k) {
     acc += A[k] * B[k];
   }
   return acc;
 }

 /// @brief Inner product of two scalars is their ordinary product.
 SMITH_HOST_DEVICE constexpr auto inner(double A, double B)
 {
   return A * B;
 }

 /// @brief Inner product of a matrix with `zero` is zero.
 template <typename S, int m, int n>
 SMITH_HOST_DEVICE constexpr auto inner(const tensor<S, m, n>&, zero) { return zero{}; }

 /// @brief Inner product of a vector with `zero` is zero.
 template <typename S, int m>
 SMITH_HOST_DEVICE constexpr auto inner(const tensor<S, m>&, zero) { return zero{}; }

 /// @brief Inner product of a scalar with `zero` is zero.
 SMITH_HOST_DEVICE constexpr auto inner(double, zero)
 {
   return zero{};
 }

 /// @brief Inner product of `zero` with a matrix is zero.
 template <typename T, int m, int n>
 SMITH_HOST_DEVICE constexpr auto inner(zero, const tensor<T, m, n>&) { return zero{}; }

 /// @brief Inner product of `zero` with a vector is zero.
 template <typename T, int m>
 SMITH_HOST_DEVICE constexpr auto inner(zero, const tensor<T, m>&) { return zero{}; }

 /// @brief Inner product of `zero` with a scalar is zero.
 SMITH_HOST_DEVICE constexpr auto inner(zero, double)
 {
   return zero{};
 }


 /**
  * @brief Matrix-matrix product: contracts the last index of `A` with the first index of `B`.
  * @param A an m-by-n tensor
  * @param B an n-by-p tensor
  * @return an m-by-p tensor with entries sum_k `A[i][k] * B[k][j]`
  */
 template <typename S, typename T, int m, int n, int p>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p>& B)
 {
   tensor<decltype(S{} * T{}), m, p> product{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < p; ++col) {
       for (int mid = 0; mid < n; ++mid) {
         product[row][col] = product[row][col] + A[row][mid] * B[mid][col];
       }
     }
   }
   return product;
 }

 /// @brief Dot product of a vector with a scalar: forwards to `A * B`.
 template <typename T, int m>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<T, m>& A, double B)
 {
   return A * B;
 }

 /// @brief Dot product of a scalar with a vector: forwards to `B * A`.
 template <typename T, int m>
 SMITH_HOST_DEVICE constexpr auto dot(double B, const tensor<T, m>& A)
 {
   return B * A;
 }

 /**
  * @brief Dot product of two vectors of length m.
  * @return the sum over i of `A[i] * B[i]`
  */
 template <typename S, typename T, int m>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m>& B)
 {
   decltype(S{} * T{}) acc{};
   for (int k = 0; k < m; ++k) {
     acc = acc + A[k] * B[k];
   }
   return acc;
 }


 /**
  * @brief Vector-matrix product: contracts `A` with the first index of `B`.
  * @param A a vector of length m
  * @param B an m-by-n tensor
  * @return a vector of length n with entries sum_j `A[j] * B[j][i]`
  */
 template <typename S, typename T, int m, int n>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n>& B)
 {
   tensor<decltype(S{} * T{}), n> product{};
   for (int col = 0; col < n; ++col) {
     for (int row = 0; row < m; ++row) {
       product[col] = product[col] + A[row] * B[row][col];
     }
   }
   return product;
 }

 /**
  * @brief Contracts a vector of length m with the first index of an m-by-n-by-p tensor.
  * @return an n-by-p tensor, the sum over j of `A[j] * B[j]`
  */
 template <typename S, typename T, int m, int n, int p>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n, p>& B)
 {
   tensor<decltype(S{} * T{}), n, p> product{};
   for (int slice = 0; slice < m; ++slice) {
     product = product + A[slice] * B[slice];
   }
   return product;
 }

 /**
  * @brief Contracts a vector of length m with the first index of an m-by-n-by-p-by-q tensor.
  * @return an n-by-p-by-q tensor, the sum over j of `A[j] * B[j]`
  */
 template <typename S, typename T, int m, int n, int p, int q>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m>& A, const tensor<T, m, n, p, q>& B)
 {
   tensor<decltype(S{} * T{}), n, p, q> product{};
   for (int slice = 0; slice < m; ++slice) {
     product = product + A[slice] * B[slice];
   }
   return product;
 }

 /**
  * @brief Matrix-vector product.
  * @param A an m-by-n tensor
  * @param B a vector of length n
  * @return a vector of length m with entries sum_j `A[i][j] * B[j]`
  */
 template <typename S, typename T, int m, int n>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n>& B)
 {
   tensor<decltype(S{} * T{}), m> product{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       product[row] = product[row] + A[row][col] * B[col];
     }
   }
   return product;
 }

 /**
  * @brief Contracts the last index of an m-by-n matrix with the first index of an n-by-p-by-q-by-r tensor.
  * @return an m-by-p-by-q-by-r tensor; slice i is the sum over j of `A[i][j] * B[j]`
  */
 template <typename S, typename T, int m, int n, int p, int q, int r>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p, q, r>& B)
 {
   tensor<decltype(S{} * T{}), m, p, q, r> product{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       product[row] = product[row] + A[row][col] * B[col];
     }
   }
   return product;
 }

 /**
  * @brief Contracts the last index of an m-by-n matrix with the first index of an n-by-p-by-q tensor.
  * @return an m-by-p-by-q tensor; slice i is the sum over j of `A[i][j] * B[j]`
  */
 template <typename S, typename T, int m, int n, int p, int q>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m, n>& A, const tensor<T, n, p, q>& B)
 {
   tensor<decltype(S{} * T{}), m, p, q> product{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       product[row] = product[row] + A[row][col] * B[col];
     }
   }
   return product;
 }


 /**
  * @brief Contracts the last index of an m-by-n-by-p tensor with a vector of length p.
  * @return an m-by-n tensor with entries sum_k `A[i][j][k] * B[k]`
  */
 template <typename S, typename T, int m, int n, int p>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m, n, p>& A, const tensor<T, p>& B)
 {
   tensor<decltype(S{} * T{}), m, n> product{};
   for (int a = 0; a < m; ++a) {
     for (int b = 0; b < n; ++b) {
       for (int c = 0; c < p; ++c) {
         product[a][b] += A[a][b][c] * B[c];
       }
     }
   }
   return product;
 }

 /**
  * @brief Scalar triple product u . A . v.
  * @param u a vector of length m
  * @param A an m-by-n tensor
  * @param v a vector of length n
  * @return the sum over (i, j) of `u[i] * A[i][j] * v[j]`
  */
 template <typename S, typename T, typename U, int m, int n>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m>& u, const tensor<T, m, n>& A, const tensor<U, n>& v)
 {
   decltype(S{} * T{} * U{}) acc{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       acc += u[row] * A[row][col] * v[col];
     }
   }
   return acc;
 }

 /**
  * @brief Contracts the last index of an m-by-n-by-p-by-q tensor with a vector of length q.
  * @return an m-by-n-by-p tensor with entries sum_l `A[i][j][k][l] * B[l]`
  */
 template <typename S, typename T, int m, int n, int p, int q>
 SMITH_HOST_DEVICE constexpr auto dot(const tensor<S, m, n, p, q>& A, const tensor<T, q>& B)
 {
   tensor<decltype(S{} * T{}), m, n, p> product{};
   for (int a = 0; a < m; ++a) {
     for (int b = 0; b < n; ++b) {
       for (int c = 0; c < p; ++c) {
         for (int d = 0; d < q; ++d) {
           product[a][b][c] += A[a][b][c][d] * B[d];
         }
       }
     }
   }
   return product;
 }


 /**
  * @brief Cross product of the two columns of a 3-by-2 tensor.
  * @param A tensor whose columns `A(:, 0)` and `A(:, 1)` are the two vectors
  * @return the 3-vector `A(:, 0) x A(:, 1)`
  */
 template <typename T>
 auto cross(const tensor<T, 3, 2>& A)
 {
   return tensor<T, 3>{
       A(1, 0) * A(2, 1) - A(2, 0) * A(1, 1),
       A(2, 0) * A(0, 1) - A(0, 0) * A(2, 1),
       A(0, 0) * A(1, 1) - A(1, 0) * A(0, 1)};
 }

 /**
  * @brief 2D "cross" of the single column of a 2-by-1 tensor.
  * @return the 2-vector `{v(1, 0), -v(0, 0)}`
  */
 template <typename T>
 auto cross(const tensor<T, 2, 1>& v)
 {
   return tensor<T, 2>{v(1, 0), -v(0, 0)};
 }

 /**
  * @brief 2D "cross" of a single 2-vector.
  * @return the 2-vector `{v[1], -v[0]}`
  */
 template <typename T>
 auto cross(const tensor<T, 2>& v)
 {
   return tensor<T, 2>{v[1], -v[0]};
 }

 /**
  * @brief Cross product of two 3-vectors.
  * @return the 3-vector `u x v`
  */
 template <typename S, typename T>
 auto cross(const tensor<S, 3>& u, const tensor<T, 3>& v)
 {
   return tensor<decltype(S{} * T{}), 3>{
       u(1) * v(2) - u(2) * v(1),
       u(2) * v(0) - u(0) * v(2),
       u(0) * v(1) - u(1) * v(0)};
 }


 /**
  * @brief Double contraction of a rank-4 tensor with a rank-2 tensor over the last two indices of `A`.
  * @return an m-by-n tensor with entries sum_{k,l} `A[i][j][k][l] * B[k][l]`
  */
 template <typename S, typename T, int m, int n, int p, int q>
 SMITH_HOST_DEVICE constexpr auto double_dot(const tensor<S, m, n, p, q>& A, const tensor<T, p, q>& B)
 {
   tensor<decltype(S{} * T{}), m, n> result{};
   for (int a = 0; a < m; ++a) {
     for (int b = 0; b < n; ++b) {
       for (int c = 0; c < p; ++c) {
         for (int d = 0; d < q; ++d) {
           result[a][b] += A[a][b][c][d] * B[c][d];
         }
       }
     }
   }
   return result;
 }

 /**
  * @brief Double contraction of a rank-3 tensor with a rank-2 tensor over the last two indices of `A`.
  * @return a vector of length m with entries sum_{j,k} `A[i][j][k] * B[j][k]`
  */
 template <typename S, typename T, int m, int n, int p>
 SMITH_HOST_DEVICE constexpr auto double_dot(const tensor<S, m, n, p>& A, const tensor<T, n, p>& B)
 {
   tensor<decltype(S{} * T{}), m> result{};
   for (int a = 0; a < m; ++a) {
     for (int b = 0; b < n; ++b) {
       for (int c = 0; c < p; ++c) {
         result[a] += A[a][b][c] * B[b][c];
       }
     }
   }
   return result;
 }

 /**
  * @brief Double contraction of two rank-2 tensors of identical shape.
  * @return the sum over (i, j) of `A[i][j] * B[i][j]`
  */
 template <typename S, typename T, int m, int n>
 constexpr auto double_dot(const tensor<S, m, n>& A, const tensor<T, m, n>& B)
 {
   decltype(S{} * T{}) acc{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       acc += A[row][col] * B[row][col];
     }
   }
   return acc;
 }


 /**
  * @brief Product of two tensors; simply forwards to the matching `dot(A, B)` overload.
  * @param A the left operand
  * @param B the right operand
  */
 template <typename S, typename T, int... m, int... n>
 SMITH_HOST_DEVICE constexpr auto operator*(const tensor<S, m...>& A, const tensor<T, n...>& B)
 {
   // overload resolution on dot() decides which index contraction is performed
   return dot(A, B);
 }


 /**
  * @brief Sum of the squares of the entries of a vector.
  */
 template <typename T, int m>
 SMITH_HOST_DEVICE constexpr auto squared_norm(const tensor<T, m>& A)
 {
   T sum_sq{};
   for (int k = 0; k < m; ++k) {
     sum_sq += A[k] * A[k];
   }
   return sum_sq;
 }

 /**
  * @brief Sum of the squares of the entries of a matrix.
  */
 template <typename T, int m, int n>
 SMITH_HOST_DEVICE constexpr auto squared_norm(const tensor<T, m, n>& A)
 {
   T sum_sq{};
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       sum_sq += A[row][col] * A[row][col];
     }
   }
   return sum_sq;
 }

 /**
  * @brief Sum of the squares of the entries of a tensor of any shape.
  *
  * Visits every multi-index through `for_constexpr<n...>`.
  */
 template <typename T, int... n>
 SMITH_HOST_DEVICE constexpr auto squared_norm(const tensor<T, n...>& A)
 {
   T sum_sq{};
   for_constexpr<n...>([&](auto... i) { sum_sq += A(i...) * A(i...); });
   return sum_sq;
 }


 /**
  * @brief Square root of `squared_norm(A)`.
  *
  * `sqrt` is called unqualified after `using std::sqrt`, so an overload found
  * by argument-dependent lookup for the element type is also considered.
  */
 template <typename T, int... n>
 SMITH_HOST_DEVICE auto norm(const tensor<T, n...>& A)
 {
   using std::sqrt;
   const auto sum_sq = squared_norm(A);
   return sqrt(sum_sq);
 }

 /// @brief The norm of `zero` is `zero`.
 SMITH_HOST_DEVICE constexpr auto norm(zero)
 {
   return zero{};
 }

 /**
  * @brief Divide a tensor by its norm.
  * @param A the tensor to scale; no check is made for a vanishing norm
  * @return `A / norm(A)`
  */
 template <typename T, int... n>
 SMITH_HOST_DEVICE auto normalize(const tensor<T, n...>& A)
 {
   return A / norm(A);
 }


 /**
  * @brief Embed a 2x2 tensor in the upper-left corner of a 3x3 tensor.
  * @param A the 2x2 tensor to copy
  * @return a 3x3 tensor; entries outside the upper-left 2x2 block keep their value-initialized state
  */
 template <typename T>
 SMITH_HOST_DEVICE tensor<T, 3, 3> to_3x3(const tensor<T, 2, 2>& A)
 {
   tensor<T, 3, 3> padded{};
   for (int row = 0; row < 2; ++row) {
     for (int col = 0; col < 2; ++col) {
       padded[row][col] = A[row][col];
     }
   }
   return padded;
 }


 /**
  * @brief Trace of a square matrix: the sum of its diagonal entries.
  * @param A an n-by-n tensor
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto tr(const tensor<T, n, n>& A)
 {
   T trace{};
   for (int k = 0; k < n; ++k) {
     trace = trace + A[k][k];
   }
   return trace;
 }


 /**
  * @brief Symmetric part of a square matrix.
  * @return the matrix with entries `0.5 * (A[i][j] + A[j][i])`
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto sym(const tensor<T, n, n>& A)
 {
   tensor<T, n, n> result{};
   for (int row = 0; row < n; ++row) {
     for (int col = 0; col < n; ++col) {
       result[row][col] = 0.5 * (A[row][col] + A[col][row]);
     }
   }
   return result;
 }

 /**
  * @brief Antisymmetric (skew) part of a square matrix.
  * @return the matrix with entries `0.5 * (A[i][j] - A[j][i])`
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto antisym(const tensor<T, n, n>& A)
 {
   tensor<T, n, n> result{};
   for (int row = 0; row < n; ++row) {
     for (int col = 0; col < n; ++col) {
       result[row][col] = 0.5 * (A[row][col] - A[col][row]);
     }
   }
   return result;
 }


 /**
  * @brief Deviatoric part of a square matrix: `A` with `tr(A) / n` removed from each diagonal entry.
  * @param A an n-by-n tensor
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto dev(const tensor<T, n, n>& A)
 {
   auto deviator = A;
   auto trace = tr(A);
   for (int k = 0; k < n; ++k) {
     deviator[k][k] -= trace / n;
   }
   return deviator;
 }


 /**
  * @brief Keep only the diagonal of a square matrix.
  * @param A an n-by-n tensor
  * @return an n-by-n tensor whose diagonal matches `A`; off-diagonal entries stay value-initialized
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto diagonal_matrix(const tensor<T, n, n>& A)
 {
   tensor<T, n, n> diagonal_only{};
   for (int k = 0; k < n; ++k) {
     diagonal_only[k][k] = A[k][k];
   }
   return diagonal_only;
 }


 /**
  * @brief Build a square matrix from its diagonal.
  * @param d the n diagonal entries
  * @return an n-by-n tensor with `d` on the diagonal; off-diagonal entries stay value-initialized
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr tensor<T, n, n> diag(const tensor<T, n>& d)
 {
   tensor<T, n, n> matrix{};
   for (int k = 0; k < n; ++k) {
     matrix[k][k] = d[k];
   }
   return matrix;
 }

 /**
  * @brief Extract the diagonal of a square matrix.
  * @param D an n-by-n tensor
  * @return a vector of length n holding `D[i][i]`
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr tensor<T, n> diag(const tensor<T, n, n>& D)
 {
   tensor<T, n> entries{};
   for (int k = 0; k < n; ++k) {
     entries[k] = D[k][k];
   }
   return entries;
 }


 /**
  * @brief The dim-by-dim identity matrix, stored densely.
  * @tparam dim the number of rows and columns
  */
 template <int dim>
 SMITH_HOST_DEVICE constexpr tensor<double, dim, dim> DenseIdentity()
 {
   tensor<double, dim, dim> identity{};
   for (int row = 0; row < dim; ++row) {
     for (int col = 0; col < dim; ++col) {
       // the comparison converts to 1.0 on the diagonal and 0.0 elsewhere
       identity[row][col] = (row == col);
     }
   }
   return identity;
 }


 /**
  * @brief Transpose of a matrix.
  * @param A an m-by-n tensor
  * @return an n-by-m tensor with entries `A[j][i]`
  */
 template <typename T, int m, int n>
 SMITH_HOST_DEVICE constexpr auto transpose(const tensor<T, m, n>& A)
 {
   tensor<T, n, m> flipped{};
   for (int row = 0; row < n; ++row) {
     for (int col = 0; col < m; ++col) {
       flipped[row][col] = A[col][row];
     }
   }
   return flipped;
 }


 /**
  * @brief Second principal invariant of a 3x3 matrix.
  *
  * Computed as the sum of the three products of diagonal pairs minus the three
  * products of mirrored off-diagonal pairs.
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto I2(const tensor<T, 3, 3>& A)
 {
   // clang-format off
   return +A[0][0] * A[1][1]
          + A[1][1] * A[2][2]
          + A[2][2] * A[0][0]
          - A[0][1] * A[1][0]
          - A[1][2] * A[2][1]
          - A[2][0] * A[0][2];
   // clang-format on
 }


 /**
  * @brief Determinant of a 2x2 matrix.
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto det(const tensor<T, 2, 2>& A)
 {
   return A[0][0] * A[1][1] - A[0][1] * A[1][0];
 }

 /**
  * @brief Determinant of a 3x3 matrix, written out as the six signed triple products.
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto det(const tensor<T, 3, 3>& A)
 {
   // clang-format off
   return A[0][0] * A[1][1] * A[2][2]
        + A[0][1] * A[1][2] * A[2][0]
        + A[0][2] * A[1][0] * A[2][1]
        - A[0][0] * A[1][2] * A[2][1]
        - A[0][1] * A[1][0] * A[2][2]
        - A[0][2] * A[1][1] * A[2][0];
   // clang-format on
 }


 /**
  * @brief Evaluates det(A + I) - 1 for a 2x2 matrix without forming A + I.
  * @param A the 2x2 matrix
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto detApIm1(const tensor<T, 2, 2>& A)
 {
   // Expanding the characteristic polynomial gives, for any N by N matrix A,
   // det(A + I) - 1 = I1(A) + I2(A) + ... + IN(A),
   // where the In are the principal invariants of A.
   // The invariants are written out inline here to increase computational speed.

   // equivalent to tr(A) + det(A)
   return A(0, 0) - A(0, 1) * A(1, 0) + A(1, 1) + A(0, 0) * A(1, 1);
 }

 /**
  * @brief Evaluates det(A + I) - 1 for a 3x3 matrix without forming A + I.
  * @param A the 3x3 matrix
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto detApIm1(const tensor<T, 3, 3>& A)
 {
   // Same identity as the 2x2 overload: det(A + I) - 1 is the sum of the principal invariants.

   // clang-format off
   // equivalent to tr(A) + I2(A) + det(A)
   return A(0, 0) + A(1, 1) + A(2, 2)
        - A(0, 1) * A(1, 0) * (1 + A(2, 2))
        + A(0, 0) * A(1, 1) * (1 + A(2, 2))
        - A(0, 2) * A(2, 0) * (1 + A(1, 1))
        - A(1, 2) * A(2, 1) * (1 + A(0, 0))
        + A(0, 0) * A(2, 2)
        + A(1, 1) * A(2, 2)
        + A(0, 1) * A(1, 2) * A(2, 0)
        + A(0, 2) * A(1, 0) * A(2, 1);
   // clang-format on
 }


 /**
  * @brief Approximate a matrix square root of `A` by a fixed number of iterations.
  *
  * Starting from `B = A`, applies the update `B <- 0.5 * (B + A * inv(B))`
  * exactly 15 times and returns the final iterate. No convergence check is made.
  *
  * @param A the square matrix whose square root is sought
  */
 template <typename T, int dim>
 auto matrix_sqrt(const tensor<T, dim, dim>& A)
 {
   auto root = A;
   for (int iteration = 0; iteration < 15; ++iteration) {
     root = 0.5 * (root + dot(A, inv(root)));
   }
   return root;
 }


 /**
  * @brief Contract index `i1` of `A` with index `i2` of the matrix `B`.
  *
  * The contracted index of `A` is replaced, in the same position, by the
  * remaining (uncontracted) index of `B`. `A` must have rank 2 or 3.
  *
  * @tparam i1 which index of `A` is summed over
  * @tparam i2 which index of `B` is summed over (0 or 1)
  * @param A the rank-2 or rank-3 tensor
  * @param B the rank-2 tensor
  * @return a tensor of the same rank as `A` with element type `decltype(S{} * T{})`
  */
 template <int i1, int i2, typename S, int m, int... n, typename T, int p, int q>
 SMITH_HOST_DEVICE auto contract(const tensor<S, m, n...>& A, const tensor<T, p, q>& B)
 {
   constexpr int Adims[] = {m, n...};
   constexpr int Bdims[] = {p, q};
   static_assert(sizeof...(n) < 3);
   static_assert(Adims[i1] == Bdims[i2], "error: incompatible tensor dimensions");

   // Shape of the output: the contracted extent of A is replaced by the free extent of B.
   // d3 == 0 is used as the marker for "A has rank 2".
   constexpr int new_dim = (i2 == 0) ? q : p;
   constexpr int d1 = (i1 == 0) ? new_dim : Adims[0];
   constexpr int d2 = (i1 == 1) ? new_dim : Adims[1];
   constexpr int d3 = sizeof...(n) == 1 ? 0 : ((i1 == 2) ? new_dim : Adims[2]);

   // Element type of the output.
   using U = decltype(S{} * T{});

   // Value-initialized output of the right rank.
   auto result = []() {
     if constexpr (d3 == 0) return tensor<U, d1, d2>{};
     if constexpr (d3 != 0) return tensor<U, d1, d2, d3>{};
   }();

   if constexpr (d3 == 0) {
     // rank-2 A
     for (int i = 0; i < d1; ++i) {
       for (int j = 0; j < d2; ++j) {
         U acc{};
         for (int k = 0; k < Adims[i1]; ++k) {
           if constexpr (i1 == 0 && i2 == 0) acc += A(k, j) * B(k, i);
           if constexpr (i1 == 1 && i2 == 0) acc += A(i, k) * B(k, j);
           if constexpr (i1 == 0 && i2 == 1) acc += A(k, j) * B(i, k);
           if constexpr (i1 == 1 && i2 == 1) acc += A(i, k) * B(j, k);
         }
         result(i, j) = acc;
       }
     }
   } else {
     // rank-3 A
     for (int i = 0; i < d1; ++i) {
       for (int j = 0; j < d2; ++j) {
         for (int k = 0; k < d3; ++k) {
           U acc{};
           for (int l = 0; l < Adims[i1]; ++l) {
             if constexpr (i1 == 0 && i2 == 0) acc += A(l, j, k) * B(l, i);
             if constexpr (i1 == 1 && i2 == 0) acc += A(i, l, k) * B(l, j);
             if constexpr (i1 == 2 && i2 == 0) acc += A(i, j, l) * B(l, k);
             if constexpr (i1 == 0 && i2 == 1) acc += A(l, j, k) * B(i, l);
             if constexpr (i1 == 1 && i2 == 1) acc += A(i, l, k) * B(j, l);
             if constexpr (i1 == 2 && i2 == 1) acc += A(i, j, l) * B(k, l);
           }
           result(i, j, k) = acc;
         }
       }
     }
   }

   return result;
 }


 /**
  * @brief Contracting `zero` with anything is zero; the index choices and second operand are ignored.
  */
 template <int i1, int i2, typename T>
 SMITH_HOST_DEVICE auto contract(const zero&, const T&) { return zero{}; }


 /**
  * @brief Relative difference between two tensors, measured against the first.
  * @param A the reference tensor; no check is made for `norm(A)` being zero
  * @param B the tensor compared against `A`
  * @return `norm(A - B) / norm(A)`
  */
 template <typename T, int... n>
 double relative_error(tensor<T, n...> A, tensor<T, n...> B)
 {
   const auto difference = A - B;
   return norm(difference) / norm(A);
 }


 /**
  * @brief Check whether a square matrix is symmetric to within an absolute tolerance.
  * @param A the n-by-n matrix to test
  * @param tolerance largest allowed value of `|A(i, j) - A(j, i)|`
  * @return false as soon as one mirrored pair differs by more than `tolerance`, true otherwise
  */
 template <int n>
 SMITH_HOST_DEVICE bool is_symmetric(tensor<double, n, n> A, double tolerance = 1.0e-8)
 {
   // only the strict upper triangle needs to be compared with its mirror image
   for (int row = 0; row < n; ++row) {
     for (int col = row + 1; col < n; ++col) {
       const double mismatch = std::abs(A(row, col) - A(col, row));
       if (mismatch > tolerance) {
         return false;
       }
     }
   }
   return true;
 }


 /**
  * @brief Check whether a 2x2 matrix is symmetric and positive definite.
  *
  * Uses Sylvester's criterion: a symmetric matrix is positive definite exactly
  * when all of its leading principal minors are strictly positive. The
  * comparisons are therefore `<=`; with `<` a matrix such as
  * {{0, 0}, {0, -1}} would incorrectly be accepted.
  *
  * @param A the matrix to test
  * @return true if `A` passes `is_symmetric` and both leading minors are > 0
  */
 inline SMITH_HOST_DEVICE bool is_symmetric_and_positive_definite(tensor<double, 2, 2> A)
 {
   if (!is_symmetric(A)) {
     return false;
   }
   // first leading principal minor
   if (A(0, 0) <= 0.0) {
     return false;
   }
   // second leading principal minor
   if (det(A) <= 0.0) {
     return false;
   }
   return true;
 }

 /**
  * @brief Check whether a 3x3 matrix is symmetric and positive definite.
  *
  * Uses Sylvester's criterion: the determinant of `A` (third leading principal
  * minor) must be strictly positive, and the upper-left 2x2 block is handed to
  * the 2x2 overload for the remaining minors. A vanishing determinant means
  * the matrix is singular and hence not positive definite, so the comparison
  * is `<=`.
  *
  * @param A the matrix to test
  * @return true if `A` passes `is_symmetric`, `det(A) > 0`, and the 2x2 check succeeds
  */
 inline SMITH_HOST_DEVICE bool is_symmetric_and_positive_definite(tensor<double, 3, 3> A)
 {
   if (!is_symmetric(A)) {
     return false;
   }
   if (det(A) <= 0.0) {
     return false;
   }
   // upper-left 2x2 block of A
   auto subtensor = make_tensor<2, 2>([A](int i, int j) { return A(i, j); });
   if (!is_symmetric_and_positive_definite(subtensor)) {
     return false;
   }
   return true;
 }


 // Holds the pieces of an LU factorization of an n-by-n matrix.
 // As consumed by linear_solve / solve_lower_triangular in this file:
 // row i of the forward substitution reads entry P[i] of the right-hand side.
 template <typename T, int n>

 struct LuFactorization {

   // Row permutation (indices into the right-hand side).
   tensor<int, n> P;

   // Lower-triangular factor.
   tensor<T, n, n> L;

   // Upper-triangular factor.
   tensor<T, n, n> U;

 };


 /**
  * @brief Forward substitution: solve L y = b(P) for a lower-triangular `L`.
  * @param L the n-by-n lower-triangular matrix; only entries on or below the diagonal are read
  * @param b the right-hand side(s); its leading extent is n
  * @param P row permutation: equation i uses `b[P[i]]`
  * @return the solution `y`, with the same shape as `b`
  */
 template <typename T, int n, int... m>
 SMITH_HOST_DEVICE constexpr auto solve_lower_triangular(const tensor<T, n, n>& L, const tensor<T, n, m...>& b,
                                                         const tensor<int, n>& P)
 {
   tensor<T, n, m...> y{};
   for (int row = 0; row < n; ++row) {
     // start from the permuted right-hand side and remove the already-known terms
     auto residual = b[P[row]];
     for (int col = 0; col < row; ++col) {
       residual -= L[row][col] * y[col];
     }
     y[row] = residual / L[row][row];
   }
   return y;
 }

 /**
  * @brief Forward substitution without a permutation: solve L y = b.
  * @param L the n-by-n lower-triangular matrix
  * @param b the right-hand side(s); its leading extent is n
  * @return the solution `y`, with the same shape as `b`
  */
 template <typename T, int n, int... m>
 SMITH_HOST_DEVICE constexpr auto solve_lower_triangular(const tensor<T, n, n>& L, const tensor<T, n, m...>& b)
 {
   // no permutation provided, so just map each equation to itself
   // TODO make a convenience function for ranges like this
   // BT 05/09/2022
   tensor<int, n> identity_permutation(make_tensor<n>([](auto i) { return i; }));

   return solve_lower_triangular(L, b, identity_permutation);
 }


 /**
  * @brief Back substitution: solve U x = y for an upper-triangular `U`.
  * @param U the n-by-n upper-triangular matrix; only entries on or above the diagonal are read
  * @param y the right-hand side(s); its leading extent is n
  * @return the solution `x`, with the same shape as `y`
  */
 template <typename T, int n, int... m>
 SMITH_HOST_DEVICE constexpr auto solve_upper_triangular(const tensor<T, n, n>& U, const tensor<T, n, m...>& y)
 {
   tensor<T, n, m...> x{};
   // walk the rows from last to first; x[col] for col > row is already known
   for (int row = n - 1; row >= 0; --row) {
     auto residual = y[row];
     for (int col = row + 1; col < n; ++col) {
       residual -= U[row][col] * x[col];
     }
     x[row] = residual / U[row][row];
   }
   return x;
 }


 /**
  * @brief Solve a linear system using a precomputed LU factorization.
  * @param lu_factors the factors `P`, `L`, `U` of the system matrix
  * @param b the right-hand side(s); its leading extent is n
  * @return the solution, obtained by forward substitution followed by back substitution
  */
 template <typename S, typename T, int n, int... m>
 SMITH_HOST_DEVICE constexpr auto linear_solve(const LuFactorization<S, n>& lu_factors, const tensor<T, n, m...>& b)
 {
   // Step 1 (forward substitution): solve L y = b, with rows of b picked through P
   const auto intermediate = solve_lower_triangular(lu_factors.L, b, lu_factors.P);

   // Step 2 (back substitution): solve U x = y
   return solve_upper_triangular(lu_factors.U, intermediate);
 }

 /**
  * @brief A linear system with a `zero` right-hand side has the solution `zero`; the factors are ignored.
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto linear_solve(const LuFactorization<T, n>& /* lu_factors */, const zero /* b */)
 {
   return zero{};
 }


 /**
  * @brief Inverse of a 2x2 matrix of doubles, from the closed-form adjugate formula.
  * @param A the matrix to invert; no check is made for a vanishing determinant
  */
 SMITH_HOST_DEVICE constexpr tensor<double, 2, 2> inv(const tensor<double, 2, 2>& A)
 {
   double scale(1.0 / det(A));

   tensor<double, 2, 2> result{};

   result[0][0] = A[1][1] * scale;
   result[0][1] = -A[0][1] * scale;
   result[1][0] = -A[1][0] * scale;
   result[1][1] = A[0][0] * scale;

   return result;
 }

 /**
  * @brief Inverse of a 3x3 matrix of doubles, from the closed-form adjugate formula.
  * @param A the matrix to invert; no check is made for a vanishing determinant
  */
 SMITH_HOST_DEVICE constexpr tensor<double, 3, 3> inv(const tensor<double, 3, 3>& A)
 {
   double scale(1.0 / det(A));

   tensor<double, 3, 3> result{};

   // first row
   result[0][0] = (A[1][1] * A[2][2] - A[1][2] * A[2][1]) * scale;
   result[0][1] = (A[0][2] * A[2][1] - A[0][1] * A[2][2]) * scale;
   result[0][2] = (A[0][1] * A[1][2] - A[0][2] * A[1][1]) * scale;

   // second row
   result[1][0] = (A[1][2] * A[2][0] - A[1][0] * A[2][2]) * scale;
   result[1][1] = (A[0][0] * A[2][2] - A[0][2] * A[2][0]) * scale;
   result[1][2] = (A[0][2] * A[1][0] - A[0][0] * A[1][2]) * scale;

   // third row
   result[2][0] = (A[1][0] * A[2][1] - A[1][1] * A[2][0]) * scale;
   result[2][1] = (A[0][1] * A[2][0] - A[0][0] * A[2][1]) * scale;
   result[2][2] = (A[0][0] * A[1][1] - A[0][1] * A[1][0]) * scale;

   return result;
 }

 /**
  * @brief Inverse of a general n-by-n matrix, obtained by solving A X = I with `linear_solve`.
  * @param A the matrix to invert
  */
 template <typename T, int n>
 SMITH_HOST_DEVICE constexpr auto inv(const tensor<T, n, n>& A)
 {
   auto identity = DenseIdentity<n>();
   return linear_solve(A, identity);
 }


 /**
  * @brief Stream a tensor as a brace-delimited, comma-separated list; sub-tensors nest recursively.
  * @param out the stream written to
  * @param A the tensor to print
  * @return `out`
  */
 template <typename T, int m, int... n>
 auto& operator<<(std::ostream& out, const tensor<T, m, n...>& A)
 {
   out << '{' << A[0];
   for (int slice = 1; slice < m; ++slice) {
     out << ", " << A[slice];
   }
   out << '}';
   return out;
 }

 /**
  * @brief Stream a `zero` as the literal text "zero".
  * @return `out`
  */
 inline auto& operator<<(std::ostream& out, zero)
 {
   out << "zero";
   return out;
 }


 /// @brief Print a double with `printf("%f")`.
 inline SMITH_HOST_DEVICE void print(double value)
 {
   printf("%f", value);
 }

 /**
  * @brief Print a tensor of doubles with `printf`, as a brace-delimited, comma-separated list.
  *
  * Sub-tensors are printed recursively; no newline is emitted.
  */
 template <int m, int... n>
 SMITH_HOST_DEVICE void print(const tensor<double, m, n...>& A)
 {
   printf("{");
   print(A[0]);
   for (int slice = 1; slice < m; ++slice) {
     printf(",");
     print(A[slice]);
   }
   printf("}");
 }


 /**
  * @brief Copy of a vector in which entries whose square is below 1.0e-20 are replaced by 0.0.
  */
 template <int n>
 SMITH_HOST_DEVICE constexpr auto chop(const tensor<double, n>& A)
 {
   auto cleaned = A;
   for (int k = 0; k < n; ++k) {
     if (cleaned[k] * cleaned[k] < 1.0e-20) {
       cleaned[k] = 0.0;
     }
   }
   return cleaned;
 }

 /**
  * @brief Copy of a matrix in which entries whose square is below 1.0e-20 are replaced by 0.0.
  */
 template <int m, int n>
 SMITH_HOST_DEVICE constexpr auto chop(const tensor<double, m, n>& A)
 {
   auto cleaned = A;
   for (int row = 0; row < m; ++row) {
     for (int col = 0; col < n; ++col) {
       if (cleaned[row][col] * cleaned[row][col] < 1.0e-20) {
         cleaned[row][col] = 0.0;
       }
     }
   }
   return cleaned;
 }


 namespace detail {

 /// Maps a pair of operand types to the type of their outer product (see the specializations below).
 template <typename T1, typename T2>
 struct outer_prod;

 /// tensor x tensor: the extents of both operands are concatenated.
 template <int... m, int... n>
 struct outer_prod<tensor<double, m...>, tensor<double, n...>> {
   using type = tensor<double, m..., n...>;
 };

 /// scalar x tensor: same shape as the tensor.
 template <int... n>
 struct outer_prod<double, tensor<double, n...>> {
   using type = tensor<double, n...>;
 };

 /// tensor x scalar: same shape as the tensor.
 template <int... n>
 struct outer_prod<tensor<double, n...>, double> {
   using type = tensor<double, n...>;
 };

 /// scalar x scalar: a tensor with no extents.
 template <>
 struct outer_prod<double, double> {
   using type = tensor<double>;
 };

 /// zero x anything: zero.
 template <typename T>
 struct outer_prod<zero, T> {
   using type = zero;
 };

 /// anything x zero: zero.
 template <typename T>
 struct outer_prod<T, zero> {
   using type = zero;
 };

 }  // namespace detail


 /**
  * @brief a type function that returns the tensor type of an outer product of two tensors
  *
  * @tparam Lhs the type of the left operand
  * @tparam Rhs the type of the right operand
  */
 template <typename Lhs, typename Rhs>
 using outer_product_t = typename detail::outer_prod<Lhs, Rhs>::type;


 /**
  * @brief get the gradient of a plain double
  *
  * A double carries no derivative information, so the result is always the `zero` sentinel.
  * Declared constexpr (which also implies inline) so that it matches the tensor overload
  * of get_gradient and can be used in constant expressions.
  *
  * @return zero{}
  */
 SMITH_HOST_DEVICE constexpr auto get_gradient(double /* arg */) { return zero{}; }


 /**
  * @brief get the gradient of a tensor of doubles
  *
  * The stored type is a plain double rather than a dual number, so there is no
  * derivative information to extract and the `zero` sentinel is returned.
  *
  * @tparam n the dimensions of the tensor
  * @return zero{}
  */
 template <int... n>
 SMITH_HOST_DEVICE constexpr auto get_gradient(const tensor<double, n...>& /* arg */)
 {
   return zero{};
 }


 /**
  * @brief chain rule contribution when both the derivative and the perturbation are `zero`
  *
  * @return zero{}
  */
 SMITH_HOST_DEVICE constexpr auto chain_rule(const zero /* df_dx */, const zero /* dx */)
 {
   return zero{};
 }


 /**
  * @brief chain rule contribution when the derivative is `zero`
  *
  * @tparam T the type of the perturbation, which is ignored
  * @return zero{}
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto chain_rule(const zero /* df_dx */, const T /* dx */)
 {
   return zero{};
 }


 /**
  * @brief chain rule contribution when the perturbation is `zero`
  *
  * @tparam T the type of the derivative, which is ignored
  * @return zero{}
  */
 template <typename T>
 SMITH_HOST_DEVICE constexpr auto chain_rule(const T /* df_dx */, const zero /* dx */)
 {
   return zero{};
 }


 /**
  * @brief chain rule for a scalar derivative and a scalar perturbation
  *
  * @param[in] df_dx the derivative
  * @param[in] dx the perturbation
  * @return the product df_dx * dx
  */
 SMITH_HOST_DEVICE constexpr auto chain_rule(const double df_dx, const double dx)
 {
   return df_dx * dx;
 }


 /**
  * @brief chain rule for a tensor-valued derivative and a scalar perturbation
  *
  * @tparam n the dimensions of the derivative tensor
  * @param[in] df_dx the derivative
  * @param[in] dx the scalar perturbation
  * @return the result of df_dx * dx
  */
 template <int... n>
 SMITH_HOST_DEVICE constexpr auto chain_rule(const tensor<double, n...>& df_dx, const double dx)
 {
   auto scaled = df_dx * dx;
   return scaled;
 }


 /**
  * @brief chain rule for a derivative and a perturbation of identical shape
  *
  * Sums the products of corresponding entries over every index combination.
  *
  * @tparam n the dimensions shared by both tensors
  * @param[in] df_dx the derivative
  * @param[in] dx the perturbation
  * @return the accumulated sum, as a double
  */
 template <int... n>
 SMITH_HOST_DEVICE constexpr auto chain_rule(const tensor<double, n...>& df_dx, const tensor<double, n...>& dx)
 {
   double sum = 0.0;
   for_constexpr<n...>([&df_dx, &dx, &sum](auto... idx) { sum += df_dx(idx...) * dx(idx...); });
   return sum;
 }


 /**
  * @brief chain rule for a derivative that has one more (leading) index than the perturbation
  *
  * Entry i of the result is chain_rule(df_dx[i], dx).
  *
  * @tparam m the leading dimension of the derivative
  * @tparam n the dimensions of the perturbation
  * @param[in] df_dx the derivative
  * @param[in] dx the perturbation
  * @return a tensor<double, m> holding one chain_rule result per leading index
  */
 template <int m, int... n>
 SMITH_HOST_DEVICE constexpr auto chain_rule(const tensor<double, m, n...>& df_dx, const tensor<double, n...>& dx)
 {
   tensor<double, m> result{};
   for (int row = 0; row < m; ++row) {
     result[row] = chain_rule(df_dx[row], dx);
   }
   return result;
 }


 /**
  * @brief chain rule for a derivative that has two more (leading) indices than the perturbation
  *
  * Entry (i, j) of the result is chain_rule(df_dx[i][j], dx).
  * Marked constexpr for consistency with the other chain_rule overloads.
  *
  * @tparam m the first leading dimension of the derivative
  * @tparam n the second leading dimension of the derivative
  * @tparam p the dimensions of the perturbation
  * @param[in] df_dx the derivative
  * @param[in] dx the perturbation
  * @return a tensor<double, m, n> holding one chain_rule result per pair of leading indices
  */
 template <int m, int n, int... p>
 SMITH_HOST_DEVICE constexpr auto chain_rule(const tensor<double, m, n, p...>& df_dx, const tensor<double, p...>& dx)
 {
   tensor<double, m, n> total{};
   for (int i = 0; i < m; i++) {
     for (int j = 0; j < n; j++) {
       total[i][j] = chain_rule(df_dx[i][j], dx);
     }
   }
   return total;
 }


 /**
  * @brief the total number of entries stored in a tensor
  *
  * @tparam T the type stored in the tensor
  * @tparam n the dimensions of the tensor
  * @return the product of all dimensions (1 when the dimension list is empty)
  */
 template <typename T, int... n>
 SMITH_HOST_DEVICE constexpr int size(const tensor<T, n...>&)
 {
   constexpr int entry_count = (n * ... * 1);
   return entry_count;
 }


 /**
  * @brief overload of size() for a plain double
  *
  * @return 1
  */
 SMITH_HOST_DEVICE constexpr int size(const double&)
 {
   return 1;
 }


 /**
  * @brief overload of size() for the `zero` sentinel
  *
  * @return 0
  */
 SMITH_HOST_DEVICE constexpr int size(zero)
 {
   return 0;
 }


 /**
  * @brief a function for querying the ith dimension of a tensor
  *
  * The index is validated at compile time: a negative index, an index that is not
  * smaller than the rank, or a tensor with an empty dimension list is rejected with
  * a static_assert instead of reading outside the local array of dimensions.
  *
  * @tparam i the (zero-based) index of the dimension to query
  * @tparam T the type stored in the tensor
  * @tparam n the dimensions of the tensor
  * @return the ith entry of n...
  */
 template <int i, typename T, int... n>
 SMITH_HOST_DEVICE constexpr int dimension(const tensor<T, n...>&)
 {
   static_assert(i >= 0 && i < static_cast<int>(sizeof...(n)),
                 "dimension<i>(tensor): index i must satisfy 0 <= i < rank of the tensor");
   constexpr int dimensions[] = {n...};
   return dimensions[i];
 }


 /**
  * @brief a function for querying the first dimension of a tensor
  *
  * @tparam T the type stored in the tensor
  * @tparam m the first dimension of the tensor
  * @tparam n the remaining dimensions of the tensor
  * @return m
  */
 template <typename T, int m, int... n>
 SMITH_HOST_DEVICE constexpr int leading_dimension(tensor<T, m, n...>)
 {
   constexpr int first_extent = m;
   return first_extent;
 }


 /**
  * @brief check whether any entry of a tensor is NaN
  *
  * Every entry is tested with std::isnan; there is no early exit.
  *
  * @tparam T the type stored in the tensor
  * @tparam n the dimensions of the tensor
  * @param[in] A the tensor to inspect
  * @return true if std::isnan reported true for at least one entry, false otherwise
  */
 template <typename T, int... n>
 bool isnan(const tensor<T, n...>& A)
 {
   bool any_nan = false;
   for_constexpr<n...>([&any_nan, &A](auto... idx) {
     if (std::isnan(A(idx...))) {
       any_nan = true;
     }
   });
   return any_nan;
 }


 /**
  * @brief overload of isnan() for the `zero` sentinel
  *
  * @return false
  */
 inline bool isnan(const zero&)
 {
   return false;
 }


 }  // namespace smith


 #if 0


 // Angle between two 2-vectors: acos of the dot product of the normalized inputs.
 // The dot product is clipped to [-1, 1] before it is passed to acos.
 // NOTE(review): this definition sits in a preprocessor-disabled region and relies on
 // vec / clip, which are not visible in this part of the file.
 inline float angle_between(const vec < 2 > & a, const vec < 2 > & b) {

   return acos(clip(dot(normalize(a), normalize(b)), -1.0f, 1.0f));

 }


 // Angle between two 3-vectors: acos of the dot product of the normalized inputs.
 // The dot product is clipped to [-1, 1] before it is passed to acos.
 inline float angle_between(const vec < 3 > & a, const vec < 3 > & b) {

   return acos(clip(dot(normalize(a), normalize(b)), -1.0f, 1.0f));

 }


 // angle between proper orthogonal matrices
 // Computed as acos(0.5 * (tr(U * V^T) - 1)).
 // NOTE(review): unlike the vector overloads, the argument of acos is not clipped to [-1, 1] here.

 inline float angle_between(const mat < 3, 3 > & U, const mat < 3, 3 > & V) {

   return acos(0.5f * (tr(dot(U, transpose(V))) - 1.0f));

 }


 // Builds the 2x2 matrix {{cos(theta), -sin(theta)}, {sin(theta), cos(theta)}}.
 // theta is passed directly to cos/sin (presumably radians — confirm which cos/sin overloads apply).
 inline mat < 2, 2 > rotation(const float theta) {

   return mat< 2, 2 >{

     {cos(theta), -sin(theta)},

     { sin(theta), cos(theta) }

   };

 }


 // Converts a rotation vector omega into a 3x3 rotation matrix.
 // The norm of omega is used as the rotation angle and omega / norm(omega) as the axis.
 inline mat < 3, 3 > axis_to_rotation(const vec < 3 > & omega) {


   float norm_omega = norm(omega);


   // a (nearly) zero rotation vector has no usable axis: return eye<3>() and avoid the division below
   if (fabs(norm_omega) < 0.000001f) {


     return eye< 3 >();


   } else {


     // unit rotation axis
     vec3 u = omega / norm_omega;


     float c = cos(norm_omega);

     float s = sin(norm_omega);


     // entry (i,j) is u[i]*u[j]*(1 - c), plus c on the diagonal,
     // plus/minus s times the remaining component of u off the diagonal
     return mat < 3, 3 >{

       {

         u[0]*u[0]*(1.0f - c) + c,

         u[0]*u[1]*(1.0f - c) - u[2]*s,

         u[0]*u[2]*(1.0f - c) + u[1]*s

       },{

         u[1]*u[0]*(1.0f - c) + u[2]*s,

         u[1]*u[1]*(1.0f - c) + c,

         u[1]*u[2]*(1.0f - c) - u[0]*s

       },{

         u[2]*u[0]*(1.0f - c) - u[1]*s,

         u[2]*u[1]*(1.0f - c) + u[0]*s,

         u[2]*u[2]*(1.0f - c) + c

       }

     };


   }


 }


 // assumes R is a proper-orthogonal matrix
 // Returns the vector (R(2,1) - R(1,2), R(0,2) - R(2,0), R(1,0) - R(0,1)) multiplied by a
 // scale factor that depends on the angle theta recovered from the trace of R.

 inline vec < 3 > rotation_to_axis(const mat < 3, 3 > & R) {


   // theta = acos((tr(R) - 1) / 2), with the argument clipped to [-1, 1]
   float theta = acos(clip(0.5f * (tr(R) - 1.0f), -1.0f, 1.0f));


   float scale;


   // for small angles, prefer series expansion to division by sin(theta) ~ 0
   // (0.5 + theta^2 / 12 are the leading terms of 0.5 * theta / sin(theta))

   if (fabs(theta) < 0.00001f) {

     scale = 0.5f + theta * theta / 12.0f;

   }

   else {

     // NOTE(review): sin(theta) also approaches 0 as theta approaches pi; only the small-angle case is guarded
     scale = 0.5f * theta / sin(theta);

   }


   return vec3{ R(2,1) - R(1,2), R(0,2) - R(2,0), R(1,0) - R(0,1) } *scale;


 }


 // Builds a 3x3 matrix whose columns are, in order, f, l and u, where
 //   f = normalized direction,
 //   u = normalized cross(f, cross(up, f)),
 //   l = normalized cross(u, f).
 // up defaults to (0, 0, 1).
 // NOTE(review): if up is parallel to direction, cross(up, f) is the zero vector and is
 // then passed to normalize — confirm how normalize treats a zero vector.
 inline mat < 3, 3 > look_at(const vec < 3 > & direction, const vec < 3 > & up = vec3{ 0.0f, 0.0f, 1.0f }) {

   vec3 f = normalize(direction);

   vec3 u = normalize(cross(f, cross(up, f)));

   vec3 l = normalize(cross(u, f));


   return mat3{

     {f[0], l[0], u[0]},

     {f[1], l[1], u[1]},

     {f[2], l[2], u[2]}

   };

 }


 // Builds a 2x2 matrix whose first column is f (the normalized direction) and whose
 // second column is l = cross(f).
 // NOTE(review): the single-argument cross() is not visible here; presumably it returns a
 // vector perpendicular to f — confirm.
 inline mat < 2, 2 > look_at(const vec < 2 > & direction) {

   vec2 f = normalize(direction);

   vec2 l = cross(f);


   return mat2{

     {f[0], l[0]},

     {f[1], l[1]},

   };

 }


 // Builds a 3x3 matrix whose third column is n.
 // NOTE(review): presumably n is expected to be a unit vector and the first two columns
 // complete an orthonormal basis — confirm against the intended caller.
 inline mat < 3, 3 > R3_basis(const vec3 & n) {

   // sign has the same sign as n[2], so (sign + n[2]) below has magnitude >= 1 and never vanishes
   float sign = (n[2] >= 0.0f) ? 1.0f : -1.0f;

   float a = -1.0f / (sign + n[2]);

   float b = n[0] * n[1] * a;


   return mat < 3, 3 >{

     {

       1.0f + sign * n[0] * n[0] * a,

       b,

       n[0],

     },{

       sign * b,

       sign + n[1] * n[1] * a,

       n[1]

     },{

       -sign * n[0],

       -n[1],

       n[2]

     }

   };

 }

 #endif


 #include "smith/numerics/functional/isotropic_tensor.hpp"


 #include "smith/numerics/functional/tuple_tensor_dual_functions.hpp"

accelerator.hpp
This file contains the interface used for initializing/terminating any hardware accelerator-related f...

SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
#define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
Macro to turn off specific nvcc warnings.
Definition: accelerator.hpp:50

SMITH_HOST_DEVICE
#define SMITH_HOST_DEVICE
Macro that evaluates to __host__ __device__ when compiling with nvcc and does nothing on a host compi...
Definition: accelerator.hpp:38

isotropic_tensor.hpp
Implementation of isotropic tensor classes.

metaprogramming.hpp
Utilities for C++ metaprogramming.

for_constexpr
constexpr SMITH_HOST_DEVICE void for_constexpr(const lambda &f)
multidimensional loop tool that evaluates the lambda body inside the innermost loop.
Definition: metaprogramming.hpp:96

smith
Accelerator functionality.
Definition: smith.cpp:36

smith::is_symmetric
SMITH_HOST_DEVICE bool is_symmetric(tensor< double, n, n > A, double tolerance=1.0e-8)
Return whether a square rank 2 tensor is symmetric.
Definition: tensor.hpp:1485

smith::acos
SMITH_HOST_DEVICE auto acos(dual< gradient_type > a)
implementation of acos for dual numbers
Definition: dual.hpp:373

smith::is_symmetric_and_positive_definite
SMITH_HOST_DEVICE bool is_symmetric_and_positive_definite(tensor< double, 3, 3 > A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1519

smith::antisym
constexpr SMITH_HOST_DEVICE auto antisym(const tensor< T, n, n > &A)
Returns the antisymmetric part of a square matrix.
Definition: tensor.hpp:1174

smith::operator+=
constexpr SMITH_HOST_DEVICE auto & operator+=(dual< gradient_type > &a, const dual< gradient_type > &b)
compound assignment (+) for dual numbers
Definition: dual.hpp:182

smith::operator-=
constexpr SMITH_HOST_DEVICE auto & operator-=(tensor< T, n... > &A, zero)
compound assignment (-) between a tensor and zero (no-op)
Definition: tensor.hpp:586

smith::make_tensor
SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING constexpr SMITH_HOST_DEVICE auto make_tensor(lambda_type f)
Creates a tensor of requested dimension by subsequent calls to a functor. Can be thought of as analogo...
Definition: tensor.hpp:297

smith::get
constexpr T & get(variant< T0, T1 > &v)
Returns the variant member of specified type.
Definition: variant.hpp:338

smith::size
constexpr SMITH_HOST_DEVICE int size(zero)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1944

smith::det
constexpr SMITH_HOST_DEVICE auto det(const tensor< T, 3, 3 > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1302

smith::relative_error
double relative_error(tensor< T, n... > A, tensor< T, n... > B)
computes the relative error (in the Frobenius norm) between two tensors of the same shape
Definition: tensor.hpp:1471

smith::leading_dimension
constexpr SMITH_HOST_DEVICE int leading_dimension(tensor< T, m, n... >)
a function for querying the first dimension of a tensor
Definition: tensor.hpp:1970

smith::operator*
constexpr SMITH_HOST_DEVICE auto operator*(const dual< gradient_type > &a, double b)
multiplication of a dual number and a non-dual number
Definition: dual.hpp:108

smith::get_gradient
constexpr SMITH_HOST_DEVICE auto get_gradient(const tensor< double, n... > &)
get the gradient of type tensor (note: since its stored type is not a dual number,...
Definition: tensor.hpp:1836

smith::operator*
constexpr SMITH_HOST_DEVICE auto operator*(const tensor< S, m... > &A, const tensor< T, n... > &B)
this is a shorthand for dot(A, B)
Definition: tensor.hpp:1052

smith::contract
SMITH_HOST_DEVICE auto contract(const zero &, const T &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1456

smith::chain_rule
SMITH_HOST_DEVICE auto chain_rule(const tensor< double, m, n, p... > &df_dx, const tensor< double, p... > &dx)
Definition: tensor.hpp:1913

smith::I2
constexpr SMITH_HOST_DEVICE auto I2(const tensor< T, 3, 3 > &A)
Returns the second invariant of a 3x3 matrix.
Definition: tensor.hpp:1284

smith::sqrt
SMITH_HOST_DEVICE auto sqrt(dual< gradient_type > x)
implementation of square root for dual numbers
Definition: dual.hpp:308

smith::inner
constexpr SMITH_HOST_DEVICE auto inner(zero, double)
Definition: tensor.hpp:750

smith::type
constexpr SMITH_HOST_DEVICE auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition: tuple.hpp:376

smith::solve_lower_triangular
constexpr SMITH_HOST_DEVICE auto solve_lower_triangular(const tensor< T, n, n > &L, const tensor< T, n, m... > &b)
Definition: tensor.hpp:1580

smith::norm
constexpr SMITH_HOST_DEVICE auto norm(zero)
overload of Frobenius norm for zero type
Definition: tensor.hpp:1107

smith::operator+
constexpr SMITH_HOST_DEVICE auto operator+(dual< gradient_type > a, double b)
addition of a dual number and a non-dual number
Definition: dual.hpp:59

smith::squared_norm
constexpr SMITH_HOST_DEVICE auto squared_norm(const tensor< T, n... > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1086

smith::vec3
tensor< double, 3 > vec3
statically sized vector of 3 doubles
Definition: tensor.hpp:114

smith::operator<<
auto & operator<<(std::ostream &out, zero)
Write a zero out to an output stream.
Definition: tensor.hpp:1717

smith::cos
SMITH_HOST_DEVICE auto cos(dual< gradient_type > a)
implementation of cosine for dual numbers
Definition: dual.hpp:316

smith::reduced_tensor
std::conditional_t<(n1==1 &&n2==1), double, std::conditional_t< n1==1, tensor< T, n2 >, std::conditional_t< n2==1, tensor< T, n1 >, tensor< T, n1, n2 > >> > reduced_tensor
Removes 1s from tensor dimensions. For example, a tensor<T, 1, 10> is equivalent to a tensor<T,...
Definition: tensor.hpp:272

smith::chop
constexpr SMITH_HOST_DEVICE auto chop(const tensor< double, m, n > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1764

smith::diag
constexpr SMITH_HOST_DEVICE tensor< T, n > diag(const tensor< T, n, n > &D)
Returns an array containing the diagonal entries of a square matrix.
Definition: tensor.hpp:1238

smith::isnan
bool isnan(const zero &)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1985

smith::tensor
tensor(const T(&data)[n1]) -> tensor< T, n1 >
class template argument deduction guide for type tensor.

smith::double_dot
constexpr auto double_dot(const tensor< S, m, n > &A, const tensor< T, m, n > &B)
Definition: tensor.hpp:1037

smith::inv
constexpr SMITH_HOST_DEVICE auto inv(const tensor< T, n, n > &A)
Definition: tensor.hpp:1687

smith::cross
auto cross(const tensor< S, 3 > &u, const tensor< T, 3 > &v)
compute the (right-handed) cross product of two 3-vectors
Definition: tensor.hpp:981

smith::dev
constexpr SMITH_HOST_DEVICE auto dev(const tensor< T, n, n > &A)
Calculates the deviator of a matrix (rank-2 tensor)
Definition: tensor.hpp:1193

smith::matrix_sqrt
auto matrix_sqrt(const tensor< T, dim, dim > &A)
compute the matrix square root of a square, real-valued, symmetric matrix i.e. given A,...
Definition: tensor.hpp:1359

smith::abs
SMITH_HOST_DEVICE auto abs(dual< gradient_type > x)
Implementation of absolute value function for dual numbers.
Definition: dual.hpp:219

smith::tensor
tensor(const T(&data)[n1][n2]) -> tensor< T, n1, n2 >
class template argument deduction guide for type tensor.

smith::to_3x3
SMITH_HOST_DEVICE tensor< T, 3, 3 > to_3x3(const tensor< T, 2, 2 > &A)
promotes a 2x2 matrix to a 3x3 matrix, by populating the upper left block, leaving zeroes in the thir...
Definition: tensor.hpp:1126

smith::print
SMITH_HOST_DEVICE void print(const tensor< double, m, n... > &A)
print a tensor using printf, so that it is suitable for use inside cuda kernels.
Definition: tensor.hpp:1735

smith::sym
constexpr SMITH_HOST_DEVICE auto sym(const tensor< T, n, n > &A)
Returns the symmetric part of a square matrix.
Definition: tensor.hpp:1157

smith::outer_product_t
typename detail::outer_prod< T1, T2 >::type outer_product_t
a type function that returns the tensor type of an outer product of two tensors
Definition: tensor.hpp:1822

smith::tensor_with_shape
constexpr SMITH_HOST_DEVICE auto tensor_with_shape(std::integer_sequence< int, n... >)
Creates a tensor given the dimensions in a std::integer_sequence.
Definition: tensor.hpp:280

smith::dot
constexpr SMITH_HOST_DEVICE auto dot(const tensor< S, m, n, p, q > &A, const tensor< T, q > &B)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:942

smith::outer
constexpr SMITH_HOST_DEVICE auto outer(const tensor< S, m > &A, const tensor< T, n > &B)
Definition: tensor.hpp:644

smith::operator+=
constexpr SMITH_HOST_DEVICE auto & operator+=(tensor< T, n... > &A, zero)
compound assignment (+) between a tensor and zero (no-op)
Definition: tensor.hpp:557

smith::linear_solve
constexpr SMITH_HOST_DEVICE auto linear_solve(const LuFactorization< T, n > &, const zero)
Definition: tensor.hpp:1635

smith::vec2
tensor< double, 2 > vec2
statically sized vector of 2 doubles
Definition: tensor.hpp:113

smith::dimension
constexpr SMITH_HOST_DEVICE int dimension(const tensor< T, n... > &)
a function for querying the ith dimension of a tensor
Definition: tensor.hpp:1955

smith::mat2
tensor< double, 2, 2 > mat2
statically sized 2x2 matrix of doubles
Definition: tensor.hpp:116

smith::transpose
constexpr SMITH_HOST_DEVICE auto transpose(const tensor< T, m, n > &A)
Returns the transpose of the matrix.
Definition: tensor.hpp:1268

smith::DenseIdentity
constexpr SMITH_HOST_DEVICE tensor< double, dim, dim > DenseIdentity()
Obtains the identity matrix of the specified dimension.
Definition: tensor.hpp:1252

smith::operator-=
constexpr SMITH_HOST_DEVICE auto & operator-=(dual< gradient_type > &a, const dual< gradient_type > &b)
compound assignment (-) for dual numbers
Definition: dual.hpp:191

smith::sin
SMITH_HOST_DEVICE auto sin(dual< gradient_type > a)
implementation of sine for dual numbers
Definition: dual.hpp:324

smith::normalize
SMITH_HOST_DEVICE auto normalize(const tensor< T, n... > &A)
Normalizes the tensor. Each element is divided by the Frobenius norm of the tensor,...
Definition: tensor.hpp:1115

smith::detApIm1
constexpr SMITH_HOST_DEVICE auto detApIm1(const tensor< T, 3, 3 > &A)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: tensor.hpp:1332

smith::tr
constexpr SMITH_HOST_DEVICE auto tr(const tensor< T, n, n > &A)
Returns the trace of a square matrix.
Definition: tensor.hpp:1142

smith::dot
constexpr SMITH_HOST_DEVICE auto dot(const isotropic_tensor< S, m, m > &I, const tensor< T, m, n... > &A)
dot product between an isotropic and (nonisotropic) tensor
Definition: isotropic_tensor.hpp:203

smith::operator-
constexpr SMITH_HOST_DEVICE auto operator-(const tensor< S, m, n... > &A, const tensor< T, m, n... > &B)
return the difference of two tensors
Definition: tensor.hpp:456

smith::solve_upper_triangular
constexpr SMITH_HOST_DEVICE auto solve_upper_triangular(const tensor< T, n, n > &U, const tensor< T, n, m... > &y)
Solves an upper triangular system Ux = y.
Definition: tensor.hpp:1601

smith::operator/
constexpr SMITH_HOST_DEVICE auto operator/(const dual< gradient_type > &a, double b)
division of a dual number by a non-dual number
Definition: dual.hpp:129

smith::mat3
tensor< double, 3, 3 > mat3
statically sized 3x3 matrix of doubles
Definition: tensor.hpp:117

smith::diagonal_matrix
constexpr SMITH_HOST_DEVICE auto diagonal_matrix(const tensor< T, n, n > &A)
Returns a square matrix (rank-2 tensor) containing the diagonal entries of the input square matrix wi...
Definition: tensor.hpp:1210

smith::operator-
Domain operator-(const Domain &a, const Domain &b)
create a new domain that is the set difference of a and b
Definition: domain.cpp:761

smith::LuFactorization
Representation of an LU factorization.
Definition: tensor.hpp:1540

smith::LuFactorization::U
tensor< T, n, n > U
Upper triangular factor.
Definition: tensor.hpp:1543

smith::LuFactorization::P
tensor< int, n > P
Row permutation indices due to partial pivoting.
Definition: tensor.hpp:1541

smith::LuFactorization::L
tensor< T, n, n > L
Lower triangular factor. Has ones on diagonal.
Definition: tensor.hpp:1542

smith::is_zero
checks if a type is zero
Definition: tensor.hpp:150

smith::tensor
Arbitrary-rank tensor class.
Definition: tensor.hpp:28

smith::zero
A sentinel struct for eliding no-op tensor operations.
Definition: tensor.hpp:122

smith::zero::operator()
SMITH_HOST_DEVICE auto operator()(T...) const
zero can be accessed like a multidimensional array
Definition: tensor.hpp:135

smith::zero::operator=
SMITH_HOST_DEVICE auto operator=(T)
anything assigned to zero does not change its value and returns zero
Definition: tensor.hpp:142