/**
 * @brief Execution-space annotation macros.
 *
 * When compiling with nvcc (__CUDACC__ defined) these expand to the CUDA
 * execution-space qualifiers; in a host-only build they expand to nothing so
 * the same source compiles unchanged.
 */
#if defined(__CUDACC__)
#define SERAC_HOST_DEVICE __host__ __device__
#define SERAC_HOST __host__
#define SERAC_DEVICE __device__

// nvcc warns when a __host__ __device__ function calls a host-only function;
// the pragma that suppresses that warning was renamed in CUDA 7.5.
// NOTE(review): a "#" inside an object-like macro is never re-scanned as a
// directive, so "#define X #pragma ..." silently emits nothing -- the
// C++11 _Pragma() operator is the correct way to generate a pragma from a
// macro expansion.
// NOTE(review): __CUDAVER__ is not defined by nvcc itself (nvcc provides
// __CUDACC_VER_MAJOR__ / __CUDACC_VER_MINOR__) -- confirm the build system
// defines it, otherwise this always takes the #else branch.
#if __CUDAVER__ >= 75000
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING _Pragma("nv_exec_check_disable")
#else
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING _Pragma("hd_warning_disable")
#endif

#include <cuda_runtime.h>
#else  // host-only build: all annotations expand to nothing
#define SERAC_HOST_DEVICE
#define SERAC_HOST
#define SERAC_DEVICE
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
#endif
55 #include "axom/core.hpp"
85 template <ExecutionSpace space>
88 static constexpr axom::MemorySpace
value = axom::MemorySpace::Dynamic;
91 #ifdef SERAC_USE_UMPIRE
95 static constexpr axom::MemorySpace
value = axom::MemorySpace::Host;
101 static constexpr axom::MemorySpace
value = axom::MemorySpace::Device;
107 static constexpr axom::MemorySpace
value = axom::MemorySpace::Unified;
112 template <ExecutionSpace space>
116 template <
typename T,
int dim, axom::MemorySpace space>
119 std::memset(arr.data(), 0,
static_cast<std::size_t
>(arr.size()) *
sizeof(T));
// NOTE(review): the preprocessor guard was reconstructed -- confirm the
// original used __CUDACC__ (vs SERAC_USE_CUDA) around this overload.
#ifdef __CUDACC__
/**
 * @brief Set the contents of a GPU-resident array to zero, byte-wise.
 *
 * Overload for arrays in the memory space corresponding to
 * ExecutionSpace::GPU; device memory cannot be written with std::memset,
 * so cudaMemset is used instead.
 *
 * @tparam T The element type
 * @tparam dim The array rank
 * @param[inout] arr The array to zero out
 */
template <typename T, int dim>
void zero_out(axom::Array<T, dim, execution_to_memory_v<ExecutionSpace::GPU>>& arr)
{
  cudaMemset(arr.data(), 0, static_cast<std::size_t>(arr.size()) * sizeof(T));
}
#endif
133 template <
typename T,
int dim, ExecutionSpace space>
134 using ExecArray = axom::Array<T, dim, detail::execution_to_memory_v<space>>;
137 template <
typename T,
int dim = 1>
140 #ifdef SERAC_USE_CUDA
143 template <
typename T,
int dim = 1>
147 template <
typename T,
int dim = 1>
154 template <
typename T,
int dim = 1>
158 template <
typename T,
int dim = 1>
164 template <
typename T,
int dim, ExecutionSpace space>
165 using ExecArrayView = axom::ArrayView<T, dim, detail::execution_to_memory_v<space>>;
168 template <
typename T,
int dim = 1>
171 #ifdef SERAC_USE_CUDA
173 template <
typename T,
int dim = 1>
178 template <
typename T,
int dim, axom::MemorySpace space>
179 auto view(axom::Array<T, dim, space>& arr)
181 return axom::ArrayView<T, dim, space>(arr);
187 namespace accelerator {
201 #if defined(__CUDACC__)
209 inline void displayLastCUDAMessage(
const char* success_string =
"",
bool exit_on_error =
false)
211 auto error = cudaGetLastError();
212 if (error != cudaError::cudaSuccess) {
218 }
else if (strlen(success_string) > 0) {
219 SLIC_INFO_ROOT(success_string);
231 inline std::tuple<std::size_t, std::size_t> getCUDAMemInfo()
233 std::size_t free_memory, total_memory;
234 cudaMemGetInfo(&free_memory, &total_memory);
235 displayLastCUDAMessage();
243 inline std::string getCUDAMemInfoString()
245 auto [free_memory, total_memory] = getCUDAMemInfo();
246 return axom::fmt::format(
"Free memory: {} Total_memory: {}", free_memory, total_memory);
257 template <ExecutionSpace exec,
typename T>
260 if constexpr (exec == ExecutionSpace::CPU) {
261 return std::shared_ptr<T[]>(
new T[n]);
264 #if defined(__CUDACC__)
265 if constexpr (exec == ExecutionSpace::GPU) {
267 cudaMalloc(&data,
sizeof(T) * n);
268 auto deleter = [](T* ptr) { cudaFree(ptr); };
269 return std::shared_ptr<T[]>(data, deleter);
283 return std::tuple{make_shared_array<exec, T>(n)...};
constexpr axom::MemorySpace execution_to_memory_v
Helper template for execution_to_memory trait.
void zero_out(axom::Array< T, dim, space > &arr)
set the contents of an array to zero, byte-wise
This file contains all the necessary functions and macros required for logging, as well as a helpe...
std::shared_ptr< T[]> make_shared_array(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
void initializeDevice()
Initializes the device (GPU)
auto make_shared_arrays(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
void terminateDevice()
Cleans up the device, if applicable.
std::string concat(T... args)
Produces a string by applying << to all arguments.
Accelerator functionality.
axom::Array< T, dim, detail::execution_to_memory_v< space > > ExecArray
Alias for an Array corresponding to a particular ExecutionSpace.
constexpr ExecutionSpace default_execution_space
The default execution space for serac builds.
ExecArray< T, dim, ExecutionSpace::CPU > GPUArray
Alias for an array on the GPU (in non-CUDA builds, as shown here, it falls back to a CPU array).
auto view(axom::Array< T, dim, space > &arr)
convenience function for creating a view of an axom::Array type
ExecArray< T, dim, ExecutionSpace::CPU > UnifiedArray
Alias for an array in unified memory (in non-CUDA builds, as shown here, it falls back to a CPU array).
ExecArrayView< T, dim, ExecutionSpace::CPU > CPUArrayView
Alias for an array view on the CPU.
SERAC_HOST_DEVICE tuple< T... > make_tuple(const T &... args)
helper function for combining a list of values into a tuple
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.
axom::ArrayView< T, dim, detail::execution_to_memory_v< space > > ExecArrayView
Alias for an ArrayView corresponding to a particular ExecutionSpace.
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
ExecArray< T, dim, ExecutionSpace::CPU > CPUArray
Alias for an array on the CPU.
Various helper functions and macros for profiling using Caliper.
Trait for "translating" between serac::ExecutionSpace and axom::MemorySpace.
static constexpr axom::MemorySpace value
The corresponding axom::MemorySpace.