16 #if defined(__CUDACC__)
17 #define SMITH_HOST_DEVICE __host__ __device__
18 #define SMITH_HOST __host__
19 #define SMITH_DEVICE __device__
27 #if __CUDAVER__ >= 75000
28 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma nv_exec_check_disable
30 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma hd_warning_disable
33 #include <cuda_runtime.h>
38 #define SMITH_HOST_DEVICE
50 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
57 #include "axom/core.hpp"
62 #include "smith/smith_config.hpp"
89 template <ExecutionSpace space>
92 static constexpr axom::MemorySpace
value = axom::MemorySpace::Dynamic;
95 #ifdef SMITH_USE_UMPIRE
99 static constexpr axom::MemorySpace
value = axom::MemorySpace::Host;
105 static constexpr axom::MemorySpace
value = axom::MemorySpace::Device;
111 static constexpr axom::MemorySpace
value = axom::MemorySpace::Unified;
116 template <ExecutionSpace space>
120 template <
typename T,
int dim, axom::MemorySpace space>
123 std::memset(arr.data(), 0,
static_cast<std::size_t
>(arr.size()) *
sizeof(T));
127 template <
typename T,
int dim>
128 void zero_out(axom::ArrayView<T, dim, detail::host_memory_space>& arr)
130 std::memset(arr.data(), 0,
static_cast<std::size_t
>(arr.size()) *
sizeof(T));
134 template <
typename T,
int dim>
135 void zero_out(axom::Array<T, dim, execution_to_memory_v<ExecutionSpace::GPU>>& arr)
137 cudaMemset(arr.data(), 0,
static_cast<std::size_t
>(arr.size()) *
sizeof(T));
144 template <
typename T,
int dim, ExecutionSpace space>
145 using ExecArray = axom::Array<T, dim, detail::execution_to_memory_v<space>>;
148 template <
typename T,
int dim = 1>
151 #ifdef SMITH_USE_CUDA
154 template <
typename T,
int dim = 1>
158 template <
typename T,
int dim = 1>
165 template <
typename T,
int dim = 1>
169 template <
typename T,
int dim = 1>
175 template <
typename T,
int dim, ExecutionSpace space>
176 using ExecArrayView = axom::ArrayView<T, dim, detail::execution_to_memory_v<space>>;
179 template <
typename T,
int dim = 1>
182 #ifdef SMITH_USE_CUDA
184 template <
typename T,
int dim = 1>
189 template <
typename T,
int dim, axom::MemorySpace space>
190 auto view(axom::Array<T, dim, space>& arr)
192 return axom::ArrayView<T, dim, space>(arr);
198 namespace accelerator {
212 #if defined(__CUDACC__)
220 inline void displayLastCUDAMessage(
const char* success_string =
"",
bool exit_on_error =
false)
222 auto error = cudaGetLastError();
223 if (error != cudaError::cudaSuccess) {
229 }
else if (strlen(success_string) > 0) {
230 SLIC_INFO_ROOT(success_string);
242 inline std::tuple<std::size_t, std::size_t> getCUDAMemInfo()
244 std::size_t free_memory, total_memory;
245 cudaMemGetInfo(&free_memory, &total_memory);
246 displayLastCUDAMessage();
254 inline std::string getCUDAMemInfoString()
256 auto [free_memory, total_memory] = getCUDAMemInfo();
257 return axom::fmt::format(
"Free memory: {} Total_memory: {}", free_memory, total_memory);
268 template <ExecutionSpace exec,
typename T>
271 if constexpr (exec == ExecutionSpace::CPU) {
272 return std::shared_ptr<T[]>(
new T[n]);
275 #if defined(__CUDACC__)
276 if constexpr (exec == ExecutionSpace::GPU) {
278 cudaMalloc(&data,
sizeof(T) * n);
279 auto deleter = [](T* ptr) { cudaFree(ptr); };
280 return std::shared_ptr<T[]>(data, deleter);
294 return std::tuple{make_shared_array<exec, T>(n)...};
constexpr axom::MemorySpace execution_to_memory_v
Helper template for execution_to_memory trait.
void zero_out(axom::Array< T, dim, space > &arr)
set the contents of an array to zero, byte-wise
This file contains all the necessary functions and macros required for logging as well as a helpe...
This file defines the host memory space.
void initializeDevice()
Initializes the device (GPU)
std::shared_ptr< T[]> make_shared_array(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
void terminateDevice()
Cleans up the device, if applicable.
auto make_shared_arrays(std::size_t n)
create a tuple of shared_ptrs, one for each type T, each to an array of n values of that type, either on the host or device
std::string concat(T... args)
Produces a string by applying << to all arguments.
Accelerator functionality.
ExecArray< T, dim, ExecutionSpace::CPU > GPUArray
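Alias for an array on the GPU (the definition shown here maps to ExecutionSpace::CPU, presumably the fallback used when CUDA is not enabled).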
constexpr ExecutionSpace default_execution_space
The default execution space for Smith builds.
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.
auto view(axom::Array< T, dim, space > &arr)
convenience function for creating a view of an axom::Array type
axom::Array< T, dim, detail::execution_to_memory_v< space > > ExecArray
Alias for an Array corresponding to a particular ExecutionSpace.
ExecArray< T, dim, ExecutionSpace::CPU > CPUArray
Alias for an array on the CPU.
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
SMITH_HOST_DEVICE tuple< T... > make_tuple(const T &... args)
helper function for combining a list of values into a tuple
ExecArray< T, dim, ExecutionSpace::CPU > UnifiedArray
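Alias for an array in unified memory (the definition shown here maps to ExecutionSpace::CPU, presumably the fallback used when CUDA is not enabled).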
ExecArrayView< T, dim, ExecutionSpace::CPU > CPUArrayView
Alias for an array view on the CPU.
axom::ArrayView< T, dim, detail::execution_to_memory_v< space > > ExecArrayView
Alias for an ArrayView corresponding to a particular ExecutionSpace.
Various helper functions and macros for profiling using Caliper.
Trait for "translating" between smith::ExecutionSpace and axom::MemorySpace.
static constexpr axom::MemorySpace value
The corresponding axom::MemorySpace.