16 #include "smith/smith_config.hpp"
18 #if defined(SMITH_USE_CUDA) || defined(SMITH_USE_HIP)
23 #define SMITH_HOST_DEVICE __host__ __device__
27 #define SMITH_HOST __host__
31 #define SMITH_DEVICE __device__
37 #define SMITH_HOST_DEVICE
54 #if defined(__CUDACC__)
55 #if __CUDAVER__ >= 75000
56 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma nv_exec_check_disable
58 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma hd_warning_disable
60 #include <cuda_runtime.h>
65 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
72 #include "axom/core.hpp"
77 #include "smith/smith_config.hpp"
104 template <ExecutionSpace space>
107 static constexpr axom::MemorySpace
value = axom::MemorySpace::Dynamic;
110 #ifdef SMITH_USE_UMPIRE
114 static constexpr axom::MemorySpace
value = axom::MemorySpace::Host;
120 static constexpr axom::MemorySpace
value = axom::MemorySpace::Device;
126 static constexpr axom::MemorySpace
value = axom::MemorySpace::Unified;
131 template <ExecutionSpace space>
135 template <
typename T,
int dim, axom::MemorySpace space>
138 std::memset(arr.data(), 0,
static_cast<std::size_t
>(arr.size()) *
sizeof(T));
/**
 * @brief Sets the contents of a host-memory array view to zero, byte-wise.
 *
 * @tparam T   element type of the view
 * @tparam dim dimension of the view
 * @param arr  the view (in detail::host_memory_space) whose storage is overwritten
 *
 * Writes arr.size() * sizeof(T) zero bytes starting at arr.data() using std::memset.
 */
142 template <
typename T,
int dim>
143 void zero_out(axom::ArrayView<T, dim, detail::host_memory_space>& arr)
// arr.size() is converted to std::size_t before being scaled to a byte count
145 std::memset(arr.data(), 0,
static_cast<std::size_t
>(arr.size()) *
sizeof(T));
/**
 * @brief Sets the contents of a GPU array to zero, byte-wise.
 *
 * @tparam T   element type of the array
 * @tparam dim dimension of the array
 * @param arr  array whose memory space is execution_to_memory_v<ExecutionSpace::GPU>
 *
 * Writes arr.size() * sizeof(T) zero bytes starting at arr.data() using cudaMemset.
 */
149 template <
typename T,
int dim>
150 void zero_out(axom::Array<T, dim, execution_to_memory_v<ExecutionSpace::GPU>>& arr)
// NOTE(review): the value returned by cudaMemset is discarded here, so a failure is not reported at this call site
152 cudaMemset(arr.data(), 0,
static_cast<std::size_t
>(arr.size()) *
sizeof(T));
/**
 * @brief Alias for an axom::Array corresponding to a particular ExecutionSpace.
 *
 * @tparam T     element type
 * @tparam dim   array dimension
 * @tparam space execution space; the array's memory space is detail::execution_to_memory_v<space>
 */
159 template <
typename T,
int dim, ExecutionSpace space>
160 using ExecArray = axom::Array<T, dim, detail::execution_to_memory_v<space>>;
163 template <
typename T,
int dim = 1>
166 #ifdef SMITH_USE_CUDA
169 template <
typename T,
int dim = 1>
173 template <
typename T,
int dim = 1>
180 template <
typename T,
int dim = 1>
184 template <
typename T,
int dim = 1>
/**
 * @brief Alias for an axom::ArrayView corresponding to a particular ExecutionSpace.
 *
 * @tparam T     element type
 * @tparam dim   view dimension
 * @tparam space execution space; the view's memory space is detail::execution_to_memory_v<space>
 */
190 template <
typename T,
int dim, ExecutionSpace space>
191 using ExecArrayView = axom::ArrayView<T, dim, detail::execution_to_memory_v<space>>;
194 template <
typename T,
int dim = 1>
197 #ifdef SMITH_USE_CUDA
199 template <
typename T,
int dim = 1>
/**
 * @brief Convenience function for creating a view of an axom::Array.
 *
 * @tparam T     element type of the array
 * @tparam dim   dimension of the array
 * @tparam space memory space of the array
 * @param arr    the array to wrap
 * @return an axom::ArrayView<T, dim, space> constructed from @a arr
 */
204 template <
typename T,
int dim, axom::MemorySpace space>
205 auto view(axom::Array<T, dim, space>& arr)
207 return axom::ArrayView<T, dim, space>(arr);
213 namespace accelerator {
227 #if defined(__CUDACC__)
/**
 * @brief Inspects the most recent CUDA error state and reports on it.
 *
 * @param success_string message logged via SLIC_INFO_ROOT when no error is pending;
 *                       nothing is logged when it is empty (the default)
 * @param exit_on_error  defaults to false
 *                       NOTE(review): its use is in the error branch, which is not visible
 *                       in this listing; presumably it aborts on error, confirm against the full source
 */
235 inline void displayLastCUDAMessage(
const char* success_string =
"",
bool exit_on_error =
false)
237 auto error = cudaGetLastError();
// error path: the handling statements are not shown in this listing
238 if (error != cudaError::cudaSuccess) {
244 }
// success path: only emit a message when the caller supplied a non-empty one
else if (strlen(success_string) > 0) {
245 SLIC_INFO_ROOT(success_string);
/**
 * @brief Queries device memory usage through cudaMemGetInfo.
 *
 * @return a tuple of two std::size_t values
 *         NOTE(review): the return statement is not visible in this listing; the caller
 *         getCUDAMemInfoString() unpacks the result as (free_memory, total_memory), confirm the order
 */
257 inline std::tuple<std::size_t, std::size_t> getCUDAMemInfo()
// NOTE(review): both variables are declared without an initializer and are only written by cudaMemGetInfo
259 std::size_t free_memory, total_memory;
260 cudaMemGetInfo(&free_memory, &total_memory);
// check the CUDA error state after the query, using the default arguments
261 displayLastCUDAMessage();
/**
 * @brief Builds a human-readable string from the values reported by getCUDAMemInfo().
 *
 * @return a string of the form "Free memory: <free> Total_memory: <total>"
 */
269 inline std::string getCUDAMemInfoString()
271 auto [free_memory, total_memory] = getCUDAMemInfo();
272 return std::format(
"Free memory: {} Total_memory: {}", free_memory, total_memory);
/**
 * @brief Creates a shared_ptr to an array of n values of type T, either on the host or device.
 *
 * @tparam exec selects where the storage is allocated
 * @tparam T    element type
 *
 * NOTE(review): the function signature line is not visible in this listing; the comments
 * below describe only the branches that are shown.
 */
283 template <ExecutionSpace exec,
typename T>
// CPU: the array is allocated with new T[n] and owned by shared_ptr<T[]>
286 if constexpr (exec == ExecutionSpace::CPU) {
287 return std::shared_ptr<T[]>(
new T[n]);
// the GPU branch is only compiled when __CUDACC__ is defined
290 #if defined(__CUDACC__)
291 if constexpr (exec == ExecutionSpace::GPU) {
// NOTE(review): the value returned by cudaMalloc is discarded here
293 cudaMalloc(&data,
sizeof(T) * n);
// custom deleter so the shared_ptr releases the allocation with cudaFree
294 auto deleter = [](T* ptr) { cudaFree(ptr); };
295 return std::shared_ptr<T[]>(data, deleter);
309 return std::tuple{make_shared_array<exec, T>(n)...};
constexpr axom::MemorySpace execution_to_memory_v
Helper template for execution_to_memory trait.
void zero_out(axom::Array< T, dim, space > &arr)
set the contents of an array to zero, byte-wise
This file contains all the necessary functions and macros required for logging as well as a helpe...
This file defines the host memory space.
void initializeDevice(ExecutionSpace exec_space)
Initializes the device (GPU)
std::shared_ptr< T[]> make_shared_array(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
void terminateDevice()
Cleans up the device, if applicable.
auto make_shared_arrays(std::size_t n)
create a tuple of shared_ptrs, one per type T, each to an array of n values, either on the host or device
std::string concat(T... args)
Produces a string by applying << to all arguments.
Accelerator functionality.
ExecArray< T, dim, ExecutionSpace::CPU > GPUArray
Alias for an array on the GPU.
constexpr ExecutionSpace default_execution_space
The default execution space for Smith builds.
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.
auto view(axom::Array< T, dim, space > &arr)
convenience function for creating a view of an axom::Array type
axom::Array< T, dim, detail::execution_to_memory_v< space > > ExecArray
Alias for an Array corresponding to a particular ExecutionSpace.
ExecArray< T, dim, ExecutionSpace::CPU > CPUArray
Alias for an array on the CPU.
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
SMITH_HOST_DEVICE tuple< T... > make_tuple(const T &... args)
helper function for combining a list of values into a tuple
ExecArray< T, dim, ExecutionSpace::CPU > UnifiedArray
Alias for an array in unified memory.
ExecArrayView< T, dim, ExecutionSpace::CPU > CPUArrayView
Alias for an array view on the CPU.
axom::ArrayView< T, dim, detail::execution_to_memory_v< space > > ExecArrayView
Alias for an ArrayView corresponding to a particular ExecutionSpace.
Various helper functions and macros for profiling using Caliper.
Trait for "translating" between smith::ExecutionSpace and axom::MemorySpace.
static constexpr axom::MemorySpace value
The corresponding axom::MemorySpace.