Smith  0.1
Smith is an implicit thermal structural mechanics simulation code.
accelerator.hpp
Go to the documentation of this file.
1 // Copyright (c) Lawrence Livermore National Security, LLC and
2 // other Smith Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
14 #pragma once
15 
16 #include "smith/smith_config.hpp"
17 
// Function-decoration macros. In builds with CUDA or HIP enabled they expand to the
// __host__ / __device__ qualifiers; in every other build they expand to nothing.
18 #if defined(SMITH_USE_CUDA) || defined(SMITH_USE_HIP)
// Decorates a function with both the __host__ and __device__ qualifiers.
23 #define SMITH_HOST_DEVICE __host__ __device__
// Decorates a function with the __host__ qualifier only.
27 #define SMITH_HOST __host__
// Decorates a function with the __device__ qualifier only.
31 #define SMITH_DEVICE __device__
32 #else
// No accelerator backend enabled: the decorations are empty.
37 #define SMITH_HOST_DEVICE
41 #define SMITH_HOST
45 #define SMITH_DEVICE
46 #endif
47 
// Defines SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING. When __CUDACC__ is defined the macro expands to
// one of two pragma spellings selected by the __CUDAVER__ check, and <cuda_runtime.h> is included;
// otherwise the macro expands to nothing.
// NOTE(review): the standard preprocessor does not treat "#pragma" produced by macro expansion as a
// directive; _Pragma("...") is the portable spelling. Confirm the compilers in use accept this form.
// NOTE(review): __CUDAVER__ does not appear to be one of the version macros nvcc documents
// (e.g. __CUDACC_VER_MAJOR__); if it is undefined the #if evaluates to 0 and the #else branch is
// always taken. Confirm where it is defined.
54 #if defined(__CUDACC__)
55 #if __CUDAVER__ >= 75000
56 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma nv_exec_check_disable
57 #else
58 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma hd_warning_disable
59 #endif
60 #include <cuda_runtime.h>
61 #else
// Not compiling with nvcc: nothing to suppress.
65 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
66 #endif
67 
68 #include <memory>
69 #include <cstring>
70 #include <tuple>
71 
72 #include "axom/core.hpp"
73 
77 #include "smith/smith_config.hpp"
78 
82 namespace smith {
83 
// Enum used for signalling whether certain calculations are performed on the CPU or the GPU.
// It is used as a non-type template parameter by the aliases and helpers in this header.
87 enum class ExecutionSpace
88 {
89  CPU,
90  GPU,
91  Dynamic // Corresponds to execution that can "legally" happen on either the host or device
92 };
93 
// The default execution space for Smith builds; fixed to the CPU at compile time.
97 constexpr ExecutionSpace default_execution_space = ExecutionSpace::CPU;
98 
99 namespace detail {
100 
104 template <ExecutionSpace space>
107  static constexpr axom::MemorySpace value = axom::MemorySpace::Dynamic;
108 };
109 
// Explicit specializations of the execution_to_memory trait. They are only compiled when
// SMITH_USE_UMPIRE is defined; without it only the primary template's value is available.
110 #ifdef SMITH_USE_UMPIRE
// CPU execution maps to axom::MemorySpace::Host.
112 template <>
113 struct execution_to_memory<ExecutionSpace::CPU> {
114  static constexpr axom::MemorySpace value = axom::MemorySpace::Host;
115 };
116 
// GPU execution maps to axom::MemorySpace::Device.
118 template <>
119 struct execution_to_memory<ExecutionSpace::GPU> {
120  static constexpr axom::MemorySpace value = axom::MemorySpace::Device;
121 };
122 
// Dynamic execution maps to axom::MemorySpace::Unified.
124 template <>
125 struct execution_to_memory<ExecutionSpace::Dynamic> {
126  static constexpr axom::MemorySpace value = axom::MemorySpace::Unified;
127 };
128 #endif
129 
// Helper variable template for the execution_to_memory trait: shorthand for
// execution_to_memory<space>::value.
131 template <ExecutionSpace space>
132 inline constexpr axom::MemorySpace execution_to_memory_v = execution_to_memory<space>::value;
133 
135 template <typename T, int dim, axom::MemorySpace space>
136 void zero_out(axom::Array<T, dim, space>& arr)
137 {
138  std::memset(arr.data(), 0, static_cast<std::size_t>(arr.size()) * sizeof(T));
139 }
140 
142 template <typename T, int dim>
143 void zero_out(axom::ArrayView<T, dim, detail::host_memory_space>& arr)
144 {
145  std::memset(arr.data(), 0, static_cast<std::size_t>(arr.size()) * sizeof(T));
146 }
#ifdef __CUDACC__
/**
 * @brief Sets the contents of an array in the GPU execution space's memory to zero, byte-wise
 *
 * Only available when compiling with nvcc. The storage is cleared with cudaMemset; its return
 * status is not inspected.
 *
 * @param[inout] arr The array whose storage is cleared
 */
template <typename T, int dim>
void zero_out(axom::Array<T, dim, execution_to_memory_v<ExecutionSpace::GPU>>& arr)
{
  const std::size_t num_bytes = static_cast<std::size_t>(arr.size()) * sizeof(T);
  cudaMemset(arr.data(), 0, num_bytes);
}
#endif
155 
156 } // namespace detail
157 
// Alias for an axom::Array corresponding to a particular ExecutionSpace: the array's memory space
// is looked up through detail::execution_to_memory_v<space>.
159 template <typename T, int dim, ExecutionSpace space>
160 using ExecArray = axom::Array<T, dim, detail::execution_to_memory_v<space>>;
161 
163 template <typename T, int dim = 1>
165 
166 #ifdef SMITH_USE_CUDA
167 
169 template <typename T, int dim = 1>
171 
173 template <typename T, int dim = 1>
175 
176 #else
177 // If not a CUDA build then force all arrays to be CPU
178 
180 template <typename T, int dim = 1>
182 
184 template <typename T, int dim = 1>
186 
187 #endif
188 
// Alias for an axom::ArrayView corresponding to a particular ExecutionSpace: the view's memory
// space is looked up through detail::execution_to_memory_v<space>.
190 template <typename T, int dim, ExecutionSpace space>
191 using ExecArrayView = axom::ArrayView<T, dim, detail::execution_to_memory_v<space>>;
192 
194 template <typename T, int dim = 1>
196 
// Alias for an array view in the GPU execution space; declared only when SMITH_USE_CUDA is
// defined. The number of dimensions defaults to 1.
197 #ifdef SMITH_USE_CUDA
199 template <typename T, int dim = 1>
200 using GPUArrayView = ExecArrayView<T, dim, ExecutionSpace::GPU>;
201 #endif
202 
204 template <typename T, int dim, axom::MemorySpace space>
205 auto view(axom::Array<T, dim, space>& arr)
206 {
207  return axom::ArrayView<T, dim, space>(arr);
208 }
209 
213 namespace accelerator {
214 
// Initializes the device (GPU). Declaration only; the definition is not part of this header.
// exec_space: the execution space passed to the initialization routine.
220 void initializeDevice(ExecutionSpace exec_space);
221 
// Cleans up the device, if applicable. Declaration only; the definition is not part of this header.
225 void terminateDevice();
226 
227 #if defined(__CUDACC__)
228 
235 inline void displayLastCUDAMessage(const char* success_string = "", bool exit_on_error = false)
236 {
237  auto error = cudaGetLastError();
238  if (error != cudaError::cudaSuccess) {
239  if (exit_on_error) {
240  SLIC_ERROR_ROOT(smith::profiling::concat("Last CUDA Error Message :", cudaGetErrorString(error)));
241  } else {
242  SLIC_WARNING_ROOT(smith::profiling::concat("Last CUDA Error Message :", cudaGetErrorString(error)));
243  }
244  } else if (strlen(success_string) > 0) {
245  SLIC_INFO_ROOT(success_string);
246  }
247 }
248 
257 inline std::tuple<std::size_t, std::size_t> getCUDAMemInfo()
258 {
259  std::size_t free_memory, total_memory;
260  cudaMemGetInfo(&free_memory, &total_memory);
261  displayLastCUDAMessage();
262  return std::make_tuple(free_memory, total_memory);
263 }
264 
/**
 * @brief Formats the result of getCUDAMemInfo() as a human-readable string
 *
 * NOTE(review): std::format and std::string are used here, but <format> and <string> are not among
 * the headers this file includes directly - confirm they arrive through another include.
 *
 * @return A string of the form "Free memory: <free> Total_memory: <total>"
 */
inline std::string getCUDAMemInfoString()
{
  const auto memory_info = getCUDAMemInfo();
  return std::format("Free memory: {} Total_memory: {}", std::get<0>(memory_info), std::get<1>(memory_info));
}
274 
275 #endif
276 
283 template <ExecutionSpace exec, typename T>
284 std::shared_ptr<T[]> make_shared_array(std::size_t n)
285 {
286  if constexpr (exec == ExecutionSpace::CPU) {
287  return std::shared_ptr<T[]>(new T[n]);
288  }
289 
290 #if defined(__CUDACC__)
291  if constexpr (exec == ExecutionSpace::GPU) {
292  T* data;
293  cudaMalloc(&data, sizeof(T) * n);
294  auto deleter = [](T* ptr) { cudaFree(ptr); };
295  return std::shared_ptr<T[]>(data, deleter);
296  }
297 #endif
298 }
299 
306 template <ExecutionSpace exec, typename... T>
307 auto make_shared_arrays(std::size_t n)
308 {
309  return std::tuple{make_shared_array<exec, T>(n)...};
310 }
311 
312 } // namespace accelerator
313 
314 } // namespace smith
constexpr axom::MemorySpace execution_to_memory_v
Helper template for execution_to_memory trait.
void zero_out(axom::Array< T, dim, space > &arr)
set the contents of an array to zero, byte-wise
This file contains all the necessary functions and macros required for logging as well as a helpe...
This file defines the host memory space.
void initializeDevice(ExecutionSpace exec_space)
Initializes the device (GPU)
Definition: accelerator.cpp:24
std::shared_ptr< T[]> make_shared_array(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
void terminateDevice()
Cleans up the device, if applicable.
Definition: accelerator.cpp:43
auto make_shared_arrays(std::size_t n)
create shared_ptrs to arrays of n values, one array for each type in T..., either on the host or device
std::string concat(T... args)
Produces a string by applying << to all arguments.
Definition: profiling.hpp:117
Accelerator functionality.
Definition: smith.cpp:36
ExecArray< T, dim, ExecutionSpace::CPU > GPUArray
Alias for an array on the GPU (in builds without CUDA, all arrays are forced to be CPU arrays, as shown here).
constexpr ExecutionSpace default_execution_space
The default execution space for Smith builds.
Definition: accelerator.hpp:97
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.
auto view(axom::Array< T, dim, space > &arr)
convenience function for creating a view of an axom::Array type
axom::Array< T, dim, detail::execution_to_memory_v< space > > ExecArray
Alias for an Array corresponding to a particular ExecutionSpace.
ExecArray< T, dim, ExecutionSpace::CPU > CPUArray
Alias for an array on the CPU.
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
Definition: accelerator.hpp:88
SMITH_HOST_DEVICE tuple< T... > make_tuple(const T &... args)
helper function for combining a list of values into a tuple
Definition: tuple.hpp:266
ExecArray< T, dim, ExecutionSpace::CPU > UnifiedArray
Alias for an array in unified memory (in builds without CUDA, all arrays are forced to be CPU arrays, as shown here).
ExecArrayView< T, dim, ExecutionSpace::CPU > CPUArrayView
Alias for an array view on the CPU.
axom::ArrayView< T, dim, detail::execution_to_memory_v< space > > ExecArrayView
Alias for an ArrayView corresponding to a particular ExecutionSpace.
Various helper functions and macros for profiling using Caliper.
Trait for "translating" between smith::ExecutionSpace and axom::MemorySpace.
static constexpr axom::MemorySpace value
The corresponding axom::MemorySpace.