Smith  0.1
Smith is an implicit thermal structural mechanics simulation code.
accelerator.hpp
Go to the documentation of this file.
1 // Copyright (c) Lawrence Livermore National Security, LLC and
2 // other Smith Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
14 #pragma once
15 
#if defined(__CUDACC__)
/// @brief Expands to `__host__ __device__` when the translation unit is compiled as CUDA
#define SMITH_HOST_DEVICE __host__ __device__
/// @brief Expands to `__host__` when the translation unit is compiled as CUDA
#define SMITH_HOST __host__
/// @brief Expands to `__device__` when the translation unit is compiled as CUDA
#define SMITH_DEVICE __device__

/**
 * @brief Pragma that silences the compiler's host/device execution-space check for the next declaration
 *
 * The `_Pragma("...")` operator is used because a `#pragma` directive cannot be
 * produced by expanding an object-like macro: `#define X #pragma foo` leaves the
 * tokens `# pragma foo` in the expansion instead of issuing the directive.
 *
 * NOTE(review): `__CUDAVER__` is not one of the version macros documented for
 * nvcc (`__CUDACC_VER_MAJOR__` and friends). If it is undefined the comparison
 * evaluates as `0 >= 75000` and the second (legacy) spelling is always selected
 * -- confirm that this is intended.
 */
#if __CUDAVER__ >= 75000
#define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING _Pragma("nv_exec_check_disable")
#else
#define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING _Pragma("hd_warning_disable")
#endif

#include <cuda_runtime.h>
#else  //__CUDACC__
/// @brief Expands to nothing when not compiling as CUDA
#define SMITH_HOST_DEVICE
/// @brief Expands to nothing when not compiling as CUDA
#define SMITH_HOST
/// @brief Expands to nothing when not compiling as CUDA
#define SMITH_DEVICE
/// @brief Expands to nothing when not compiling as CUDA
#define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING
#endif
52 
53 #include <memory>
54 #include <cstring>
55 #include <tuple>
56 
57 #include "axom/core.hpp"
58 
62 #include "smith/smith_config.hpp"
63 
67 namespace smith {
68 
/**
 * @brief enum used for signalling whether or not to perform certain calculations on the CPU or GPU
 */
enum class ExecutionSpace
{
  CPU,     ///< Calculations are performed on the CPU
  GPU,     ///< Calculations are performed on the GPU
  Dynamic  ///< Corresponds to execution that can "legally" happen on either the host or device
};
78 
82 constexpr ExecutionSpace default_execution_space = ExecutionSpace::CPU;
83 
84 namespace detail {
85 
89 template <ExecutionSpace space>
92  static constexpr axom::MemorySpace value = axom::MemorySpace::Dynamic;
93 };
94 
95 #ifdef SMITH_USE_UMPIRE
97 template <>
99  static constexpr axom::MemorySpace value = axom::MemorySpace::Host;
100 };
101 
103 template <>
104 struct execution_to_memory<ExecutionSpace::GPU> {
105  static constexpr axom::MemorySpace value = axom::MemorySpace::Device;
106 };
107 
109 template <>
110 struct execution_to_memory<ExecutionSpace::Dynamic> {
111  static constexpr axom::MemorySpace value = axom::MemorySpace::Unified;
112 };
113 #endif
114 
116 template <ExecutionSpace space>
117 inline constexpr axom::MemorySpace execution_to_memory_v = execution_to_memory<space>::value;
118 
120 template <typename T, int dim, axom::MemorySpace space>
121 void zero_out(axom::Array<T, dim, space>& arr)
122 {
123  std::memset(arr.data(), 0, static_cast<std::size_t>(arr.size()) * sizeof(T));
124 }
125 
127 template <typename T, int dim>
128 void zero_out(axom::ArrayView<T, dim, detail::host_memory_space>& arr)
129 {
130  std::memset(arr.data(), 0, static_cast<std::size_t>(arr.size()) * sizeof(T));
131 }
132 #ifdef __CUDACC__
134 template <typename T, int dim>
135 void zero_out(axom::Array<T, dim, execution_to_memory_v<ExecutionSpace::GPU>>& arr)
136 {
137  cudaMemset(arr.data(), 0, static_cast<std::size_t>(arr.size()) * sizeof(T));
138 }
139 #endif
140 
141 } // namespace detail
142 
144 template <typename T, int dim, ExecutionSpace space>
145 using ExecArray = axom::Array<T, dim, detail::execution_to_memory_v<space>>;
146 
148 template <typename T, int dim = 1>
150 
151 #ifdef SMITH_USE_CUDA
152 
154 template <typename T, int dim = 1>
156 
158 template <typename T, int dim = 1>
160 
161 #else
162 // If not a CUDA build then force all arrays to be CPU
163 
165 template <typename T, int dim = 1>
167 
169 template <typename T, int dim = 1>
171 
172 #endif
173 
175 template <typename T, int dim, ExecutionSpace space>
176 using ExecArrayView = axom::ArrayView<T, dim, detail::execution_to_memory_v<space>>;
177 
179 template <typename T, int dim = 1>
181 
182 #ifdef SMITH_USE_CUDA
184 template <typename T, int dim = 1>
185 using GPUArrayView = ExecArrayView<T, dim, ExecutionSpace::GPU>;
186 #endif
187 
189 template <typename T, int dim, axom::MemorySpace space>
190 auto view(axom::Array<T, dim, space>& arr)
191 {
192  return axom::ArrayView<T, dim, space>(arr);
193 }
194 
198 namespace accelerator {
199 
/// @brief Initializes the device (GPU)
/// @note Declaration only; the definition lives in accelerator.cpp.
205 void initializeDevice();
206 
/// @brief Cleans up the device, if applicable
/// @note Declaration only; the definition lives in accelerator.cpp.
210 void terminateDevice();
211 
212 #if defined(__CUDACC__)
213 
220 inline void displayLastCUDAMessage(const char* success_string = "", bool exit_on_error = false)
221 {
222  auto error = cudaGetLastError();
223  if (error != cudaError::cudaSuccess) {
224  if (exit_on_error) {
225  SLIC_ERROR_ROOT(smith::profiling::concat("Last CUDA Error Message :", cudaGetErrorString(error)));
226  } else {
227  SLIC_WARNING_ROOT(smith::profiling::concat("Last CUDA Error Message :", cudaGetErrorString(error)));
228  }
229  } else if (strlen(success_string) > 0) {
230  SLIC_INFO_ROOT(success_string);
231  }
232 }
233 
242 inline std::tuple<std::size_t, std::size_t> getCUDAMemInfo()
243 {
244  std::size_t free_memory, total_memory;
245  cudaMemGetInfo(&free_memory, &total_memory);
246  displayLastCUDAMessage();
247  return std::make_tuple(free_memory, total_memory);
248 }
249 
254 inline std::string getCUDAMemInfoString()
255 {
256  auto [free_memory, total_memory] = getCUDAMemInfo();
257  return axom::fmt::format("Free memory: {} Total_memory: {}", free_memory, total_memory);
258 }
259 
260 #endif
261 
268 template <ExecutionSpace exec, typename T>
269 std::shared_ptr<T[]> make_shared_array(std::size_t n)
270 {
271  if constexpr (exec == ExecutionSpace::CPU) {
272  return std::shared_ptr<T[]>(new T[n]);
273  }
274 
275 #if defined(__CUDACC__)
276  if constexpr (exec == ExecutionSpace::GPU) {
277  T* data;
278  cudaMalloc(&data, sizeof(T) * n);
279  auto deleter = [](T* ptr) { cudaFree(ptr); };
280  return std::shared_ptr<T[]>(data, deleter);
281  }
282 #endif
283 }
284 
291 template <ExecutionSpace exec, typename... T>
292 auto make_shared_arrays(std::size_t n)
293 {
294  return std::tuple{make_shared_array<exec, T>(n)...};
295 }
296 
297 } // namespace accelerator
298 
299 } // namespace smith
constexpr axom::MemorySpace execution_to_memory_v
Helper template for execution_to_memory trait.
void zero_out(axom::Array< T, dim, space > &arr)
set the contents of an array to zero, byte-wise
This file contains all the necessary functions and macros required for logging as well as a helpe...
This file defines the host memory space.
void initializeDevice()
Initializes the device (GPU)
Definition: accelerator.cpp:24
std::shared_ptr< T[]> make_shared_array(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
void terminateDevice()
Cleans up the device, if applicable.
Definition: accelerator.cpp:33
auto make_shared_arrays(std::size_t n)
create a tuple of shared_ptrs, one per type in T..., each to an array of n values, either on the host or device
std::string concat(T... args)
Produces a string by applying << to all arguments.
Definition: profiling.hpp:117
Accelerator functionality.
Definition: smith.cpp:36
ExecArray< T, dim, ExecutionSpace::CPU > GPUArray
Alias for an array on the GPU.
constexpr ExecutionSpace default_execution_space
The default execution space for Smith builds.
Definition: accelerator.hpp:82
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.
auto view(axom::Array< T, dim, space > &arr)
convenience function for creating a view of an axom::Array type
axom::Array< T, dim, detail::execution_to_memory_v< space > > ExecArray
Alias for an Array corresponding to a particular ExecutionSpace.
ExecArray< T, dim, ExecutionSpace::CPU > CPUArray
Alias for an array on the CPU.
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
Definition: accelerator.hpp:73
SMITH_HOST_DEVICE tuple< T... > make_tuple(const T &... args)
helper function for combining a list of values into a tuple
Definition: tuple.hpp:266
ExecArray< T, dim, ExecutionSpace::CPU > UnifiedArray
Alias for an array in unified memory.
ExecArrayView< T, dim, ExecutionSpace::CPU > CPUArrayView
Alias for an array view on the CPU.
axom::ArrayView< T, dim, detail::execution_to_memory_v< space > > ExecArrayView
Alias for an ArrayView corresponding to a particular ExecutionSpace.
Various helper functions and macros for profiling using Caliper.
Trait for "translating" between smith::ExecutionSpace and axom::MemorySpace.
Definition: accelerator.hpp:90
static constexpr axom::MemorySpace value
The corresponding axom::MemorySpace.
Definition: accelerator.hpp:92