Serac  0.1
Serac is an implicit thermal structural mechanics simulation code.
accelerator.hpp
Go to the documentation of this file.
1 // Copyright (c) 2019-2024, Lawrence Livermore National Security, LLC and
2 // other Serac Project Developers. See the top-level LICENSE file for
3 // details.
4 //
5 // SPDX-License-Identifier: (BSD-3-Clause)
6 
#pragma once

#if defined(__CUDACC__)
// Compiling with nvcc: decorate functions so they may execute on the host, the device, or both
#define SERAC_HOST_DEVICE __host__ __device__
#define SERAC_HOST __host__
#define SERAC_DEVICE __device__

// Suppresses nvcc's host/device annotation warning on the following declaration.
// NOTE(review): __CUDAVER__ is not a macro nvcc predefines (CUDART_VERSION and
// __CUDACC_VER_MAJOR__ are) — an undefined macro evaluates to 0 here, so the
// #else branch is always taken; confirm the intended version check.
#if __CUDAVER__ >= 75000
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma nv_exec_check_disable
#else
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma hd_warning_disable
#endif

#include <cuda_runtime.h>
#else  //__CUDACC__
// Not a CUDA build: the decoration macros expand to nothing
#define SERAC_HOST_DEVICE
#define SERAC_HOST
#define SERAC_DEVICE
#define SERAC_SUPPRESS_NVCC_HOSTDEVICE_WARNING
#endif
52 
#include <cstring>
#include <memory>
#include <type_traits>

#include "axom/core.hpp"
56 
59 
63 namespace serac {
64 
/**
 * @brief Enum used to signal whether to perform certain calculations on the CPU or GPU
 */
enum class ExecutionSpace
{
  CPU,
  GPU,
  Dynamic  // Corresponds to execution that can "legally" happen on either the host or device
};
74 
/// @brief The default execution space for serac builds
constexpr ExecutionSpace default_execution_space = ExecutionSpace::CPU;
79 
80 namespace detail {
81 
85 template <ExecutionSpace space>
88  static constexpr axom::MemorySpace value = axom::MemorySpace::Dynamic;
89 };
90 
91 #ifdef SERAC_USE_UMPIRE
93 template <>
95  static constexpr axom::MemorySpace value = axom::MemorySpace::Host;
96 };
97 
/// @brief Specialization of the execution_to_memory trait: GPU execution maps to device memory
template <>
struct execution_to_memory<ExecutionSpace::GPU> {
  static constexpr axom::MemorySpace value = axom::MemorySpace::Device;
};
103 
/// @brief Specialization of the execution_to_memory trait: Dynamic execution maps to unified memory
template <>
struct execution_to_memory<ExecutionSpace::Dynamic> {
  static constexpr axom::MemorySpace value = axom::MemorySpace::Unified;
};
109 #endif
110 
/// @brief Helper variable template for the execution_to_memory trait
template <ExecutionSpace space>
inline constexpr axom::MemorySpace execution_to_memory_v = execution_to_memory<space>::value;
114 
116 template <typename T, int dim, axom::MemorySpace space>
117 void zero_out(axom::Array<T, dim, space>& arr)
118 {
119  std::memset(arr.data(), 0, static_cast<std::size_t>(arr.size()) * sizeof(T));
120 }
#ifdef __CUDACC__
/**
 * @brief Sets the contents of a GPU (device-memory) array to zero, byte-wise
 *
 * @tparam T The type stored in the array
 * @tparam dim The dimension of the array
 * @param[inout] arr The device array whose contents are zeroed
 *
 * NOTE(review): the cudaMemset status code is discarded — consider checking it
 */
template <typename T, int dim>
void zero_out(axom::Array<T, dim, execution_to_memory_v<ExecutionSpace::GPU>>& arr)
{
  cudaMemset(arr.data(), 0, static_cast<std::size_t>(arr.size()) * sizeof(T));
}
#endif
129 
130 } // namespace detail
131 
/// @brief Alias for an Array corresponding to a particular ExecutionSpace
template <typename T, int dim, ExecutionSpace space>
using ExecArray = axom::Array<T, dim, detail::execution_to_memory_v<space>>;
135 
137 template <typename T, int dim = 1>
139 
140 #ifdef SERAC_USE_CUDA
141 
143 template <typename T, int dim = 1>
145 
147 template <typename T, int dim = 1>
149 
150 #else
151 // If not a CUDA build then force all arrays to be CPU
152 
154 template <typename T, int dim = 1>
156 
158 template <typename T, int dim = 1>
160 
161 #endif
162 
/// @brief Alias for an ArrayView corresponding to a particular ExecutionSpace
template <typename T, int dim, ExecutionSpace space>
using ExecArrayView = axom::ArrayView<T, dim, detail::execution_to_memory_v<space>>;
166 
168 template <typename T, int dim = 1>
170 
171 #ifdef SERAC_USE_CUDA
/// @brief Alias for an ArrayView on the GPU
template <typename T, int dim = 1>
using GPUArrayView = ExecArrayView<T, dim, ExecutionSpace::GPU>;
175 #endif
176 
178 template <typename T, int dim, axom::MemorySpace space>
179 auto view(axom::Array<T, dim, space>& arr)
180 {
181  return axom::ArrayView<T, dim, space>(arr);
182 }
183 
187 namespace accelerator {
188 
/// @brief Initializes the device (GPU); defined in accelerator.cpp
void initializeDevice();

/// @brief Cleans up the device, if applicable; defined in accelerator.cpp
void terminateDevice();
200 
201 #if defined(__CUDACC__)
202 
209 inline void displayLastCUDAMessage(const char* success_string = "", bool exit_on_error = false)
210 {
211  auto error = cudaGetLastError();
212  if (error != cudaError::cudaSuccess) {
213  if (exit_on_error) {
214  SLIC_ERROR_ROOT(serac::profiling::concat("Last CUDA Error Message :", cudaGetErrorString(error)));
215  } else {
216  SLIC_WARNING_ROOT(serac::profiling::concat("Last CUDA Error Message :", cudaGetErrorString(error)));
217  }
218  } else if (strlen(success_string) > 0) {
219  SLIC_INFO_ROOT(success_string);
220  }
221 }
222 
231 inline std::tuple<std::size_t, std::size_t> getCUDAMemInfo()
232 {
233  std::size_t free_memory, total_memory;
234  cudaMemGetInfo(&free_memory, &total_memory);
235  displayLastCUDAMessage();
236  return std::make_tuple(free_memory, total_memory);
237 }
238 
243 inline std::string getCUDAMemInfoString()
244 {
245  auto [free_memory, total_memory] = getCUDAMemInfo();
246  return axom::fmt::format("Free memory: {} Total_memory: {}", free_memory, total_memory);
247 }
248 
249 #endif
250 
257 template <ExecutionSpace exec, typename T>
258 std::shared_ptr<T[]> make_shared_array(std::size_t n)
259 {
260  if constexpr (exec == ExecutionSpace::CPU) {
261  return std::shared_ptr<T[]>(new T[n]);
262  }
263 
264 #if defined(__CUDACC__)
265  if constexpr (exec == ExecutionSpace::GPU) {
266  T* data;
267  cudaMalloc(&data, sizeof(T) * n);
268  auto deleter = [](T* ptr) { cudaFree(ptr); };
269  return std::shared_ptr<T[]>(data, deleter);
270  }
271 #endif
272 }
273 
280 template <ExecutionSpace exec, typename... T>
281 auto make_shared_arrays(std::size_t n)
282 {
283  return std::tuple{make_shared_array<exec, T>(n)...};
284 }
285 
286 } // namespace accelerator
287 
288 } // namespace serac
constexpr axom::MemorySpace execution_to_memory_v
Helper template for execution_to_memory trait.
void zero_out(axom::Array< T, dim, space > &arr)
set the contents of an array to zero, byte-wise
This file contains the all the necessary functions and macros required for logging as well as a helpe...
std::shared_ptr< T[]> make_shared_array(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
void initializeDevice()
Initializes the device (GPU)
Definition: accelerator.cpp:24
auto make_shared_arrays(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
void terminateDevice()
Cleans up the device, if applicable.
Definition: accelerator.cpp:33
std::string concat(T... args)
Produces a string by applying << to all arguments.
Definition: profiling.hpp:190
Accelerator functionality.
Definition: serac.cpp:38
axom::Array< T, dim, detail::execution_to_memory_v< space > > ExecArray
Alias for an Array corresponding to a particular ExecutionSpace.
constexpr ExecutionSpace default_execution_space
The default execution space for serac builds.
Definition: accelerator.hpp:78
ExecArray< T, dim, ExecutionSpace::CPU > GPUArray
Alias for an array on the GPU.
auto view(axom::Array< T, dim, space > &arr)
convenience function for creating a view of an axom::Array type
ExecArray< T, dim, ExecutionSpace::CPU > UnifiedArray
Alias for an array in unified memory.
ExecArrayView< T, dim, ExecutionSpace::CPU > CPUArrayView
Alias for an array view on the CPU.
SERAC_HOST_DEVICE tuple< T... > make_tuple(const T &... args)
helper function for combining a list of values into a tuple
Definition: tuple.hpp:180
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.
axom::ArrayView< T, dim, detail::execution_to_memory_v< space > > ExecArrayView
Alias for an ArrayView corresponding to a particular ExecutionSpace.
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
Definition: accelerator.hpp:69
ExecArray< T, dim, ExecutionSpace::CPU > CPUArray
Alias for an array on the CPU.
Various helper functions and macros for profiling using Caliper.
Trait for "translating" between serac::ExecutionSpace and axom::MemorySpace.
Definition: accelerator.hpp:86
static constexpr axom::MemorySpace value
The corresponding axom::MemorySpace.
Definition: accelerator.hpp:88