doxygen/html/accelerator_8hpp_source.html

 // Copyright (c) Lawrence Livermore National Security, LLC and

 // other Smith Project Developers. See the top-level LICENSE file for

 // details.

 //

 // SPDX-License-Identifier: (BSD-3-Clause)


 #pragma once


 // Function-space annotation macros.
 // When __CUDACC__ is defined they expand to the CUDA __host__ / __device__
 // qualifiers and <cuda_runtime.h> is included; otherwise (see the #else
 // branch below) every macro expands to nothing.
 #if defined(__CUDACC__)

 #define SMITH_HOST_DEVICE __host__ __device__

 #define SMITH_HOST __host__

 #define SMITH_DEVICE __device__


 // Selects which pragma SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING expands to.
 // NOTE(review): __CUDAVER__ does not look like a macro NVCC defines (the
 // documented version macros are __CUDACC_VER_MAJOR__ / __CUDACC_VER_MINOR__).
 // If it is undefined, this test is always false and the #else branch is
 // taken -- confirm.
 // NOTE(review): a `#pragma` that results from macro expansion is not treated
 // as a directive by a standard preprocessor; `_Pragma("...")` is the usual
 // spelling. Confirm these expansions behave as intended with NVCC.
 #if __CUDAVER__ >= 75000

 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma nv_exec_check_disable

 #else

 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma hd_warning_disable

 #endif


 #include <cuda_runtime.h>

 #else  //__CUDACC__

 // Not compiling with NVCC: all annotation macros are empty.
 #define SMITH_HOST_DEVICE

 #define SMITH_HOST

 #define SMITH_DEVICE

 #define SMITH_SUPPRESS_NVCC_HOSTDEVICE_WARNING

 #endif


 #include <memory>

 #include <cstring>

 #include <tuple>


 #include "axom/core.hpp"


 #include "smith/infrastructure/logger.hpp"

 #include "smith/infrastructure/memory.hpp"

 #include "smith/infrastructure/profiling.hpp"

 #include "smith/smith_config.hpp"


 namespace smith {


 /**
  * @brief enum used for signalling whether or not to perform certain calculations on the CPU or GPU
  */
 enum class ExecutionSpace
 {
   CPU,     ///< Execution on the host
   GPU,     ///< Execution on the device
   Dynamic  ///< Corresponds to execution that can "legally" happen on either the host or device
 };

 /**
  * @brief The default execution space for Smith builds
  *
  * Declared `inline` so that every translation unit including this header refers to the same
  * entity, matching the `inline constexpr` style used for `detail::execution_to_memory_v`.
  */
 inline constexpr ExecutionSpace default_execution_space = ExecutionSpace::CPU;


 namespace detail {


 /**
  * @brief Trait for "translating" between smith::ExecutionSpace and axom::MemorySpace
  * @tparam space The execution space to translate
  *
  * This primary template maps every execution space to axom::MemorySpace::Dynamic.
  * Specializations for individual execution spaces are provided below only when
  * SMITH_USE_UMPIRE is defined.
  */
 template <ExecutionSpace space>

 struct execution_to_memory {

   /// @brief The corresponding axom::MemorySpace
   static constexpr axom::MemorySpace value = axom::MemorySpace::Dynamic;

 };


 // Explicit specializations of execution_to_memory; only compiled when
 // SMITH_USE_UMPIRE is defined. Without it, the primary template's
 // axom::MemorySpace::Dynamic is used for every execution space.
 #ifdef SMITH_USE_UMPIRE

 /// @brief ExecutionSpace::CPU maps to axom::MemorySpace::Host
 template <>

 struct execution_to_memory<ExecutionSpace::CPU> {

   /// @brief The corresponding axom::MemorySpace
   static constexpr axom::MemorySpace value = axom::MemorySpace::Host;

 };


 /// @brief ExecutionSpace::GPU maps to axom::MemorySpace::Device
 template <>

 struct execution_to_memory<ExecutionSpace::GPU> {

   /// @brief The corresponding axom::MemorySpace
   static constexpr axom::MemorySpace value = axom::MemorySpace::Device;

 };


 /// @brief ExecutionSpace::Dynamic maps to axom::MemorySpace::Unified
 template <>

 struct execution_to_memory<ExecutionSpace::Dynamic> {

   /// @brief The corresponding axom::MemorySpace
   static constexpr axom::MemorySpace value = axom::MemorySpace::Unified;

 };

 #endif


 /**
  * @brief Helper template for execution_to_memory trait
  * @tparam space The execution space whose axom::MemorySpace is requested
  *
  * Shorthand for `execution_to_memory<space>::value`.
  */
 template <ExecutionSpace space>

 inline constexpr axom::MemorySpace execution_to_memory_v = execution_to_memory<space>::value;


 /**
  * @brief set the contents of an array to zero, byte-wise
  * @tparam T The element type of the array
  * @tparam dim The dimension of the array
  * @tparam space The memory space of the array
  * @param[inout] arr The array whose storage is overwritten with zero bytes
  *
  * Uses std::memset over `arr.size() * sizeof(T)` bytes starting at `arr.data()`.
  */
 template <typename T, int dim, axom::MemorySpace space>
 void zero_out(axom::Array<T, dim, space>& arr)
 {
   const std::size_t num_bytes = static_cast<std::size_t>(arr.size()) * sizeof(T);
   std::memset(arr.data(), 0, num_bytes);
 }


 /**
  * @brief set the contents of a host array view to zero, byte-wise
  * @tparam T The element type of the view
  * @tparam dim The dimension of the view
  * @param[inout] arr The view whose referenced storage is overwritten with zero bytes
  *
  * Uses std::memset over `arr.size() * sizeof(T)` bytes starting at `arr.data()`.
  */
 template <typename T, int dim>
 void zero_out(axom::ArrayView<T, dim, detail::host_memory_space>& arr)
 {
   const std::size_t num_bytes = static_cast<std::size_t>(arr.size()) * sizeof(T);
   std::memset(arr.data(), 0, num_bytes);
 }

 #ifdef __CUDACC__
 /**
  * @brief set the contents of an array in the GPU execution space's memory space to zero, byte-wise
  * @tparam T The element type of the array
  * @tparam dim The dimension of the array
  * @param[inout] arr The array whose storage is overwritten with zero bytes
  *
  * Only available when compiling with NVCC. Uses cudaMemset over
  * `arr.size() * sizeof(T)` bytes; the status returned by cudaMemset is not inspected.
  */
 template <typename T, int dim>
 void zero_out(axom::Array<T, dim, execution_to_memory_v<ExecutionSpace::GPU>>& arr)
 {
   const std::size_t num_bytes = static_cast<std::size_t>(arr.size()) * sizeof(T);
   cudaMemset(arr.data(), 0, num_bytes);
 }
 #endif


 }  // namespace detail


 /**
  * @brief Alias for an Array corresponding to a particular ExecutionSpace
  * @tparam T The element type
  * @tparam dim The dimension of the array
  * @tparam space The execution space, translated to a memory space via detail::execution_to_memory_v
  */
 template <typename T, int dim, ExecutionSpace space>

 using ExecArray = axom::Array<T, dim, detail::execution_to_memory_v<space>>;


 /**
  * @brief Alias for an array on the CPU
  * @tparam T The element type
  * @tparam dim The dimension of the array (defaults to 1)
  */
 template <typename T, int dim = 1>

 using CPUArray = ExecArray<T, dim, ExecutionSpace::CPU>;


 // GPUArray / UnifiedArray use the GPU / Dynamic execution spaces only when
 // SMITH_USE_CUDA is defined; otherwise both aliases resolve to the CPU
 // execution space so that code using them still compiles.
 #ifdef SMITH_USE_CUDA


 /**
  * @brief Alias for an array on the GPU
  * @tparam T The element type
  * @tparam dim The dimension of the array (defaults to 1)
  */
 template <typename T, int dim = 1>

 using GPUArray = ExecArray<T, dim, ExecutionSpace::GPU>;


 /**
  * @brief Alias for an array in unified memory
  * @tparam T The element type
  * @tparam dim The dimension of the array (defaults to 1)
  */
 template <typename T, int dim = 1>

 using UnifiedArray = ExecArray<T, dim, ExecutionSpace::Dynamic>;


 #else

 // If not a CUDA build then force all arrays to be CPU


 /**
  * @brief Alias for an array on the GPU (resolves to a CPU array in non-CUDA builds)
  * @tparam T The element type
  * @tparam dim The dimension of the array (defaults to 1)
  */
 template <typename T, int dim = 1>

 using GPUArray = ExecArray<T, dim, ExecutionSpace::CPU>;


 /**
  * @brief Alias for an array in unified memory (resolves to a CPU array in non-CUDA builds)
  * @tparam T The element type
  * @tparam dim The dimension of the array (defaults to 1)
  */
 template <typename T, int dim = 1>

 using UnifiedArray = ExecArray<T, dim, ExecutionSpace::CPU>;


 #endif


 /**
  * @brief Alias for an ArrayView corresponding to a particular ExecutionSpace
  * @tparam T The element type
  * @tparam dim The dimension of the view
  * @tparam space The execution space, translated to a memory space via detail::execution_to_memory_v
  */
 template <typename T, int dim, ExecutionSpace space>

 using ExecArrayView = axom::ArrayView<T, dim, detail::execution_to_memory_v<space>>;


 /**
  * @brief Alias for an array view on the CPU
  * @tparam T The element type
  * @tparam dim The dimension of the view (defaults to 1)
  */
 template <typename T, int dim = 1>

 using CPUArrayView = ExecArrayView<T, dim, ExecutionSpace::CPU>;


 // NOTE(review): unlike GPUArray, GPUArrayView has no CPU fallback when
 // SMITH_USE_CUDA is not defined -- confirm whether that asymmetry is intended.
 #ifdef SMITH_USE_CUDA

 /**
  * @brief Alias for an array view on the GPU
  * @tparam T The element type
  * @tparam dim The dimension of the view (defaults to 1)
  */
 template <typename T, int dim = 1>

 using GPUArrayView = ExecArrayView<T, dim, ExecutionSpace::GPU>;

 #endif


 /**
  * @brief convenience function for creating a view of an axom::Array type
  * @tparam T The element type of the array
  * @tparam dim The dimension of the array
  * @tparam space The memory space of the array
  * @param[in] arr The array to create a view of
  * @return an axom::ArrayView with the same element type, dimension and memory space as @a arr
  */
 template <typename T, int dim, axom::MemorySpace space>
 auto view(axom::Array<T, dim, space>& arr)
 {
   using ViewType = axom::ArrayView<T, dim, space>;
   return ViewType(arr);
 }


 namespace accelerator {


 /**
  * @brief Initializes the device (GPU)
  *
  * Declaration only; the definition is not part of this header.
  */
 void initializeDevice();


 /**
  * @brief Cleans up the device, if applicable
  *
  * Declaration only; the definition is not part of this header.
  */
 void terminateDevice();


 #if defined(__CUDACC__)


 inline void displayLastCUDAMessage(const char* success_string = "", bool exit_on_error = false)

 {

   auto error = cudaGetLastError();

   if (error != cudaError::cudaSuccess) {

     if (exit_on_error) {

       SLIC_ERROR_ROOT(smith::profiling::concat("Last CUDA Error Message :", cudaGetErrorString(error)));

     } else {

       SLIC_WARNING_ROOT(smith::profiling::concat("Last CUDA Error Message :", cudaGetErrorString(error)));

     }

   } else if (strlen(success_string) > 0) {

     SLIC_INFO_ROOT(success_string);

   }

 }


 /**
  * @brief Queries the amount of free and total device memory via cudaMemGetInfo
  *
  * Any pending CUDA error is reported through displayLastCUDAMessage() after the query.
  *
  * @return a tuple of (free memory, total memory) as reported by cudaMemGetInfo;
  *         both values are zero-initialized before the query so that they are
  *         never read uninitialized if the query does not write to them
  */
 inline std::tuple<std::size_t, std::size_t> getCUDAMemInfo()
 {
   std::size_t free_memory = 0;
   std::size_t total_memory = 0;

   cudaMemGetInfo(&free_memory, &total_memory);
   displayLastCUDAMessage();

   return std::make_tuple(free_memory, total_memory);
 }


 /**
  * @brief Formats the values returned by getCUDAMemInfo() as a human-readable string
  * @return a string of the form "Free memory: <free> Total_memory: <total>"
  */
 inline std::string getCUDAMemInfoString()
 {
   const auto mem_info = getCUDAMemInfo();
   return axom::fmt::format("Free memory: {} Total_memory: {}", std::get<0>(mem_info), std::get<1>(mem_info));
 }


 #endif


 /**
  * @brief create shared_ptr to an array of n values of type T, either on the host or device
  * @tparam exec The execution space that selects where the array is allocated
  * @tparam T The element type of the array
  * @param[in] n The number of entries in the array
  *
  * - ExecutionSpace::CPU: the array is allocated with `new T[n]`.
  * - ExecutionSpace::GPU (only when compiling with NVCC): the array is allocated with
  *   cudaMalloc and released with cudaFree. If the allocation fails, the CUDA error is
  *   reported through displayLastCUDAMessage() and an empty shared_ptr is returned.
  * - Any other combination: an empty shared_ptr is returned, so that control never
  *   flows off the end of this non-void function.
  *
  * @return a shared_ptr owning the allocation, or an empty shared_ptr as described above
  */
 template <ExecutionSpace exec, typename T>
 std::shared_ptr<T[]> make_shared_array(std::size_t n)
 {
   if constexpr (exec == ExecutionSpace::CPU) {
     return std::shared_ptr<T[]>(new T[n]);
   }
 #if defined(__CUDACC__)
   else if constexpr (exec == ExecutionSpace::GPU) {
     T* data = nullptr;
     if (cudaMalloc(&data, sizeof(T) * n) != cudaError::cudaSuccess) {
       // Do not hand an invalid pointer to cudaFree later on
       displayLastCUDAMessage();
       return std::shared_ptr<T[]>{};
     }
     auto deleter = [](T* ptr) { cudaFree(ptr); };
     return std::shared_ptr<T[]>(data, deleter);
   }
 #endif
   else {
     // Execution space not supported in this translation unit
     return std::shared_ptr<T[]>{};
   }
 }


 template <ExecutionSpace exec, typename... T>

 auto make_shared_arrays(std::size_t n)

 {

   return std::tuple{make_shared_array<exec, T>(n)...};

 }


 }  // namespace accelerator


 }  // namespace smith

smith::detail::execution_to_memory_v
constexpr axom::MemorySpace execution_to_memory_v
Helper template for execution_to_memory trait.
Definition: accelerator.hpp:117

smith::detail::zero_out
void zero_out(axom::Array< T, dim, space > &arr)
set the contents of an array to zero, byte-wise
Definition: accelerator.hpp:121

logger.hpp
This file contains all the necessary functions and macros required for logging as well as a helpe...

memory.hpp
This file defines the host memory space.

smith::accelerator::initializeDevice
void initializeDevice()
Initializes the device (GPU)
Definition: accelerator.cpp:24

smith::accelerator::make_shared_array
std::shared_ptr< T[]> make_shared_array(std::size_t n)
create shared_ptr to an array of n values of type T, either on the host or device
Definition: accelerator.hpp:269

smith::accelerator::terminateDevice
void terminateDevice()
Cleans up the device, if applicable.
Definition: accelerator.cpp:33

smith::accelerator::make_shared_arrays
auto make_shared_arrays(std::size_t n)
create a tuple of shared_ptrs to arrays of n values, one array for each type in T..., either on the host or device
Definition: accelerator.hpp:292

smith::profiling::concat
std::string concat(T... args)
Produces a string by applying << to all arguments.
Definition: profiling.hpp:117

smith
Accelerator functionality.
Definition: smith.cpp:36

smith::GPUArray
ExecArray< T, dim, ExecutionSpace::CPU > GPUArray
Alias for an array on the GPU.
Definition: accelerator.hpp:166

smith::default_execution_space
constexpr ExecutionSpace default_execution_space
The default execution space for Smith builds.
Definition: accelerator.hpp:82

smith::tuple
tuple(T...) -> tuple< T... >
Class template argument deduction rule for tuples.

smith::view
auto view(axom::Array< T, dim, space > &arr)
convenience function for creating a view of an axom::Array type
Definition: accelerator.hpp:190

smith::ExecArray
axom::Array< T, dim, detail::execution_to_memory_v< space > > ExecArray
Alias for an Array corresponding to a particular ExecutionSpace.
Definition: accelerator.hpp:145

smith::CPUArray
ExecArray< T, dim, ExecutionSpace::CPU > CPUArray
Alias for an array on the CPU.
Definition: accelerator.hpp:149

smith::ExecutionSpace
ExecutionSpace
enum used for signalling whether or not to perform certain calculations on the CPU or GPU
Definition: accelerator.hpp:73

smith::make_tuple
SMITH_HOST_DEVICE tuple< T... > make_tuple(const T &... args)
helper function for combining a list of values into a tuple
Definition: tuple.hpp:266

smith::UnifiedArray
ExecArray< T, dim, ExecutionSpace::CPU > UnifiedArray
Alias for an array in unified memory.
Definition: accelerator.hpp:170

smith::CPUArrayView
ExecArrayView< T, dim, ExecutionSpace::CPU > CPUArrayView
Alias for an array view on the CPU.
Definition: accelerator.hpp:180

smith::ExecArrayView
axom::ArrayView< T, dim, detail::execution_to_memory_v< space > > ExecArrayView
Alias for an ArrayView corresponding to a particular ExecutionSpace.
Definition: accelerator.hpp:176

profiling.hpp
Various helper functions and macros for profiling using Caliper.

smith::detail::execution_to_memory
Trait for "translating" between smith::ExecutionSpace and axom::MemorySpace.
Definition: accelerator.hpp:90

smith::detail::execution_to_memory::value
static constexpr axom::MemorySpace value
The corresponding axom::MemorySpace.
Definition: accelerator.hpp:92