#ifndef QMCPLUSPLUS_CUDA_ALLOCATOR_H
#define QMCPLUSPLUS_CUDA_ALLOCATOR_H

This header defines the CUDA memory allocators used by QMCPACK: CUDAManagedAllocator (unified memory), CUDAAllocator (device memory), CUDAHostAllocator (host pinned memory), and CUDALockedPageAllocator (page-locks memory obtained from another host allocator). The notable pieces of the source listing are summarized first; a full member reference follows.

CUDAManagedAllocator<T>::allocate verifies that the managed allocation meets the SIMD alignment requirement before handing it back:

    if ((size_t(pt)) & (QMC_SIMD_ALIGNMENT - 1))
      throw std::runtime_error("Unaligned memory allocated in CUDAManagedAllocator");
    return static_cast<T*>(pt);

The class is followed by a pair of template<class T1, class T2> free functions, the comparison operators for the stateless allocator.
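Because unified memory is reachable from both the host and the device, the managed allocator can back an ordinary standard container. The sketch below is illustrative only: it assumes the allocators live in namespace qmcplusplus, that this header is named CUDAallocator.hpp, and that CUDAManagedAllocator<T> satisfies the standard Allocator requirements beyond the allocate/deallocate shown here.

    // Minimal sketch: a std::vector whose storage is CUDA managed memory.
    #include <numeric>
    #include <vector>
    #include "CUDAallocator.hpp"  // assumed header name; namespace qmcplusplus assumed

    int main()
    {
      // Managed pages migrate between host and device on demand.
      std::vector<double, qmcplusplus::CUDAManagedAllocator<double>> v(1024);
      std::iota(v.begin(), v.end(), 0.0);  // fill on the host
      // v.data() can be passed straight to a CUDA kernel; synchronize the
      // device before the host touches the data again.
      return 0;
    }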
CUDAAllocator<T>::allocate likewise ends with

    return static_cast<T*>(pt);

after the device allocation. The class provides a template<class U, class... Args> construct member (documented as existing so that std::allocator_traits has something to call), and its copy helpers report failures with the messages "cudaMemcpy failed in copyToDevice", "cudaMemcpy failed in copyFromDevice", and "cudaMemcpy failed in copyDeviceToDevice". A second pair of template<class T1, class T2> comparison operators follows the class.
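The copy helpers give the device allocator a self-contained path for staging data, using only the signatures listed on this page. A minimal sketch, again assuming the qmcplusplus namespace and the header name, with illustrative sizes and no error handling beyond what the helpers do internally:

    #include <cstddef>
    #include <vector>
    #include "CUDAallocator.hpp"  // assumed header name; namespace qmcplusplus assumed

    int main()
    {
      constexpr std::size_t n = 256;
      std::vector<float> host_in(n, 1.0f), host_out(n);

      qmcplusplus::CUDAAllocator<float> alloc;
      float* device_ptr = alloc.allocate(n);                 // device-side buffer

      alloc.copyToDevice(device_ptr, host_in.data(), n);     // host -> device
      alloc.copyFromDevice(host_out.data(), device_ptr, n);  // device -> host

      alloc.deallocate(device_ptr, n);
      return 0;
    }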
CUDAHostAllocator<T>::allocate ends the same way, returning static_cast<T*>(pt), and is followed by its own pair of template<class T1, class T2> comparison operators.
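Pinned host memory is what asynchronous CUDA copies need in order to actually overlap with other work, which is the usual reason to reach for the host-pinned allocator. A sketch under the same namespace and header-name assumptions, with the stream handling purely illustrative:

    #include <cstddef>
    #include <vector>
    #include <cuda_runtime.h>
    #include "CUDAallocator.hpp"  // assumed header name; namespace qmcplusplus assumed

    int main()
    {
      constexpr std::size_t n = 1 << 20;
      // Host-side staging buffer in pinned (page-locked) memory.
      std::vector<float, qmcplusplus::CUDAHostAllocator<float>> pinned(n, 0.0f);

      float* dev = nullptr;
      cudaMalloc(reinterpret_cast<void**>(&dev), n * sizeof(float));

      cudaStream_t stream;
      cudaStreamCreate(&stream);
      // The copy can truly run asynchronously because the source is pinned.
      cudaMemcpyAsync(dev, pinned.data(), n * sizeof(float),
                      cudaMemcpyHostToDevice, stream);
      cudaStreamSynchronize(stream);

      cudaStreamDestroy(stream);
      cudaFree(dev);
      return 0;
    }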
CUDALockedPageAllocator is declared as

    template<typename T, class ULPHA = std::allocator<T>>

and carries two template<class U, class V> member declarations, one of them the converting copy constructor CUDALockedPageAllocator(const CUDALockedPageAllocator<U, V> &). Its allocate enforces

    static_assert(std::is_same<T, value_type>::value,
                  "CUDALockedPageAllocator and ULPHA data types must agree!");

and raises "cudaHostRegister failed in CUDALockedPageAllocator!" if page locking fails; deallocate finishes by handing the memory back through

    ULPHA::deallocate(pt, n);
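The locked-page allocator's division of labor, as the listing above shows, is to let ULPHA provide the memory and to pin and unpin those pages around it with cudaHostRegister and cudaHostUnregister. The standalone sketch below demonstrates that pattern directly with std::allocator; it is an illustration of the technique, not the class itself.

    #include <cstddef>
    #include <memory>
    #include <cuda_runtime.h>

    // Allocate with an ordinary host allocator, then page-lock the result.
    float* allocate_locked(std::allocator<float>& ulpha, std::size_t n)
    {
      float* pt = ulpha.allocate(n);
      cudaHostRegister(pt, n * sizeof(float), cudaHostRegisterDefault);
      return pt;
    }

    // Unpin before returning the memory to the host allocator.
    void deallocate_locked(std::allocator<float>& ulpha, float* pt, std::size_t n)
    {
      cudaHostUnregister(pt);
      ulpha.deallocate(pt, n);
    }

    int main()
    {
      std::allocator<float> ulpha;
      float* buf = allocate_locked(ulpha, 4096);
      deallocate_locked(ulpha, buf, 4096);
      return 0;
    }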
Member reference

Macros - handle CUDA/HIP runtime selection:
#define cudaMallocManaged
#define cudaMemcpyHostToDevice
#define cudaMemcpyDeviceToHost
#define cudaMemcpyDeviceToDevice
#define cudaHostRegisterDefault
#define cudaHostUnregister

The direction macros appear in calls such as

    cudaErrorCheck(cudaMemcpyAsync(dev_lu.data(), lu.data(), sizeof(decltype(lu)::value_type) * lu.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device");

Free functions and globals:
std::atomic<size_t> CUDAallocator_device_mem_allocated
size_t getCUDAdeviceMemAllocated()
void CUDAfill_n(T *ptr, size_t n, const T &value) - fill device memory with a given value.

CUDAManagedAllocator - allocator for CUDA unified memory:
CUDAManagedAllocator()=default
CUDAManagedAllocator(const CUDAManagedAllocator<U> &)
T * allocate(std::size_t n)
void deallocate(T *p, std::size_t)

CUDAAllocator - allocator for CUDA device memory:
CUDAAllocator(const CUDAAllocator<U> &)
T * allocate(std::size_t n)
void deallocate(T *p, std::size_t n)
static void construct(U *p, Args &&... args) - Provide a construct for std::allocator_traits::construct to call.
static void destroy(U *p) - Give std::allocator_traits something to call.
void copyToDevice(T *device_ptr, T *host_ptr, size_t n)
void copyFromDevice(T *host_ptr, T *device_ptr, size_t n)
void copyDeviceToDevice(T *to_ptr, size_t n, T *from_ptr)

Allocator traits - template class analogous to std::allocator_traits:
static constexpr bool is_host_accessible
static constexpr bool is_dual_space
static void fill_n(T *ptr, size_t n, const T &value)

CUDAHostAllocator - allocator for CUDA host pinned memory:
CUDAHostAllocator()=default
CUDAHostAllocator(const CUDAHostAllocator<U> &)
T * allocate(std::size_t n)
void deallocate(T *p, std::size_t)

CUDALockedPageAllocator - allocator locks memory pages allocated by ULPHA:
typename ULPHA::value_type value_type
typename ULPHA::size_type size_type
typename ULPHA::pointer pointer
typename ULPHA::const_pointer const_pointer
CUDALockedPageAllocator()=default
CUDALockedPageAllocator(const CUDALockedPageAllocator<U, V> &)
value_type * allocate(std::size_t n)
void deallocate(value_type *pt, std::size_t n)
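CUDAfill_n is the device-side counterpart of std::fill_n for buffers the host cannot write directly, and it is presumably what the allocator-traits fill_n above builds on. A closing sketch under the same namespace and header-name assumptions as the earlier examples:

    #include <cstddef>
    #include "CUDAallocator.hpp"  // assumed header name; namespace qmcplusplus assumed

    int main()
    {
      constexpr std::size_t n = 512;
      qmcplusplus::CUDAAllocator<double> alloc;
      double* dev = alloc.allocate(n);

      // Zero the device buffer in place, with no host-side staging copy.
      qmcplusplus::CUDAfill_n(dev, n, 0.0);

      alloc.deallocate(dev, n);
      return 0;
    }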