// NOTE(review): extraction artifact — the decimal prefixes (15, 16, 19, ...) are the
// original header's line numbers fused into the text; code is kept byte-identical here.
// Header guard, includes, and feature gates for the OpenMP offload allocators, followed
// by a fragment of getOffloadDevicePtr(T* host_ptr): inside an
// "omp target data use_device_ptr" region the device pointer corresponding to the mapped
// host_ptr is captured into device_ptr (see the index entry "T * getOffloadDevicePtr").
15 #ifndef QMCPLUSPLUS_OMPTARGET_ALLOCATOR_H 16 #define QMCPLUSPLUS_OMPTARGET_ALLOCATOR_H 19 #include <type_traits> 23 #if defined(ENABLE_OFFLOAD) 27 #if defined(QMC_OFFLOAD_MEM_ASSOCIATED) 41 PRAGMA_OFFLOAD(
"omp target data use_device_ptr(host_ptr)") { device_ptr = host_ptr; }
// OMPallocator<T, HostAllocator>: dual-space allocator — host memory from HostAllocator
// plus an OpenMP device-side mapping of the same range. Many original lines are missing
// from this extraction (88->90, 90->92, ...); only visible statements are annotated.
59 template<
typename T,
class HostAllocator = std::allocator<T>>
// Member typedefs forwarded from the host allocator.
63 using size_type =
typename HostAllocator::size_type;
64 using pointer =
typename HostAllocator::pointer;
// Template headers for the rebind/converting-copy members (bodies not visible here).
74 template<
class U,
class V>
78 template<
class U,
class V>
// Compile-time check that T agrees with the host allocator's value_type.
86 static_assert(std::is_same<T, value_type>::value,
"OMPallocator and HostAllocator data types must agree!");
// allocate() path, QMC_OFFLOAD_MEM_ASSOCIATED variant: associate the freshly allocated
// host buffer pt with device storage device_ptr_; length is n * sizeof(T) bytes.
88 #if defined(QMC_OFFLOAD_MEM_ASSOCIATED) 90 const int status = omp_target_associate_ptr(pt,
device_ptr_,
n *
sizeof(T), 0, omp_get_default_device());
// A failed association is fatal (the guarding if-statement is not visible in this chunk).
92 throw std::runtime_error(
"omp_target_associate_ptr failed in OMPallocator!");
// Non-associated variant: let the runtime map the host range; map(alloc:) reserves
// device space without transferring data.
94 PRAGMA_OFFLOAD(
"omp target enter data map(alloc:pt[0:n])")
// deallocate() path: undo the association (or delete the mapping), then release host memory.
104 #if defined(QMC_OFFLOAD_MEM_ASSOCIATED) 106 const int status = omp_target_disassociate_ptr(pt, omp_get_default_device());
108 throw std::runtime_error(
"omp_target_disassociate_ptr failed in OMPallocator!");
111 PRAGMA_OFFLOAD(
"omp target exit data map(delete:pt[0:n])")
// Finally free the host-side buffer through the host allocator.
113 HostAllocator::deallocate(pt,
n);
// qmc_allocator_traits<OMPallocator<T, HostAllocator>> fragments: host/device transfer
// hooks used by dual-space containers (see the index entries for updateTo/updateFrom/
// deviceSideCopyN below).
132 template<
typename T,
class HostAllocator>
// fill_n over the host-side buffer (only the signature is visible in this chunk).
138 static void fill_n(T* ptr,
size_t n,
const T& value)
// Tail of attachReference(from, to, ptr_offset): offset applied when one allocator
// aliases another's buffers.
146 std::ptrdiff_t ptr_offset)
// updateTo: copy host_ptr[offset:n] host -> device ("update to").
153 PRAGMA_OFFLOAD(
"omp target update to(host_ptr[offset:n])");
// updateFrom: copy host_ptr[offset:n] device -> host ("update from").
158 PRAGMA_OFFLOAD(
"omp target update from(host_ptr[offset:n])");
// deviceSideCopyN: copy n elements between offsets 'from' and 'to' entirely on the
// device, parallelized over teams/threads; dev_ptr is a raw device pointer.
165 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for is_device_ptr(dev_ptr)")
166 for (
int i = 0; i <
n; i++)
167 dev_ptr[to + i] = dev_ptr[from + i];
// OMPTargetAllocator<T>: device-only allocator — memory comes from omp_target_alloc and
// is NOT host-accessible. Several original lines are missing from this extraction; only
// visible statements are annotated.
171 #if defined(ENABLE_OFFLOAD) 185 class OMPTargetAllocator
189 using size_type = size_t;
191 using const_pointer =
const T*;
193 OMPTargetAllocator() =
default;
// Converting copy constructor; presumably trivial since no data members are visible.
195 OMPTargetAllocator(
const OMPTargetAllocator<U>&)
201 using other = OMPTargetAllocator<U>;
// Allocate n * sizeof(T) bytes on the default device; throws on failure.
204 T* allocate(std::size_t
n)
206 void* pt = omp_target_alloc(
n *
sizeof(T), omp_get_default_device());
208 throw std::runtime_error(
"Allocation failed in OMPTargetAllocator!");
210 return static_cast<T*
>(pt);
213 void deallocate(T* p, std::size_t
n)
215 omp_target_free(p, omp_get_default_device());
// construct/destroy — bodies not visible; presumably no-ops because the host cannot
// touch device memory. TODO confirm against the full file.
235 template<
class U,
class... Args>
236 static void construct(U* p, Args&&... args)
244 static void destroy(U* p)
// Host -> device transfer. NOTE(review): the visible call passes 'n' (not n * sizeof(T))
// as the omp_target_memcpy length, unlike allocate() — confirm whether n is a byte count
// here or whether "* sizeof(T)" was lost in extraction.
247 void copyToDevice(T* device_ptr, T* host_ptr,
size_t n)
249 const auto host_id = omp_get_initial_device();
250 if (omp_target_memcpy(device_ptr, host_ptr,
n, 0, 0, omp_get_default_device(), host_id))
251 throw std::runtime_error(
"omp_target_memcpy failed in copyToDevice");
// Device -> host transfer. NOTE(review): the error message says "copyToDevice" — looks
// like a copy/paste slip; it should read "copyFromDevice". Left unchanged here.
254 void copyFromDevice(T* host_ptr, T* device_ptr,
size_t n)
256 const auto host_id = omp_get_initial_device();
257 if (omp_target_memcpy(host_ptr, device_ptr,
n, 0, 0, host_id, omp_get_default_device()))
258 throw std::runtime_error(
"omp_target_memcpy failed in copyToDevice");
// Device -> device transfer. NOTE(review): same misleading "copyToDevice" message.
261 void copyDeviceToDevice(T* to_ptr,
size_t n, T* from_ptr)
263 if (omp_target_memcpy(to_ptr, from_ptr,
n, 0, 0, omp_get_default_device(), omp_get_default_device()))
264 throw std::runtime_error(
"omp_target_memcpy failed in copyToDevice");
// Comparison operators for OMPTargetAllocator. Bodies are not visible in this chunk;
// presumably equality is unconditional since the allocator appears stateless — confirm.
268 template<
class T1,
class T2>
269 bool operator==(
const OMPTargetAllocator<T1>&,
const OMPTargetAllocator<T2>&)
273 template<
class T1,
class T2>
274 bool operator!=(
const OMPTargetAllocator<T1>&,
const OMPTargetAllocator<T2>&)
// qmc_allocator_traits specialization for the device-only allocator.
280 struct qmc_allocator_traits<
qmcplusplus::OMPTargetAllocator<T>>
// fill_n is an empty stub here — the memory is device-resident, so a host-side fill
// is not performed by this trait.
284 static void fill_n(T* ptr,
size_t n,
const T& value) {}
typename std::allocator< Value > ::const_pointer const_pointer
static constexpr bool is_dual_space
helper functions for EinsplineSetBuilder
void deallocate(value_type *pt, std::size_t n)
static void fill_n(T *ptr, size_t n, const T &value)
void attachReference(const OMPallocator &from, std::ptrdiff_t ptr_offset)
handle CUDA/HIP runtime selection.
size_t getOMPdeviceMemAllocated()
bool operator==(const Matrix< T, Alloc > &lhs, const Matrix< T, Alloc > &rhs)
static void deviceSideCopyN(OMPallocator< T, HostAllocator > &alloc, size_t to, size_t n, size_t from)
const T * get_device_ptr() const
typename std::allocator< Value > ::size_type size_type
T * getOffloadDevicePtr(T *host_ptr)
static void updateTo(OMPallocator< T, HostAllocator > &alloc, T *host_ptr, size_t n, size_t offset=0)
cudaErrorCheck(cudaMemcpyAsync(dev_lu.data(), lu.data(), sizeof(decltype(lu)::value_type) *lu.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")
OMPallocator(const OMPallocator &)
Gives you an OMPallocator with no state.
static constexpr bool is_host_accessible
typename std::allocator< Value > ::value_type value_type
OMPallocator is an allocator with fused device and dualspace allocator functionality.
template class analogous to std::allocator_traits.
OMPallocator(const OMPallocator< U, V > &)
static void attachReference(const OMPallocator< T, HostAllocator > &from, OMPallocator< T, HostAllocator > &to, std::ptrdiff_t ptr_offset)
std::atomic< size_t > OMPallocator_device_mem_allocated
typename std::allocator< Value > ::pointer pointer
value_type * allocate(std::size_t n)
static void fill_n(value_type *ptr, size_t n, const value_type &value)
QMCTraits::FullPrecRealType value_type
static void updateFrom(OMPallocator< T, HostAllocator > &alloc, T *host_ptr, size_t n, size_t offset=0)
bool operator!=(const Matrix< T, Alloc > &lhs, const Matrix< T, Alloc > &rhs)