QMCPACK
DualAllocator.hpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2021 QMCPACK developers.
6 //
7 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
8 //
9 // Refactored from: OMPallocator.hpp
10 //////////////////////////////////////////////////////////////////////////////////////
11 // -*- C++ -*-
12 /** @file
13  */
14 #ifndef QMCPLUSPLUS_DUAL_ALLOCATOR_H
15 #define QMCPLUSPLUS_DUAL_ALLOCATOR_H
16 
#include <atomic>
#include <exception>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include "config.h"
#include "allocator_traits.hpp"
#include "PinnedAllocator.h"
#if defined(ENABLE_CUDA)
#include "CUDA/CUDAallocator.hpp"
#elif defined(ENABLE_SYCL)
#include "SYCL/SYCLallocator.hpp"
#endif
29 
30 namespace qmcplusplus
31 {
32 extern std::atomic<size_t> dual_device_mem_allocated;
33 
35 
36 /** Generalizes the DualMemorySpace allocator
37  * This provides a limited alternative to OMPallocator for testing/benchmarking
38  * without depending on OMPTarget/offload.
39  * It does not provide an alternative to OMPtarget transfer semantics so many production
40  * objects will not be functional if it is used as the allocator for the data objects they depend
41  * on.
42  * If you use DualAllocator at this time you need to handle data transfer yourself.
43  *
44  * \todo the OMPTarget allocation can be a "device" allocator comparable to CUDAAllocator
45  * Then OMPallocator can be replaced by a DualAllocator<T, OffloadAllocator<T>, PinnedAllocator<T>>
46  */
47 template<typename T, class DeviceAllocator, class HostAllocator = std::allocator<T>>
48 struct DualAllocator : public HostAllocator
49 {
50  using Value = typename HostAllocator::value_type;
51  using Size = typename HostAllocator::size_type;
52  using Pointer = typename HostAllocator::pointer;
53  using ConstPointer = typename HostAllocator::const_pointer;
54 
55  DualAllocator() : device_ptr_(nullptr){};
56  DualAllocator(const DualAllocator&) : device_ptr_(nullptr) {}
58  {
59  device_ptr_ = nullptr;
60  }
61  template<class U, class V>
63  {}
64 
65  template<class U, class V>
66  struct rebind
67  {
69  };
70 
71  Value* allocate(std::size_t n)
72  {
73  static_assert(std::is_same<T, Value>::value, "DualAllocator and HostAllocator data types must agree!");
74  if (device_ptr_ != nullptr)
75  throw std::runtime_error("DualAllocator does not support device reallocation");
76  Value* host_ptr = std::allocator_traits<HostAllocator>::allocate(allocator_, n);
77  device_ptr_ = std::allocator_traits<DeviceAllocator>::allocate(device_allocator_, n);
78  dual_device_mem_allocated += n * sizeof(T);
79  return host_ptr;
80  }
81 
82  void deallocate(Value* pt, std::size_t n)
83  {
84  dual_device_mem_allocated -= n * sizeof(T);
85  std::allocator_traits<DeviceAllocator>::deallocate(device_allocator_, device_ptr_, n);
86  std::allocator_traits<HostAllocator>::deallocate(allocator_, pt, n);
87  device_ptr_ = nullptr;
88  }
89 
90  void attachReference(const DualAllocator& from, std::ptrdiff_t ptr_offset)
91  {
92  device_ptr_ = const_cast<Pointer>(from.get_device_ptr()) + ptr_offset;
93  }
94 
95  T* get_device_ptr() { return device_ptr_; }
96  const T* get_device_ptr() const { return device_ptr_; }
97 
100 
101 private:
102  HostAllocator allocator_;
105 };
106 
107 template<typename T, class DeviceAllocator, class HostAllocator>
109 {
111  static constexpr bool is_host_accessible = true;
112  static constexpr bool is_dual_space = true;
113 
114  static void fill_n(T* ptr, size_t n, const T& value) { qmc_allocator_traits<HostAllocator>::fill_n(ptr, n, value); }
115 
116  static void attachReference(const DualAlloc& from, DualAlloc& to, std::ptrdiff_t ptr_offset)
117  {
118  to.attachReference(from, ptr_offset);
119  }
120 
121  /** update to the device, assumes you are copying starting with the implicit host_ptr.
122  *
123  * These follow the openmp target semantics where you only provide the host
124  * side of a host_ptr device_ptr pair but the verb relates to what happens on the device.
125  *
126  * This is primarily for testing to reduce ifdef code and single "flavor" testing
127  *
128  * This a generic API and unlikely to be the best way to handle performance critical transfers,
129  * but if you have to use it or ifdef at a level above a xxxCUDA.cu or xxxOMPTarget.hpp file
130  * thats an issue.
131  */
132  static void updateTo(DualAlloc& alloc, T* host_ptr, size_t n, size_t offset = 0)
133  {
134  alloc.get_device_allocator().copyToDevice(alloc.get_device_ptr() + offset, host_ptr + offset, n);
135  }
136 
137  /** update from the device, assumes you are copying starting with the device_ptr to the implicit host_ptr.
138  */
139  static void updateFrom(DualAlloc& alloc, T* host_ptr, size_t n, size_t offset = 0)
140  {
141  alloc.get_device_allocator().copyFromDevice(host_ptr + offset, alloc.get_device_ptr() + offset, n);
142  }
143 
144  static void deviceSideCopyN(DualAlloc& alloc, size_t to, size_t n, size_t from)
145  {
146  T* device_ptr = alloc.get_device_ptr();
147  T* to_ptr = device_ptr + to;
148  T* from_ptr = device_ptr + from;
149  alloc.get_device_allocator().copyDeviceToDevice(to_ptr, n, from_ptr);
150  }
151 };
152 
153 } // namespace qmcplusplus
154 
155 #endif
DualAllocator(const DualAllocator< U, V > &)
Generalizes the DualMemorySpace allocator This provides a limited alternative to OMPallocator for tes...
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
static void deviceSideCopyN(DualAlloc &alloc, size_t to, size_t n, size_t from)
DeviceAllocator device_allocator_
static void updateFrom(DualAlloc &alloc, T *host_ptr, size_t n, size_t offset=0)
update from the device, assumes you are copying starting with the device_ptr to the implicit host_ptr...
static void attachReference(const DualAlloc &from, DualAlloc &to, std::ptrdiff_t ptr_offset)
this file provides three C++ memory allocators using CUDA specific memory allocation functions...
const DeviceAllocator & get_device_allocator() const
DualAllocator(const DualAllocator &)
static void updateTo(DualAlloc &alloc, T *host_ptr, size_t n, size_t offset=0)
update to the device, assumes you are copying starting with the implicit host_ptr.
typename HostAllocator::pointer Pointer
void attachReference(const DualAllocator &from, std::ptrdiff_t ptr_offset)
static constexpr bool is_host_accessible
DeviceAllocator & get_device_allocator()
template class analogous to std::allocator_traits.
const T * get_device_ptr() const
size_t getDualDeviceMemAllocated()
typename HostAllocator::const_pointer ConstPointer
std::atomic< size_t > dual_device_mem_allocated
this file provides three C++ memory allocators using SYCL specific memory allocation functions...
typename HostAllocator::size_type Size
void deallocate(Value *pt, std::size_t n)
DualAllocator & operator=(const DualAllocator &)
typename HostAllocator::value_type Value
Value * allocate(std::size_t n)
static void fill_n(value_type *ptr, size_t n, const value_type &value)
QMCTraits::FullPrecRealType value_type