QMCPACK
SYCLDeviceManager.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2022 QMCPACK developers.
6 //
7 // File developed by: Thomas Applencourt, apl@anl.gov, Argonne National Laboratory
8 // Ye Luo, yeluo@anl.gov, Argonne National Laboratory
9 //
10 // File created by: Thomas Applencourt, apl@anl.gov, Argonne National Laboratory
11 //
12 //////////////////////////////////////////////////////////////////////////////////////
13 
14 
15 #include "SYCLDeviceManager.h"
16 #include <stdexcept>
17 #include <string>
18 #include <algorithm>
19 #include "OutputManager.h"
21 
22 namespace qmcplusplus
23 {
24 #if defined(ENABLE_OFFLOAD)
25 syclDeviceInfo::syclDeviceInfo(const sycl::context& context, const sycl::device& device, const omp_interop_t& interop)
26  : context_(context), device_(device), interop_(interop)
27 #else
28 syclDeviceInfo::syclDeviceInfo(const sycl::context& context, const sycl::device& device)
29  : context_(context), device_(device)
30 #endif
31 {}
32 
34 {
35 #if defined(ENABLE_OFFLOAD)
36 #pragma omp interop destroy(interop_)
37 #endif
38 }
39 
40 std::unique_ptr<sycl::queue> SYCLDeviceManager::default_device_queue;
41 
42 SYCLDeviceManager::SYCLDeviceManager(int& default_device_num, int& num_devices, int local_rank, int local_size)
43  : sycl_default_device_num(-1)
44 {
45 #if defined(ENABLE_OFFLOAD)
46  const size_t omp_num_devices = omp_get_num_devices();
47  visible_devices.reserve(omp_num_devices);
48  for (int id = 0; id < omp_num_devices; id++)
49  {
50  omp_interop_t interop;
51 #pragma omp interop device(id) init(prefer_type("level_zero"), targetsync : interop)
52 
53  int err = -1;
54  const std::string omp_backend_name(omp_get_interop_str(interop, omp_ipr_fr_name, &err));
55  if (err != omp_irc_success)
56  throw std::runtime_error("omp_get_interop_str(omp_ipr_fr_name) failed!");
57 
58  if (omp_backend_name.find("level_zero") != 0)
59  throw std::runtime_error("Interop between OpenMP and SYCL is only supported when both implementations are built "
60  "on top of Level Zero API.");
61 
62  auto hPlatform = omp_get_interop_ptr(interop, omp_ipr_platform, &err);
63  if (err != omp_irc_success)
64  throw std::runtime_error("omp_get_interop_ptr(omp_ipr_platform) failed!");
65  auto hContext = omp_get_interop_ptr(interop, omp_ipr_device_context, &err);
66  if (err != omp_irc_success)
67  throw std::runtime_error("omp_get_interop_ptr(omp_ipr_device_context) failed!");
68  auto hDevice = omp_get_interop_ptr(interop, omp_ipr_device, &err);
69  if (err != omp_irc_success)
70  throw std::runtime_error("omp_get_interop_ptr(omp_ipr_device) failed!");
71 
72  const sycl::platform sycl_platform =
73  sycl::ext::oneapi::level_zero::make_platform(reinterpret_cast<pi_native_handle>(hPlatform));
74 
75  const sycl::device sycl_device =
76  sycl::ext::oneapi::level_zero::make_device(sycl_platform, reinterpret_cast<pi_native_handle>(hDevice));
77 
79  .emplace_back(sycl::ext::oneapi::level_zero::make_context({sycl_device},
80  reinterpret_cast<pi_native_handle>(hContext),
81  true /* keep the ownership, no transfer */),
82  sycl_device, interop);
83  }
84 
85 #else
86  // Potentially multiple GPU platform.
87  std::vector<sycl::platform> platforms = sycl::platform::get_platforms();
88  if (platforms.empty())
89  throw std::runtime_error("Cannot find SYCL platforms!");
90 
91  // find out devices from the first platform with GPUs.
92  std::vector<sycl::device> devices;
93  app_log() << "Visible SYCL platforms are :" << std::endl;
94  for (auto& platform : platforms)
95  {
96  std::vector<sycl::device> gpu_devices = platform.get_devices(sycl::info::device_type::gpu);
97  const auto gpu_count = gpu_devices.size();
98  bool selected = false;
99  if (devices.empty() && gpu_count > 0)
100  {
101  selected = true;
102  devices = std::move(gpu_devices);
103  }
104  app_log() << (selected ? " ** " : " ") << platform.get_info<sycl::info::platform::name>() << " with "
105  << gpu_count << " GPUs." << std::endl;
106  }
107  app_log() << std::endl;
108 
109  visible_devices.reserve(devices.size());
110  for (int id = 0; id < devices.size(); id++)
111  visible_devices.emplace_back(sycl::context(devices[id]), devices[id]);
112 #endif
113 
114  const size_t sycl_device_count = visible_devices.size();
115  if (num_devices == 0)
116  num_devices = sycl_device_count;
117  else if (num_devices != sycl_device_count)
118  throw std::runtime_error("Inconsistent number of SYCL devices with the previous record!");
119  if (sycl_device_count > local_size)
120  app_warning() << "More SYCL devices than the number of MPI ranks. "
121  << "Some devices will be left idle.\n"
122  << "There is potential performance issue with the GPU affinity.\n";
123  if (num_devices > 0)
124  {
125  sycl_default_device_num = determineDefaultDeviceNum(sycl_device_count, local_rank, local_size);
126  if (default_device_num < 0)
127  default_device_num = sycl_default_device_num;
128  else if (default_device_num != sycl_default_device_num)
129  throw std::runtime_error("Inconsistent assigned SYCL devices with the previous record!");
130  default_device_queue = std::make_unique<sycl::queue>(visible_devices[sycl_default_device_num].get_context(),
132  sycl::property::queue::in_order());
133  if (!visible_devices[sycl_default_device_num].get_device().has(sycl::aspect::ext_intel_free_memory))
134  app_warning()
135  << "Free memory queries always return 0 due to inactive 'oneAPI' System Resource Management (sysman). "
136  << "Set environment variable ZES_ENABLE_SYSMAN to 1 to activate the query feature." << std::endl;
137  }
138 }
139 
141 {
143  throw std::runtime_error("SYCLDeviceManager::getDefaultDeviceQueue() the global instance not initialized.");
144  return *default_device_queue;
145 }
146 
147 } // namespace qmcplusplus
static std::unique_ptr< sycl::queue > default_device_queue
the global singleton which can be used to access the default queue of the default device...
std::ostream & app_warning()
Definition: OutputManager.h:69
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
Declaration of OutputManager class.
std::ostream & app_log()
Definition: OutputManager.h:65
SYCLDeviceManager(int &default_device_num, int &num_devices, int local_rank, int local_size)
std::vector< syclDeviceInfo > visible_devices
syclDeviceInfo(const sycl::context &context, const sycl::device &device)
int determineDefaultDeviceNum(int num_devices, int rank_id, int num_ranks)
distribute MPI ranks among devices
static sycl::queue & getDefaultDeviceDefaultQueue()
access the DeviceManager owned default queue.