QMCPACK
sycl_determinant_helper.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2022 QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
8 // Ye Luo, yeluo@anl.gov, Argonne National Laboratory
9 //
10 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
11 //////////////////////////////////////////////////////////////////////////////////////
12 
13 
15 
16 namespace qmcplusplus
17 {
18 template<typename T>
20  const int* restrict delay_list_gpu,
21  const int delay_count,
22  T* restrict temp_gpu,
23  const int numorbs,
24  const int ndelay,
25  T* restrict V_gpu,
26  const T* restrict Ainv,
27  const std::vector<sycl::event>& dependencies)
28 {
29  const size_t BS = 128;
30  const size_t NB = (numorbs + BS - 1) / BS;
31 
32  return aq.parallel_for(sycl::nd_range<1>{{BS * NB}, {BS}}, dependencies, [=](sycl::nd_item<1> item) {
33  int col = item.get_global_id(0);
34 
35  // move rows of Ainv to V
36  for (int row = 0; row < delay_count; row++)
37  {
38  const T* Ainv_row = Ainv + numorbs * delay_list_gpu[row];
39  T* V_row = V_gpu + numorbs * row;
40  if (col < numorbs)
41  V_row[col] = Ainv_row[col];
42  }
43 
44  // apply W to temp
45  if (col < delay_count)
46  temp_gpu[ndelay * delay_list_gpu[col] + col] -= T(1);
47  });
48 }
49 
50 template sycl::event applyW_stageV_sycl(sycl::queue& aq,
51  const int* restrict delay_list_gpu,
52  const int delay_count,
53  float* restrict temp_gpu,
54  const int numorbs,
55  const int ndelay,
56  float* restrict V_gpu,
57  const float* restrict Ainv,
58  const std::vector<sycl::event>& dependencies);
59 
60 template sycl::event applyW_stageV_sycl(sycl::queue& aq,
61  const int* restrict delay_list_gpu,
62  const int delay_count,
63  double* restrict temp_gpu,
64  const int numorbs,
65  const int ndelay,
66  double* restrict V_gpu,
67  const double* restrict Ainv,
68  const std::vector<sycl::event>& dependencies);
69 
70 template sycl::event applyW_stageV_sycl(sycl::queue& aq,
71  const int* restrict delay_list_gpu,
72  const int delay_count,
73  std::complex<float>* restrict temp_gpu,
74  const int numorbs,
75  const int ndelay,
76  std::complex<float>* restrict V_gpu,
77  const std::complex<float>* restrict Ainv,
78  const std::vector<sycl::event>& dependencies);
79 
80 template sycl::event applyW_stageV_sycl(sycl::queue& aq,
81  const int* restrict delay_list_gpu,
82  const int delay_count,
83  std::complex<double>* restrict temp_gpu,
84  const int numorbs,
85  const int ndelay,
86  std::complex<double>* restrict V_gpu,
87  const std::complex<double>* restrict Ainv,
88  const std::vector<sycl::event>& dependencies);
89 
90 
91 template<typename T, typename TMAT, typename INDEX>
92 std::complex<T> computeLogDet_sycl(sycl::queue& aq,
93  int n,
94  int lda,
95  const TMAT* restrict a,
96  const INDEX* restrict pivot,
97  const std::vector<sycl::event>& dependencies)
98 {
99  constexpr size_t COLBS = 128;
100 
101  std::complex<T> result{};
102  {
103  sycl::buffer<std::complex<T>, 1> abuff(&result, {1});
104  aq.submit([&](sycl::handler& cgh) {
105  cgh.depends_on(dependencies);
106 
107  size_t n_max = ((n + COLBS - 1) / COLBS) * COLBS;
108  sycl::global_ptr<const TMAT> A{a};
109  sycl::global_ptr<const INDEX> Pivot{pivot};
110  cgh.parallel_for(sycl::range<1>{n_max}, sycl::reduction(abuff, cgh, {T{}, T{}}, std::plus<std::complex<T>>()),
111  [=](sycl::id<1> i, auto& sum) {
112  std::complex<T> val{};
113  if (i < n)
114  val = (Pivot[i] == i + 1) ? std::log(std::complex<T>(A[i * lda + i]))
115  : std::log(std::complex<T>(-A[i * lda + i]));
116  sum.combine(val);
117  });
118  });
119  } //synchronous
120  return result;
121 }
122 
123 template std::complex<double> computeLogDet_sycl(sycl::queue& aq,
124  int n,
125  int lda,
126  const double* restrict a,
127  const std::int64_t* restrict pivot,
128  const std::vector<sycl::event>& dependencies);
129 
130 template std::complex<double> computeLogDet_sycl(sycl::queue& aq,
131  int n,
132  int lda,
133  const std::complex<double>* restrict a,
134  const std::int64_t* restrict pivot,
135  const std::vector<sycl::event>& dependencies);
136 } // namespace qmcplusplus
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
std::complex< T > computeLogDet_sycl(sycl::queue &aq, int n, int lda, const TMAT *restrict a, const INDEX *restrict pivot, const std::vector< sycl::event > &dependencies)
sycl::event applyW_stageV_sycl(sycl::queue &aq, const int *restrict delay_list_gpu, const int delay_count, T *restrict temp_gpu, const int numorbs, const int ndelay, T *restrict V_gpu, const T *restrict Ainv, const std::vector< sycl::event > &dependencies)
MakeReturn< UnaryNode< FnLog, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t log(const Vector< T1, C1 > &l)