20 const int* restrict delay_list_gpu,
21 const int delay_count,
26 const T* restrict Ainv,
27 const std::vector<sycl::event>& dependencies)
29 const size_t BS = 128;
30 const size_t NB = (numorbs + BS - 1) / BS;
32 return aq.parallel_for(sycl::nd_range<1>{{BS * NB}, {BS}}, dependencies, [=](sycl::nd_item<1> item) {
33 int col = item.get_global_id(0);
36 for (
int row = 0; row < delay_count; row++)
38 const T* Ainv_row = Ainv + numorbs * delay_list_gpu[row];
39 T* V_row = V_gpu + numorbs * row;
41 V_row[col] = Ainv_row[col];
45 if (col < delay_count)
46 temp_gpu[ndelay * delay_list_gpu[col] + col] -= T(1);
51 const int* restrict delay_list_gpu,
52 const int delay_count,
53 float* restrict temp_gpu,
56 float* restrict V_gpu,
57 const float* restrict Ainv,
58 const std::vector<sycl::event>& dependencies);
61 const int* restrict delay_list_gpu,
62 const int delay_count,
63 double* restrict temp_gpu,
66 double* restrict V_gpu,
67 const double* restrict Ainv,
68 const std::vector<sycl::event>& dependencies);
71 const int* restrict delay_list_gpu,
72 const int delay_count,
73 std::complex<float>* restrict temp_gpu,
76 std::complex<float>* restrict V_gpu,
77 const std::complex<float>* restrict Ainv,
78 const std::vector<sycl::event>& dependencies);
81 const int* restrict delay_list_gpu,
82 const int delay_count,
83 std::complex<double>* restrict temp_gpu,
86 std::complex<double>* restrict V_gpu,
87 const std::complex<double>* restrict Ainv,
88 const std::vector<sycl::event>& dependencies);
91 template<
typename T,
typename TMAT,
typename INDEX>
95 const TMAT* restrict a,
96 const INDEX* restrict pivot,
97 const std::vector<sycl::event>& dependencies)
99 constexpr
size_t COLBS = 128;
101 std::complex<T> result{};
103 sycl::buffer<std::complex<T>, 1> abuff(&result, {1});
104 aq.submit([&](sycl::handler& cgh) {
105 cgh.depends_on(dependencies);
107 size_t n_max = ((
n + COLBS - 1) / COLBS) * COLBS;
108 sycl::global_ptr<const TMAT>
A{a};
109 sycl::global_ptr<const INDEX> Pivot{pivot};
110 cgh.parallel_for(sycl::range<1>{n_max}, sycl::reduction(abuff, cgh, {T{}, T{}}, std::plus<std::complex<T>>()),
111 [=](sycl::id<1> i,
auto& sum) {
112 std::complex<T> val{};
114 val = (Pivot[i] == i + 1) ?
std::log(std::complex<T>(
A[i *
lda + i]))
126 const double* restrict a,
127 const std::int64_t* restrict pivot,
128 const std::vector<sycl::event>& dependencies);
133 const std::complex<double>* restrict a,
134 const std::int64_t* restrict pivot,
135 const std::vector<sycl::event>& dependencies);
helper functions for EinsplineSetBuilder
std::complex< T > computeLogDet_sycl(sycl::queue &aq, int n, int lda, const TMAT *restrict a, const INDEX *restrict pivot, const std::vector< sycl::event > &dependencies)
sycl::event applyW_stageV_sycl(sycl::queue &aq, const int *restrict delay_list_gpu, const int delay_count, T *restrict temp_gpu, const int numorbs, const int ndelay, T *restrict V_gpu, const T *restrict Ainv, const std::vector< sycl::event > &dependencies)
MakeReturn< UnaryNode< FnLog, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t log(const Vector< T1, C1 > &l)