QMCPACK
LCAOrbitalSet.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 
13 #include "LCAOrbitalSet.h"
15 #include "CPU/BLAS.hpp"
16 #include "OMPTarget/ompBLAS.hpp"
17 #include <ResourceCollection.h>
18 #include <AccelBLAS.hpp>
19 
20 namespace qmcplusplus
21 {
22 
24 {
25  template<typename DT>
27 
28  LCAOMultiWalkerMem() : Resource("LCAOrbitalSet"), blas_handle(queue) {}
30 
31  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<LCAOMultiWalkerMem>(*this); }
32 
33  OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO]
34  OffloadMWVGLArray basis_vgl_mw; // [5][NW][NumAO]
35  OffloadMWVArray phi_v; // [NW][NumMO]
36  OffloadMWVArray basis_v_mw; // [NW][NumAO]
37  OffloadMWVArray vp_phi_v; // [NVPs][NumMO]
38  OffloadMWVArray vp_basis_v_mw; // [NVPs][NumAO]
40  OffloadMatrix<ValueType> rg_buffer; // [4][NVPs]
42 
43 #if defined(ENABLE_CUDA) && defined(ENABLE_OFFLOAD)
46 #else
49 #endif
50 };
51 
52 LCAOrbitalSet::LCAOrbitalSet(const std::string& my_name,
53  std::unique_ptr<basis_type>&& bs,
54  size_t norbs,
55  bool identity,
56  bool use_offload)
57  : SPOSet(my_name),
58  BasisSetSize(bs ? bs->getBasisSetSize() : 0),
59  Identity(identity),
60  useOMPoffload_(use_offload),
61  basis_timer_(createGlobalTimer("LCAOrbitalSet::Basis", timer_level_fine)),
62  mo_timer_(createGlobalTimer("LCAOrbitalSet::MO", timer_level_fine))
63 {
64  if (!bs)
65  throw std::runtime_error("LCAOrbitalSet cannot take nullptr as its basis set!");
66  myBasisSet = std::move(bs);
67  OrbitalSetSize = norbs;
71  OrbitalSetSize = norbs;
72  if (!Identity)
73  {
77  C = std::make_shared<OffloadValueMatrix>(OrbitalSetSize, BasisSetSize);
78  }
80 }
81 
83  : SPOSet(in),
84  myBasisSet(in.myBasisSet->makeClone()),
85  C(in.C),
86  BasisSetSize(in.BasisSetSize),
87  C_copy(in.C_copy),
88  Identity(in.Identity),
89  useOMPoffload_(in.useOMPoffload_),
90  basis_timer_(in.basis_timer_),
91  mo_timer_(in.mo_timer_)
92 {
96  if (!in.Identity)
97  {
101  }
103 }
104 
106 {
107  throw std::runtime_error("LCAOrbitalSet::setOrbitalSetSize should not be called");
108 }
109 
111 {
112  if (Identity)
113  {
115  throw std::runtime_error(
116  "LCAOrbitalSet::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!");
117  if (C)
118  throw std::runtime_error("LCAOrbitalSet::checkObject C should be nullptr if Identity = true!");
119  }
120  else
121  {
122  if (!C)
123  throw std::runtime_error("LCAOrbitalSet::checkObject C should not be nullptr if Identity = false!");
124  if (OrbitalSetSize != C->rows())
125  throw std::runtime_error("LCAOrbitalSet::checkObject C rows doesn't match OrbitalSetSize.");
126  if (BasisSetSize != C->cols())
127  throw std::runtime_error("LCAOrbitalSet::checkObject C columns doesn't match BasisSetSize.");
128  }
129 }
130 
132 {
133  if (C)
134  C->updateTo();
135 }
136 
138 {
139  myBasisSet->createResource(collection);
140 
141  auto resource_index = collection.addResource(std::make_unique<LCAOMultiWalkerMem>());
142 }
143 
145 {
146  assert(this == &spo_list.getLeader());
147  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
148 
149  spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list));
150 
151  spo_leader.mw_mem_handle_ = collection.lendResource<LCAOMultiWalkerMem>();
152 }
153 
155 {
156  assert(this == &spo_list.getLeader());
157  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
158 
159  spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list));
160 
161  collection.takebackResource(spo_leader.mw_mem_handle_);
162 }
163 
165  const RefVectorWithLeader<SPOSet>& spo_list) const
166 {
168  basis_list.reserve(spo_list.size());
169  for (size_t iw = 0; iw < spo_list.size(); iw++)
170  basis_list.push_back(*spo_list.getCastedElement<LCAOrbitalSet>(iw).myBasisSet);
171  return basis_list;
172 }
173 std::unique_ptr<SPOSet> LCAOrbitalSet::makeClone() const { return std::make_unique<LCAOrbitalSet>(*this); }
174 
176 {
177  if (Identity)
178  { //PAY ATTENTION TO COMPLEX
179  myBasisSet->evaluateV(P, iat, psi.data());
180  }
181  else
182  {
184  myBasisSet->evaluateV(P, iat, vTemp.data());
185  assert(psi.size() <= OrbitalSetSize);
186  ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
187  MatrixOperators::product(C_partial_view, vTemp, psi);
188  }
189 }
190 
191 /** Find a better place for other user classes, Matrix should be padded as well */
192 template<typename T, unsigned D, typename Alloc>
194 {
195  constexpr char transa = 't';
196  constexpr char transb = 'n';
197  constexpr T zone(1);
198  constexpr T zero(0);
199  BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(),
200  C.capacity());
201 }
202 
204  ValueVector& psi,
205  GradVector& dpsi,
206  ValueVector& d2psi) const
207 {
208  const size_t output_size = psi.size();
209  std::copy_n(temp.data(0), output_size, psi.data());
210  const ValueType* restrict gx = temp.data(1);
211  const ValueType* restrict gy = temp.data(2);
212  const ValueType* restrict gz = temp.data(3);
213  for (size_t j = 0; j < output_size; j++)
214  {
215  dpsi[j][0] = gx[j];
216  dpsi[j][1] = gy[j];
217  dpsi[j][2] = gz[j];
218  }
219  std::copy_n(temp.data(4), output_size, d2psi.data());
220 }
221 
223  ValueVector& psi,
224  GradVector& dpsi,
225  HessVector& d2psi) const
226 {
227  const size_t output_size = psi.size();
228  std::copy_n(temp.data(0), output_size, psi.data());
229  const ValueType* restrict gx = temp.data(1);
230  const ValueType* restrict gy = temp.data(2);
231  const ValueType* restrict gz = temp.data(3);
232  const ValueType* restrict hxx = temp.data(4);
233  const ValueType* restrict hxy = temp.data(5);
234  const ValueType* restrict hxz = temp.data(6);
235  const ValueType* restrict hyy = temp.data(7);
236  const ValueType* restrict hyz = temp.data(8);
237  const ValueType* restrict hzz = temp.data(9);
238 
239  for (size_t j = 0; j < output_size; j++)
240  {
241  dpsi[j][0] = gx[j];
242  dpsi[j][1] = gy[j];
243  dpsi[j][2] = gz[j];
244 
245  d2psi[j](0, 0) = hxx[j];
246  d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
247  d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
248  d2psi[j](1, 1) = hyy[j];
249  d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
250  d2psi[j](2, 2) = hzz[j];
251  }
252 }
253 
255  int i,
256  ValueMatrix& psi,
257  GradMatrix& dpsi,
258  HessMatrix& d2psi,
259  GGGMatrix& dghpsi) const
260 {
261  const size_t output_size = psi.cols();
262  std::copy_n(temp.data(0), output_size, psi[i]);
263  const ValueType* restrict gx = temp.data(1);
264  const ValueType* restrict gy = temp.data(2);
265  const ValueType* restrict gz = temp.data(3);
266  const ValueType* restrict hxx = temp.data(4);
267  const ValueType* restrict hxy = temp.data(5);
268  const ValueType* restrict hxz = temp.data(6);
269  const ValueType* restrict hyy = temp.data(7);
270  const ValueType* restrict hyz = temp.data(8);
271  const ValueType* restrict hzz = temp.data(9);
272  const ValueType* restrict gh_xxx = temp.data(10);
273  const ValueType* restrict gh_xxy = temp.data(11);
274  const ValueType* restrict gh_xxz = temp.data(12);
275  const ValueType* restrict gh_xyy = temp.data(13);
276  const ValueType* restrict gh_xyz = temp.data(14);
277  const ValueType* restrict gh_xzz = temp.data(15);
278  const ValueType* restrict gh_yyy = temp.data(16);
279  const ValueType* restrict gh_yyz = temp.data(17);
280  const ValueType* restrict gh_yzz = temp.data(18);
281  const ValueType* restrict gh_zzz = temp.data(19);
282 
283  for (size_t j = 0; j < output_size; j++)
284  {
285  dpsi[i][j][0] = gx[j];
286  dpsi[i][j][1] = gy[j];
287  dpsi[i][j][2] = gz[j];
288 
289  d2psi[i][j](0, 0) = hxx[j];
290  d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
291  d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
292  d2psi[i][j](1, 1) = hyy[j];
293  d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
294  d2psi[i][j](2, 2) = hzz[j];
295 
296  dghpsi[i][j][0](0, 0) = gh_xxx[j]; //x|xx
297  dghpsi[i][j][0](0, 1) = gh_xxy[j]; //x|xy
298  dghpsi[i][j][0](0, 2) = gh_xxz[j]; //x|xz
299  dghpsi[i][j][0](1, 0) = gh_xxy[j]; //x|yx = xxy
300  dghpsi[i][j][0](1, 1) = gh_xyy[j]; //x|yy
301  dghpsi[i][j][0](1, 2) = gh_xyz[j]; //x|yz
302  dghpsi[i][j][0](2, 0) = gh_xxz[j]; //x|zx = xxz
303  dghpsi[i][j][0](2, 1) = gh_xyz[j]; //x|zy = xyz
304  dghpsi[i][j][0](2, 2) = gh_xzz[j]; //x|zz
305 
306  dghpsi[i][j][1](0, 0) = gh_xxy[j]; //y|xx = xxy
307  dghpsi[i][j][1](0, 1) = gh_xyy[j]; //y|xy = xyy
308  dghpsi[i][j][1](0, 2) = gh_xyz[j]; //y|xz = xyz
309  dghpsi[i][j][1](1, 0) = gh_xyy[j]; //y|yx = xyy
310  dghpsi[i][j][1](1, 1) = gh_yyy[j]; //y|yy
311  dghpsi[i][j][1](1, 2) = gh_yyz[j]; //y|yz
312  dghpsi[i][j][1](2, 0) = gh_xyz[j]; //y|zx = xyz
313  dghpsi[i][j][1](2, 1) = gh_yyz[j]; //y|zy = yyz
314  dghpsi[i][j][1](2, 2) = gh_yzz[j]; //y|zz
315 
316  dghpsi[i][j][2](0, 0) = gh_xxz[j]; //z|xx = xxz
317  dghpsi[i][j][2](0, 1) = gh_xyz[j]; //z|xy = xyz
318  dghpsi[i][j][2](0, 2) = gh_xzz[j]; //z|xz = xzz
319  dghpsi[i][j][2](1, 0) = gh_xyz[j]; //z|yx = xyz
320  dghpsi[i][j][2](1, 1) = gh_yyz[j]; //z|yy = yyz
321  dghpsi[i][j][2](1, 2) = gh_yzz[j]; //z|yz = yzz
322  dghpsi[i][j][2](2, 0) = gh_xzz[j]; //z|zx = xzz
323  dghpsi[i][j][2](2, 1) = gh_yzz[j]; //z|zy = yzz
324  dghpsi[i][j][2](2, 2) = gh_zzz[j]; //z|zz
325  }
326 }
327 
329  ValueVector& psi,
330  GradVector& dpsi,
331  HessVector& d2psi,
332  GGGVector& dghpsi) const
333 {
334  const size_t output_size = psi.size();
335  std::copy_n(temp.data(0), output_size, psi.data());
336  const ValueType* restrict gx = temp.data(1);
337  const ValueType* restrict gy = temp.data(2);
338  const ValueType* restrict gz = temp.data(3);
339  const ValueType* restrict hxx = temp.data(4);
340  const ValueType* restrict hxy = temp.data(5);
341  const ValueType* restrict hxz = temp.data(6);
342  const ValueType* restrict hyy = temp.data(7);
343  const ValueType* restrict hyz = temp.data(8);
344  const ValueType* restrict hzz = temp.data(9);
345  const ValueType* restrict gh_xxx = temp.data(10);
346  const ValueType* restrict gh_xxy = temp.data(11);
347  const ValueType* restrict gh_xxz = temp.data(12);
348  const ValueType* restrict gh_xyy = temp.data(13);
349  const ValueType* restrict gh_xyz = temp.data(14);
350  const ValueType* restrict gh_xzz = temp.data(15);
351  const ValueType* restrict gh_yyy = temp.data(16);
352  const ValueType* restrict gh_yyz = temp.data(17);
353  const ValueType* restrict gh_yzz = temp.data(18);
354  const ValueType* restrict gh_zzz = temp.data(19);
355 
356  for (size_t j = 0; j < output_size; j++)
357  {
358  dpsi[j][0] = gx[j];
359  dpsi[j][1] = gy[j];
360  dpsi[j][2] = gz[j];
361 
362  d2psi[j](0, 0) = hxx[j];
363  d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
364  d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
365  d2psi[j](1, 1) = hyy[j];
366  d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
367  d2psi[j](2, 2) = hzz[j];
368 
369  dghpsi[j][0](0, 0) = gh_xxx[j]; //x|xx
370  dghpsi[j][0](0, 1) = gh_xxy[j]; //x|xy
371  dghpsi[j][0](0, 2) = gh_xxz[j]; //x|xz
372  dghpsi[j][0](1, 0) = gh_xxy[j]; //x|yx = xxy
373  dghpsi[j][0](1, 1) = gh_xyy[j]; //x|yy
374  dghpsi[j][0](1, 2) = gh_xyz[j]; //x|yz
375  dghpsi[j][0](2, 0) = gh_xxz[j]; //x|zx = xxz
376  dghpsi[j][0](2, 1) = gh_xyz[j]; //x|zy = xyz
377  dghpsi[j][0](2, 2) = gh_xzz[j]; //x|zz
378 
379  dghpsi[j][1](0, 0) = gh_xxy[j]; //y|xx = xxy
380  dghpsi[j][1](0, 1) = gh_xyy[j]; //y|xy = xyy
381  dghpsi[j][1](0, 2) = gh_xyz[j]; //y|xz = xyz
382  dghpsi[j][1](1, 0) = gh_xyy[j]; //y|yx = xyy
383  dghpsi[j][1](1, 1) = gh_yyy[j]; //y|yy
384  dghpsi[j][1](1, 2) = gh_yyz[j]; //y|yz
385  dghpsi[j][1](2, 0) = gh_xyz[j]; //y|zx = xyz
386  dghpsi[j][1](2, 1) = gh_xyy[j]; //y|xy = xyy
387  dghpsi[j][1](2, 2) = gh_yzz[j]; //y|zz
388 
389  dghpsi[j][2](0, 0) = gh_xzz[j]; //z|xx = xzz
390  dghpsi[j][2](0, 1) = gh_xyz[j]; //z|xy = xyz
391  dghpsi[j][2](0, 2) = gh_xzz[j]; //z|xz = xzz
392  dghpsi[j][2](1, 0) = gh_xyz[j]; //z|yx = xyz
393  dghpsi[j][2](1, 1) = gh_yyz[j]; //z|yy = yyz
394  dghpsi[j][2](1, 2) = gh_yzz[j]; //z|yz = yzz
395  dghpsi[j][2](2, 0) = gh_xzz[j]; //z|zx = xzz
396  dghpsi[j][2](2, 1) = gh_yzz[j]; //z|zy = yzz
397  dghpsi[j][2](2, 2) = gh_zzz[j]; //z|zz
398  }
399 }
400 
402 {
403  const size_t output_size = dpsi.size();
404  const ValueType* restrict gx = temp.data(1);
405  const ValueType* restrict gy = temp.data(2);
406  const ValueType* restrict gz = temp.data(3);
407 
408  for (size_t j = 0; j < output_size; j++)
409  {
410  //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
411  // for an atomic center, the ion gradient is the negative of the elecron gradient.
412  // Hence minus signs for each of these.
413  dpsi[j][0] = -gx[j];
414  dpsi[j][1] = -gy[j];
415  dpsi[j][2] = -gz[j];
416  }
417 }
418 
419 
420 void LCAOrbitalSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
421 {
422  //TAKE CARE OF IDENTITY
423  {
424  ScopedTimer local(basis_timer_);
425  myBasisSet->evaluateVGL(P, iat, Temp);
426  }
427 
428  if (Identity)
429  evaluate_vgl_impl(Temp, psi, dpsi, d2psi);
430  else
431  {
432  assert(psi.size() <= OrbitalSetSize);
433  {
434  ScopedTimer local(mo_timer_);
435  ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
436  Product_ABt(Temp, C_partial_view, Tempv);
437  }
438  evaluate_vgl_impl(Tempv, psi, dpsi, d2psi);
439  }
440 }
441 
443  const RefVectorWithLeader<ParticleSet>& P_list,
444  int iat,
445  const RefVector<ValueVector>& psi_v_list,
446  const RefVector<GradVector>& dpsi_v_list,
447  const RefVector<ValueVector>& d2psi_v_list) const
448 {
449  assert(this == &spo_list.getLeader());
450  if (!useOMPoffload_)
451  {
452  SPOSet::mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
453  return;
454  }
455 
456  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
457  auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v;
458 
459  phi_vgl_v.resize(DIM_VGL, spo_list.size(), OrbitalSetSize);
460  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
461 
462  const size_t nw = phi_vgl_v.size(1);
463  phi_vgl_v.updateFrom();
464 
465  //TODO: make this cleaner?
466  for (int iw = 0; iw < nw; iw++)
467  {
468  const size_t output_size = psi_v_list[iw].get().size();
469  std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data());
470  std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data());
471  // grads are [dim, walker, orb] in phi_vgl_v
472  // [walker][orb, dim] in dpsi_v_list
473  for (size_t idim = 0; idim < DIM; idim++)
474  BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], DIM);
475  }
476 }
477 
479  const RefVectorWithLeader<ParticleSet>& P_list,
480  int iat,
481  OffloadMWVGLArray& phi_vgl_v) const
482 {
483  assert(this == &spo_list.getLeader());
484  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
485  auto& mw_res = spo_leader.mw_mem_handle_.getResource();
486  auto& basis_vgl_mw = mw_res.basis_vgl_mw;
487  basis_vgl_mw.resize(DIM_VGL, spo_list.size(), BasisSetSize);
488 
489  {
490  ScopedTimer local(basis_timer_);
491  auto basis_list = spo_leader.extractBasisRefList(spo_list);
492  myBasisSet->mw_evaluateVGL(basis_list, P_list, iat, basis_vgl_mw);
493  // basis_vgl_mw correct on device
494  }
495 
496  if (Identity)
497  {
498  const size_t output_size = phi_vgl_v.size(2);
499  const size_t nw = phi_vgl_v.size(1);
500  if (useOMPoffload_)
501  {
502  int dummy_handle = 0;
503  int success = 0;
504  success = ompBLAS::copy(dummy_handle, output_size * nw * DIM_VGL, basis_vgl_mw.device_data(), 1,
505  phi_vgl_v.device_data(), 1);
506  if (success != 0)
507  throw std::runtime_error("In LCAOrbitalSet::mw_evaluateVGLImplGEMM ompBLAS::copy failed.");
508  }
509  else
510  std::copy_n(basis_vgl_mw.data(), output_size * nw * DIM_VGL, phi_vgl_v.data());
511  }
512  else
513  {
514  ScopedTimer local(mo_timer_);
515  const size_t requested_orb_size = phi_vgl_v.size(2);
516  assert(requested_orb_size <= OrbitalSetSize);
517  {
518  if (useOMPoffload_)
519  {
520  auto* c_devptr = C->device_data();
521  compute::BLAS::gemm(mw_res.blas_handle, 'T', 'N',
522  requested_orb_size, // MOs
523  spo_list.size() * DIM_VGL, // walkers * DIM_VGL
524  BasisSetSize, // AOs
525  ValueType(1), c_devptr, BasisSetSize, basis_vgl_mw.device_data(), BasisSetSize,
526  ValueType(0), phi_vgl_v.device_data(), requested_orb_size);
527  mw_res.queue.sync();
528  }
529  else
530  {
531  ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
532  // TODO: make class for general blas interface in Platforms
533  // have instance of that class as member of LCAOrbitalSet, call gemm through that
534  BLAS::gemm('T', 'N',
535  requested_orb_size, // MOs
536  spo_list.size() * DIM_VGL, // walkers * DIM_VGL
537  BasisSetSize, // AOs
538  1, C_partial_view.data(), BasisSetSize, basis_vgl_mw.data(), BasisSetSize, 0, phi_vgl_v.data(),
539  requested_orb_size);
540  }
541  }
542  }
543  // phi_vgl_v correct on device if useOMPoffload_
544 }
545 
548  OffloadMWVArray& vp_phi_v) const
549 {
550  assert(this == &spo_list.getLeader());
551  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
552  auto& mw_res = spo_leader.mw_mem_handle_.getResource();
553  auto& vp_basis_v_mw = mw_res.vp_basis_v_mw;
554  //Splatter basis_v
555  const size_t nVPs = vp_phi_v.size(0);
556  vp_basis_v_mw.resize(nVPs, BasisSetSize);
557 
558  auto basis_list = spo_leader.extractBasisRefList(spo_list);
559  myBasisSet->mw_evaluateValueVPs(basis_list, vp_list, vp_basis_v_mw);
560 
561  if (Identity)
562  {
563  if (useOMPoffload_)
564  {
565  int dummy_handle = 0;
566  int success = 0;
567  success =
568  ompBLAS::copy(dummy_handle, OrbitalSetSize * nVPs, vp_basis_v_mw.device_data(), 1, vp_phi_v.device_data(), 1);
569  if (success != 0)
570  throw std::runtime_error("In LCAOrbitalSet::mw_evaluateValueVPsImplGEMM ompBLAS::copy failed.");
571  }
572  else
573  std::copy_n(vp_basis_v_mw.data_at(0, 0), OrbitalSetSize * nVPs, vp_phi_v.data_at(0, 0));
574  }
575  else
576  {
577  ScopedTimer local(mo_timer_);
578  const size_t requested_orb_size = vp_phi_v.size(1);
579  assert(requested_orb_size <= OrbitalSetSize);
580 
581  if (useOMPoffload_)
582  {
583  auto* c_devptr = C->device_data();
584  compute::BLAS::gemm(mw_res.blas_handle, 'T', 'N',
585  requested_orb_size, // MOs
586  nVPs, // walkers * Virtual Particles
587  BasisSetSize, // AOs
588  ValueType(1), c_devptr, BasisSetSize, vp_basis_v_mw.device_data(), BasisSetSize, ValueType(0),
589  vp_phi_v.device_data(), requested_orb_size);
590  mw_res.queue.sync();
591  }
592  else
593  {
594  ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
595  BLAS::gemm('T', 'N',
596  requested_orb_size, // MOs
597  nVPs, // walkers * Virtual Particles
598  BasisSetSize, // AOs
599  1, C_partial_view.data(), BasisSetSize, vp_basis_v_mw.data(), BasisSetSize, 0, vp_phi_v.data(),
600  requested_orb_size);
601  }
602  }
603 }
604 
606  const RefVectorWithLeader<ParticleSet>& P_list,
607  int iat,
608  const RefVector<ValueVector>& psi_v_list) const
609 {
610  assert(this == &spo_list.getLeader());
611  if (!useOMPoffload_)
612  {
613  SPOSet::mw_evaluateValue(spo_list, P_list, iat, psi_v_list);
614  return;
615  }
616 
617  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
618  auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v;
619  phi_v.resize(spo_list.size(), OrbitalSetSize);
620  mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v);
621 
622  const size_t output_size = phi_v.size(1);
623  const size_t nw = phi_v.size(0);
624  phi_v.updateFrom();
625 
626  for (int iw = 0; iw < nw; iw++)
627  std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data());
628 }
629 
631  const RefVectorWithLeader<ParticleSet>& P_list,
632  int iat,
633  OffloadMWVArray& phi_v) const
634 {
635  assert(this == &spo_list.getLeader());
636  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
637  const size_t nw = spo_list.size();
638  auto& mw_res = spo_leader.mw_mem_handle_.getResource();
639  auto& basis_v_mw = mw_res.basis_v_mw;
640  basis_v_mw.resize(nw, BasisSetSize);
641 
642  auto basis_list = spo_leader.extractBasisRefList(spo_list);
643  myBasisSet->mw_evaluateValue(basis_list, P_list, iat, basis_v_mw);
644 
645  if (Identity)
646  {
647  if (useOMPoffload_)
648  {
649  int dummy_handle = 0;
650  int success = 0;
651  success = ompBLAS::copy(dummy_handle, OrbitalSetSize * nw, basis_v_mw.device_data(), 1, phi_v.device_data(), 1);
652  if (success != 0)
653  throw std::runtime_error("In LCAOrbitalSet::mw_evaluateValueImplGEMM ompBLAS::copy failed.");
654  }
655  else
656  std::copy_n(basis_v_mw.data(), OrbitalSetSize * nw, phi_v.data());
657  }
658  else
659  {
660  ScopedTimer local(mo_timer_);
661  const size_t requested_orb_size = phi_v.size(1);
662  assert(requested_orb_size <= OrbitalSetSize);
663 
664  if (useOMPoffload_)
665  {
666  auto* c_devptr = C->device_data();
667  compute::BLAS::gemm(mw_res.blas_handle, 'T', 'N',
668  requested_orb_size, // MOs
669  nw, // walkers
670  BasisSetSize, // AOs
671  ValueType(1), c_devptr, BasisSetSize, basis_v_mw.device_data(), BasisSetSize, ValueType(0),
672  phi_v.device_data(), requested_orb_size);
673  mw_res.queue.sync();
674  }
675  else
676  {
677  ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
678  BLAS::gemm('T', 'N',
679  requested_orb_size, // MOs
680  spo_list.size(), // walkers
681  BasisSetSize, // AOs
682  1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(),
683  requested_orb_size);
684  }
685  }
686 }
687 
690  const RefVector<ValueVector>& psi_list,
691  const std::vector<const ValueType*>& invRow_ptr_list,
692  std::vector<std::vector<ValueType>>& ratios_list) const
693 {
694  assert(this == &spo_list.getLeader());
695  if (!useOMPoffload_)
696  {
697  SPOSet::mw_evaluateDetRatios(spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list);
698  return;
699  }
700 
701  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
702  auto& vp_phi_v = spo_leader.mw_mem_handle_.getResource().vp_phi_v;
703 
704  const size_t nVPs = VirtualParticleSet::countVPs(vp_list);
705  const size_t requested_orb_size = psi_list[0].get().size();
706  vp_phi_v.resize(nVPs, requested_orb_size);
707 
708  mw_evaluateValueVPsImplGEMM(spo_list, vp_list, vp_phi_v);
709 
710  if (useOMPoffload_)
711  {
712  const size_t nw = vp_list.size();
713  auto& invRow_deviceptr_list = spo_leader.mw_mem_handle_.getResource().invRow_deviceptr_list;
714  auto& rg_buffer = spo_leader.mw_mem_handle_.getResource().rg_buffer;
715 
716  invRow_deviceptr_list.resize(nVPs);
717  rg_buffer.resize(4, nVPs);
718 
719  for (size_t iw = 0, istart = 0; iw < nw; iw++)
720  {
721  const size_t nvp_i = vp_list[iw].getTotalNum();
722  std::fill_n(invRow_deviceptr_list.begin() + istart, nvp_i, invRow_ptr_list[iw]);
723  istart += nvp_i;
724  }
725  auto* invRow_deviceptr_list_ptr = invRow_deviceptr_list.data();
726  auto* vp_phi_v_ptr = vp_phi_v.data();
727  auto* rg_buffer_ptr = rg_buffer.data();
728  PRAGMA_OFFLOAD("omp target teams distribute parallel for \
729  map(always,to: invRow_deviceptr_list_ptr[:nVPs]) \
730  map(to: vp_phi_v_ptr[:nVPs*requested_orb_size]) \
731  map(always, from: rg_buffer_ptr[:nVPs])")
732  for (size_t ivp = 0; ivp < nVPs; ivp++)
733  {
734  rg_buffer_ptr[ivp] = 0;
735  for (size_t iorb = 0; iorb < requested_orb_size; iorb++)
736  rg_buffer_ptr[ivp] += vp_phi_v_ptr[ivp * requested_orb_size + iorb] * invRow_deviceptr_list_ptr[ivp][iorb];
737  }
738 
739  for (size_t iw = 0, index = 0; iw < nw; iw++)
740  for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++)
741  ratios_list[iw][iat] = rg_buffer[0][index++];
742  }
743  else
744  for (size_t iw = 0, index = 0; iw < vp_list.size(); iw++)
745  for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++)
746  ratios_list[iw][iat] = simd::dot(vp_phi_v.data_at(index++, 0), invRow_ptr_list[iw], requested_orb_size);
747 }
748 
750  ValueVector& psi,
751  const ValueVector& psiinv,
752  std::vector<ValueType>& ratios)
753 {
756 
757  if (Identity)
758  std::copy_n(psiinv.data(), psiinv.size(), invTemp.data());
759  else
760  {
761  ScopedTimer local(mo_timer_);
762  // when only a subset of orbitals is used, extract limited rows of C.
763  Matrix<ValueType> C_occupied(C->data(), psiinv.size(), BasisSetSize);
764  MatrixOperators::product_Atx(C_occupied, psiinv, invTemp);
765  }
766 
767  for (size_t j = 0; j < VP.getTotalNum(); j++)
768  {
769  {
770  ScopedTimer local(basis_timer_);
771  myBasisSet->evaluateV(VP, j, vTemp.data());
772  }
773  ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize);
774  }
775 }
776 
778  const RefVectorWithLeader<ParticleSet>& P_list,
779  int iat,
780  const std::vector<const ValueType*>& invRow_ptr_list,
781  OffloadMWVGLArray& phi_vgl_v,
782  std::vector<ValueType>& ratios,
783  std::vector<GradType>& grads) const
784 {
785  assert(this == &spo_list.getLeader());
786  assert(phi_vgl_v.size(0) == DIM_VGL);
787  assert(phi_vgl_v.size(1) == spo_list.size());
788 
789  if (!useOMPoffload_)
790  {
791  SPOSet::mw_evaluateVGLandDetRatioGrads(spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads);
792  return;
793  }
794 
795  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
796  // Device data of phi_vgl_v must be up-to-date upon return
797  // phi_vgl_v.updateTo(); // moved updateTo to mw_evaluateVGLImplGEMM
798 
799  const size_t nw = spo_list.size();
800  const size_t norb_requested = phi_vgl_v.size(2);
801  if (useOMPoffload_)
802  {
803  auto& spo_leader = spo_list.getCastedLeader<LCAOrbitalSet>();
804  auto& invRow_deviceptr_list = spo_leader.mw_mem_handle_.getResource().invRow_deviceptr_list;
805  auto& rg_buffer = spo_leader.mw_mem_handle_.getResource().rg_buffer;
806 
807  invRow_deviceptr_list.resize(nw);
808  rg_buffer.resize(4, nw);
809 
810  for (size_t iw = 0; iw < nw; iw++)
811  invRow_deviceptr_list[iw] = invRow_ptr_list[iw];
812 
813  auto* invRow_deviceptr_list_ptr = invRow_deviceptr_list.data();
814  auto* phi_vgl_v_ptr = phi_vgl_v.data();
815  auto* rg_buffer_ptr = rg_buffer.data();
816  const size_t phi_vgl_stride = nw * norb_requested;
817 
818  PRAGMA_OFFLOAD("omp target teams distribute \
819  map(always,to: invRow_deviceptr_list_ptr[:nw]) \
820  map(to: phi_vgl_v_ptr[:nw*norb_requested]) \
821  map(always, from: rg_buffer_ptr[:rg_buffer.size()])")
822  for (size_t iw = 0; iw < nw; iw++)
823  {
824  auto* phi_v_ptr = phi_vgl_v_ptr + iw * norb_requested;
825  auto* phi_gx_ptr = phi_v_ptr + phi_vgl_stride;
826  auto* phi_gy_ptr = phi_gx_ptr + phi_vgl_stride;
827  auto* phi_gz_ptr = phi_gy_ptr + phi_vgl_stride;
828  auto* invRow = invRow_deviceptr_list_ptr[iw];
829 
830  ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0);
831  PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)")
832  for (size_t iorb = 0; iorb < norb_requested; iorb++)
833  {
834  ratio += phi_v_ptr[iorb] * invRow[iorb];
835  grad_x += phi_gx_ptr[iorb] * invRow[iorb];
836  grad_y += phi_gy_ptr[iorb] * invRow[iorb];
837  grad_z += phi_gz_ptr[iorb] * invRow[iorb];
838  }
839 
840  rg_buffer_ptr[iw] = ratio;
841  rg_buffer_ptr[iw + nw] = grad_x / ratio;
842  rg_buffer_ptr[iw + nw * 2] = grad_y / ratio;
843  rg_buffer_ptr[iw + nw * 3] = grad_z / ratio;
844  }
845 
846  for (size_t iw = 0; iw < nw; iw++)
847  {
848  ratios[iw] = rg_buffer[0][iw];
849  grads[iw] = {rg_buffer[1][iw], rg_buffer[2][iw], rg_buffer[3][iw]};
850  }
851  }
852  else
853  for (int iw = 0; iw < nw; iw++)
854  {
855  ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested);
856  GradType dphi;
857  for (size_t idim = 0; idim < DIM; idim++)
858  dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw];
859  grads[iw] = dphi;
860  }
861 }
862 
863 void LCAOrbitalSet::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi)
864 {
865  //TAKE CARE OF IDENTITY
866  myBasisSet->evaluateVGH(P, iat, Temph);
867  if (Identity)
868  evaluate_vgh_impl(Temph, psi, dpsi, dhpsi);
869  else
870  {
871  assert(psi.size() <= OrbitalSetSize);
872  ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
873  Product_ABt(Temph, C_partial_view, Temphv);
874  evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi);
875  }
876 }
877 
879  int iat,
880  ValueVector& psi,
881  GradVector& dpsi,
882  HessVector& dhpsi,
883  GGGVector& dghpsi)
884 {
885  // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not implemented\n");
886 
887  //TAKE CARE OF IDENTITY
888  myBasisSet->evaluateVGHGH(P, iat, Tempgh);
889  if (Identity)
890  evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi);
891  else
892  {
893  assert(psi.size() <= OrbitalSetSize);
894  ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
895  Product_ABt(Tempgh, C_partial_view, Tempghv);
896  evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi);
897  }
898 }
899 
900 /* implement using gemm algorithm */
902  int i,
903  ValueMatrix& logdet,
904  GradMatrix& dlogdet,
905  ValueMatrix& d2logdet) const
906 {
907  const size_t output_size = logdet.cols();
908  std::copy_n(temp.data(0), output_size, logdet[i]);
909  const ValueType* restrict gx = temp.data(1);
910  const ValueType* restrict gy = temp.data(2);
911  const ValueType* restrict gz = temp.data(3);
912  for (size_t j = 0; j < output_size; j++)
913  {
914  dlogdet[i][j][0] = gx[j];
915  dlogdet[i][j][1] = gy[j];
916  dlogdet[i][j][2] = gz[j];
917  }
918  std::copy_n(temp.data(4), output_size, d2logdet[i]);
919 }
920 
922  int i,
923  ValueMatrix& psi,
924  GradMatrix& dpsi,
925  HessMatrix& d2psi) const
926 {
927  const size_t output_size = psi.cols();
928  std::copy_n(temp.data(0), output_size, psi[i]);
929  const ValueType* restrict gx = temp.data(1);
930  const ValueType* restrict gy = temp.data(2);
931  const ValueType* restrict gz = temp.data(3);
932  const ValueType* restrict hxx = temp.data(4);
933  const ValueType* restrict hxy = temp.data(5);
934  const ValueType* restrict hxz = temp.data(6);
935  const ValueType* restrict hyy = temp.data(7);
936  const ValueType* restrict hyz = temp.data(8);
937  const ValueType* restrict hzz = temp.data(9);
938 
939  for (size_t j = 0; j < output_size; j++)
940  {
941  dpsi[i][j][0] = gx[j];
942  dpsi[i][j][1] = gy[j];
943  dpsi[i][j][2] = gz[j];
944 
945  d2psi[i][j](0, 0) = hxx[j];
946  d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
947  d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
948  d2psi[i][j](1, 1) = hyy[j];
949  d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
950  d2psi[i][j](2, 2) = hzz[j];
951  }
952 }
953 
954 inline void LCAOrbitalSet::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const
955 {
956  const size_t output_size = dpsi.cols();
957  const ValueType* restrict gx = temp.data(1);
958  const ValueType* restrict gy = temp.data(2);
959  const ValueType* restrict gz = temp.data(3);
960 
961  for (size_t j = 0; j < output_size; j++)
962  {
963  //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
964  // for an atomic center, the ion gradient is the negative of the elecron gradient.
965  // Hence minus signs for each of these.
966  dpsi[i][j][0] = -gx[j];
967  dpsi[i][j][1] = -gy[j];
968  dpsi[i][j][2] = -gz[j];
969  }
970 }
971 
973  int i,
974  GradMatrix& dpsi,
975  HessMatrix& dgpsi,
976  GradMatrix& dlpsi) const
977 {
978  const size_t output_size = dpsi.cols();
979  const ValueType* restrict gx = temp.data(1);
980  const ValueType* restrict gy = temp.data(2);
981  const ValueType* restrict gz = temp.data(3);
982  const ValueType* restrict hxx = temp.data(4);
983  const ValueType* restrict hxy = temp.data(5);
984  const ValueType* restrict hxz = temp.data(6);
985  const ValueType* restrict hyy = temp.data(7);
986  const ValueType* restrict hyz = temp.data(8);
987  const ValueType* restrict hzz = temp.data(9);
988  const ValueType* restrict gh_xxx = temp.data(10);
989  const ValueType* restrict gh_xxy = temp.data(11);
990  const ValueType* restrict gh_xxz = temp.data(12);
991  const ValueType* restrict gh_xyy = temp.data(13);
992  const ValueType* restrict gh_xzz = temp.data(15);
993  const ValueType* restrict gh_yyy = temp.data(16);
994  const ValueType* restrict gh_yyz = temp.data(17);
995  const ValueType* restrict gh_yzz = temp.data(18);
996  const ValueType* restrict gh_zzz = temp.data(19);
997 
998  for (size_t j = 0; j < output_size; j++)
999  {
1000  //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that
1001  // for an atomic center, the ion gradient is the negative of the elecron gradient.
1002  // Hence minus signs for each of these.
1003  dpsi[i][j][0] = -gx[j];
1004  dpsi[i][j][1] = -gy[j];
1005  dpsi[i][j][2] = -gz[j];
1006 
1007  dgpsi[i][j](0, 0) = -hxx[j];
1008  dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j];
1009  dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j];
1010  dgpsi[i][j](1, 1) = -hyy[j];
1011  dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j];
1012  dgpsi[i][j](2, 2) = -hzz[j];
1013 
1014  //Since this returns the ion gradient of the laplacian, we have to trace the grad hessian vector.
1015  dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]);
1016  dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]);
1017  dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]);
1018  }
1019 }
1020 
1022  int first,
1023  int last,
1024  ValueMatrix& logdet,
1025  GradMatrix& dlogdet,
1026  ValueMatrix& d2logdet)
1027 {
1028  if (Identity)
1029  {
1030  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1031  {
1032  myBasisSet->evaluateVGL(P, iat, Temp);
1033  evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet);
1034  }
1035  }
1036  else
1037  {
1038  assert(logdet.cols() <= OrbitalSetSize);
1039  ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
1040  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1041  {
1042  myBasisSet->evaluateVGL(P, iat, Temp);
1043  Product_ABt(Temp, C_partial_view, Tempv);
1044  evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet);
1045  }
1046  }
1047 }
1048 
1050  int first,
1051  int last,
1052  ValueMatrix& logdet,
1053  GradMatrix& dlogdet,
1054  HessMatrix& grad_grad_logdet)
1055 {
1056  if (Identity)
1057  {
1058  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1059  {
1060  myBasisSet->evaluateVGH(P, iat, Temph);
1061  evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet);
1062  }
1063  }
1064  else
1065  {
1066  assert(logdet.cols() <= OrbitalSetSize);
1067  ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
1068  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1069  {
1070  myBasisSet->evaluateVGH(P, iat, Temph);
1071  Product_ABt(Temph, C_partial_view, Temphv);
1072  evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet);
1073  }
1074  }
1075 }
1076 
1078  int first,
1079  int last,
1080  ValueMatrix& logdet,
1081  GradMatrix& dlogdet,
1082  HessMatrix& grad_grad_logdet,
1083  GGGMatrix& grad_grad_grad_logdet)
1084 {
1085  if (Identity)
1086  {
1087  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1088  {
1089  myBasisSet->evaluateVGHGH(P, iat, Tempgh);
1090  evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
1091  }
1092  }
1093  else
1094  {
1095  assert(logdet.cols() <= OrbitalSetSize);
1096  ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
1097  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1098  {
1099  myBasisSet->evaluateVGHGH(P, iat, Tempgh);
1100  Product_ABt(Tempgh, C_partial_view, Tempghv);
1101  evaluate_vghgh_impl(Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
1102  }
1103  }
1104 }
1105 
1107  int first,
1108  int last,
1109  const ParticleSet& source,
1110  int iat_src,
1111  GradMatrix& gradphi)
1112 {
1113  if (Identity)
1114  {
1115  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1116  {
1117  myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, Temp);
1118  evaluate_ionderiv_v_impl(Temp, i, gradphi);
1119  }
1120  }
1121  else
1122  {
1123  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1124  {
1125  myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, Temp);
1126  Product_ABt(Temp, *C, Tempv);
1127  evaluate_ionderiv_v_impl(Tempv, i, gradphi);
1128  }
1129  }
1130 }
1131 
1133  int first,
1134  int last,
1135  const ParticleSet& source,
1136  int iat_src,
1137  GradMatrix& grad_phi,
1138  HessMatrix& grad_grad_phi,
1139  GradMatrix& grad_lapl_phi)
1140 {
1141  if (Identity)
1142  {
1143  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1144  {
1145  myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, Tempgh);
1146  evaluate_ionderiv_vgl_impl(Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi);
1147  }
1148  }
1149  else
1150  {
1151  for (size_t i = 0, iat = first; iat < last; i++, iat++)
1152  {
1153  myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, Tempgh);
1155  evaluate_ionderiv_vgl_impl(Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi);
1156  // evaluate_vghgh_impl(Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
1157  }
1158  }
1159 }
1160 
1162  int iel,
1163  const ParticleSet& source,
1164  int iat_src,
1165  GradVector& gradphi)
1166 {
1167  if (Identity)
1168  {
1169  myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, Temp);
1171  }
1172  else
1173  {
1174  myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, Temp);
1175  Product_ABt(Temp, *C, Tempv);
1177  }
1178 }
1179 
1180 void LCAOrbitalSet::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
1181 {
1182  if (!use_stored_copy)
1183  *C_copy = *C;
1184  //gemm is out-of-place
1186  rot_mat.data(), OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize);
1187  C->updateTo();
1188 
1189  /* debugging code
1190  app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl;
1191  for (int j = 0; j < OrbitalSetSize; j++)
1192  for (int i = 0; i < BasisSetSize; i++)
1193  {
1194  app_log() << " " << std::right << std::fixed << std::setprecision(16) << std::setw(23) << std::scientific
1195  << *(C->data() + j * BasisSetSize + i);
1196 
1197  if ((j * BasisSetSize + i + 1) % 4 == 0)
1198  app_log() << std::endl;
1199  }
1200  */
1201 }
1202 
1203 } // namespace qmcplusplus
base class for Single-particle orbital sets
Definition: SPOSet.h:46
OrbitalSetTraits< ValueType >::HessVector HessVector
Definition: SPOSet.h:53
void mw_evaluateVGL(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const final
evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walker...
void gemm(BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const float &alpha, const float *A, int lda, const float *B, int ldb, const float &beta, float *C, int ldc)
Fixed-size array.
Definition: OhmmsTinyMeta.h:30
size_t addResource(std::unique_ptr< Resource > &&res, bool noprint=false)
void takebackResource(ResourceHandle< RS > &res_handle)
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
Type_t * device_data()
Definition: OhmmsArray.h:90
T dot(const T *restrict a, const T *restrict b, int n, TRES res=TRES())
dot product
QTBase::RealType RealType
Definition: Configuration.h:58
void evaluateGradSourceRow(const ParticleSet &P, int iel, const ParticleSet &source, int iat_src, GradVector &grad_phi) final
Returns a row of d/dR_iat phi_j(r) evaluated at position r.
Type_t * data_at(const std::array< SIZET, D > &indices)
Definition: OhmmsArray.h:104
std::unique_ptr< SPOSet > makeClone() const final
make a clone of itself every derived class must implement this to have threading working correctly...
void fill_n(T *x, size_t count, const T &value)
Definition: OMPstd.hpp:21
OffloadVector< const ValueType * > invRow_deviceptr_list
Type_t * data()
Definition: OhmmsArray.h:87
void evaluate_ionderiv_v_row_impl(const vgl_type &temp, GradVector &dlogdet) const
Unpacks data in vgl object and calculates/places ionic gradient of a single row (phi_j(r)) into dlogd...
const bool Identity
true if C is an identity matrix
A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet Virtu...
NewTimer & mo_timer_
timer for MO
std::unique_ptr< basis_type > myBasisSet
pointer to the basis set
Definition: LCAOrbitalSet.h:40
OrbitalSetTraits< ValueType >::ValueMatrix ValueMatrix
Definition: SPOSet.h:50
std::shared_ptr< OffloadValueMatrix > C_copy
a copy of the original C before orbital rotation is applied;
SoA adaptor class for Vector<TinyVector<T,D> >
int size() const
return the size of the orbital set Ye: this needs to be replaced by getOrbitalSetSize(); ...
Definition: SPOSet.h:75
std::unique_ptr< Resource > makeClone() const override
void applyRotation(const ValueMatrix &rot_mat, bool use_stored_copy) final
apply rotation to all the orbitals
void product_Atx(const Matrix< T > &A, const Vector< T > &x, Vector< T > &y)
static function to perform y = A^t x for generic matrix and vector
void evaluateGradSource(const ParticleSet &P, int first, int last, const ParticleSet &source, int iat_src, GradMatrix &grad_phi) final
Calculate ion derivatives of SPO&#39;s.
void evaluateVGL(const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) final
evaluate the values, gradients and laplacians of this single-particle orbital set ...
void evaluate_vghgh_impl(const vghgh_type &temp, ValueVector &psi, GradVector &dpsi, HessVector &d2psi, GGGVector &dghpsi) const
Unpacks data in vghgh_type temp object into wavefunction friendly data structures for value...
LCAOrbitalSet(const std::string &my_name, std::unique_ptr< basis_type > &&bs, size_t norbs, bool identity, bool use_offload)
constructor
void mw_evaluateVGLImplGEMM(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, OffloadMWVGLArray &phi_vgl_v) const
vgh_type Temph
These are temporary VectorSoAContainers to hold value, gradient, and hessian for all basis or SPO fun...
void evaluateVGH(const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi) final
evaluate the values, gradients and hessians of this single-particle orbital set
OrbitalSetTraits< ValueType >::GradMatrix GradMatrix
Definition: SPOSet.h:52
constexpr std::complex< double > zone
Definition: BLAS.hpp:52
Specialized paritlce class for atomistic simulations.
Definition: ParticleSet.h:55
virtual void mw_evaluateValue(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list) const
evaluate the values this single-particle orbital sets of multiple walkers
Definition: SPOSet.cpp:105
Array< ValueType, 3, OffloadPinnedAllocator< ValueType > > OffloadMWVGLArray
Definition: SPOSet.h:58
void finalizeConstruction() override
update C on device
void evaluateValue(const ParticleSet &P, int iat, ValueVector &psi) final
evaluate the values of this single-particle orbital set
QTBase::ValueType ValueType
Definition: Configuration.h:60
void product(const Matrix< T > &A, const Matrix< T > &B, Matrix< T > &C)
static function to perform C=AB for real matrices
Array< ValueType, 2, OffloadPinnedAllocator< ValueType > > OffloadMWVArray
Definition: SPOSet.h:59
const IndexType BasisSetSize
number of Single-particle orbitals
virtual void mw_evaluateVGL(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walker...
Definition: SPOSet.cpp:93
virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const
evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ra...
Definition: SPOSet.cpp:126
OrbitalSetTraits< ValueType >::ValueVector ValueVector
Definition: SPOSet.h:49
CASTTYPE & getCastedElement(size_t i) const
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
std::shared_ptr< OffloadValueMatrix > C
pointer to matrix containing the coefficients
Definition: LCAOrbitalSet.h:42
vgl_type Tempv
Tempv(OrbitalSetSize) Tempv=C*Temp.
RefVectorWithLeader< basis_type > extractBasisRefList(const RefVectorWithLeader< SPOSet > &spo_list) const
helper function for extracting a list of basis sets from a list of LCAOrbitalSet
size_t size() const
Definition: OhmmsArray.h:57
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
void mw_evaluateValue(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list) const final
evaluate the values this single-particle orbital sets of multiple walkers
void evaluate_vgh_impl(const vgh_type &temp, ValueVector &psi, GradVector &dpsi, HessVector &d2psi) const
These two functions unpack the data in vgh_type temp object into wavefunction friendly data structure...
void acquireResource(ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const final
acquire a shared resource from collection
void mw_evaluateValueVPsImplGEMM(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, OffloadMWVArray &phi_v) const
packed walker GEMM implementation with multi virtual particle sets
void checkObject() const final
check consistency between Identity and C
vghgh_type Tempghv
Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)].
void evaluateDetRatios(const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios) final
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
LCAOMultiWalkerMem(const LCAOMultiWalkerMem &)
compute::Queue< PlatformKind::OMPTARGET > queue
void Product_ABt(const VectorSoaContainer< T, D > &A, const Matrix< T, Alloc > &B, VectorSoaContainer< T, D > &C)
Find a better place for other user classes, Matrix should be padded as well.
std::vector< std::reference_wrapper< T > > RefVector
vgh_type Temphv
Norbitals x [1(value)+3(gradient)+6(hessian)].
compute::BLASHandle< PlatformKind::OMPTARGET > blas_handle
void releaseResource(ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const final
return a shared resource to collection
ResourceHandle< LCAOMultiWalkerMem > mw_mem_handle_
OrbitalSetTraits< ValueType >::GradHessVector GGGVector
Definition: SPOSet.h:55
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
void evaluate_ionderiv_v_impl(const vgl_type &temp, int i, GradMatrix &dlogdet) const
Unpacks data in vgl object and calculates/places ionic gradient result into dlogdet.
OrbitalSetTraits< ValueType >::GradVector GradVector
Definition: SPOSet.h:51
void createResource(ResourceCollection &collection) const final
initialize a shared resource and hand it to collection
const bool useOMPoffload_
whether offload is on or off at runtime.
void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const final
evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ra...
static void copy(int n, const T *restrict a, T *restrict b)
Definition: BLAS.hpp:381
virtual void mw_evaluateDetRatios(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType *> &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers ...
Definition: SPOSet.cpp:69
void evaluate_ionderiv_vgl_impl(const vghgh_type &temp, int i, GradMatrix &dlogdet, HessMatrix &dglogdet, GradMatrix &dllogdet) const
Unpacks data in vgl object and calculates/places ionic gradient of value, electron gradient...
void evaluate_vgl_impl(const vgl_type &temp, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) const
helper functions to handle Identity
vghgh_type Tempgh
These are temporary VectorSoAContainers to hold value, gradient, hessian, and gradient hessian for al...
vgl_type Temp
Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L.
static size_t countVPs(const RefVectorWithLeader< const VirtualParticleSet > &vp_list)
class to handle linear combinations of basis orbitals used to evaluate the Dirac determinants.
Definition: LCAOrbitalSet.h:30
ResourceHandle< RS > lendResource()
void setOrbitalSetSize(int norbs) final
set the OrbitalSetSize and Identity=false and initialize internal storages
static void gemm(char Atrans, char Btrans, int M, int N, int K, double alpha, const double *A, int lda, const double *restrict B, int ldb, double beta, double *restrict C, int ldc)
Definition: BLAS.hpp:235
ompBLAS_status copy(ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
double B(double x, int k, int i, const std::vector< double > &t)
void evaluate_notranspose(const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, ValueMatrix &d2logdet) final
evaluate the values, gradients and laplacians of this single-particle orbital for [first...
OrbitalSetTraits< ValueType >::GradHessMatrix GGGMatrix
Definition: SPOSet.h:56
void resize(size_type n)
resize myData
NewTimer & basis_timer_
timer for basis set
A D-dimensional Array class based on PETE.
Definition: OhmmsArray.h:25
void mw_evaluateValueImplGEMM(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, OffloadMWVArray &phi_v) const
packed walker GEMM implementation
void mw_evaluateDetRatios(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType *> &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const final
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers ...
void evaluateVGHGH(const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi) final
evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set ...
OrbitalSetTraits< ValueType >::HessMatrix HessMatrix
Definition: SPOSet.h:54