42 const size_t num = psi.
extent(1);
43 const size_t npairs = pairs.size();
45 const int* first = pairs.data(0);
46 const int*
second = pairs.data(1);
47 for (
size_t i = 0; i < npairs; ++i)
49 const int I = first[i];
57 const int* it2 = data.
data();
58 const size_t nitems =
sign.size();
61 for (
size_t count = 0; count < nitems; ++count)
63 const size_t n = *it2;
65 ratios[count] =
sign[count] * det0 *
87 const size_t nw = ratios_list.size();
92 const size_t nb_cols_table_matrix(table_matrix_list[0].
get().cols());
96 const size_t npairs = pairs.size();
97 const size_t num = psi_list[0].get().extent(1);
98 const size_t nitems =
sign.size();
100 const int* first = pairs.data(0);
101 const int*
second = pairs.data(1);
103 psiinv_deviceptr_list.resize(nw);
104 psi_deviceptr_list.resize(nw);
105 table_matrix_deviceptr_list.resize(nw);
106 ratios_deviceptr_list.resize(nw);
108 for (
size_t iw = 0; iw < nw; iw++)
110 psiinv_deviceptr_list[iw] = psiinv_list[iw].get().device_data();
111 psi_deviceptr_list[iw] = psi_list[iw].get().device_data();
112 table_matrix_deviceptr_list[iw] = table_matrix_list[iw].get().device_data();
113 ratios_deviceptr_list[iw] = ratios_list[iw].get().device_data();
116 const size_t nb_cols_psi(psi_list[0].
get().cols());
117 const size_t nb_cols_psiinv(psiinv_list[0].
get().cols());
119 auto* ratios_list_ptr = ratios_deviceptr_list.data();
120 auto* table_matrix_list_ptr = table_matrix_deviceptr_list.data();
121 const auto* psiinv_list_ptr = psiinv_deviceptr_list.data();
122 const auto* psi_list_ptr = psi_deviceptr_list.data();
126 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) \ 127 map(always, to: psiinv_list_ptr[:nw], psi_list_ptr[:nw]) \ 128 map(always, to: ratios_list_ptr[:nw], table_matrix_list_ptr[:nw]) \ 129 map(to:first[:npairs], second[:npairs])")
130 for (uint32_t iw = 0; iw < nw; iw++)
131 for (uint32_t i = 0; i < npairs; ++i)
133 const int I = first[i];
137 for (uint32_t ind = 0; ind < num; ind++)
138 table_matrix_local +=
139 psiinv_list_ptr[iw][
I * nb_cols_psiinv + ind] * psi_list_ptr[iw][
J * nb_cols_psi + ind];
140 table_matrix_list_ptr[iw][
I * nb_cols_table_matrix +
J] = table_matrix_local;
150 size_t det_offset = 1;
151 size_t data_offset = 1;
153 auto update_offsets = [&](
size_t ext_level) {
154 det_offset += (*ndets_per_excitation_level_)[ext_level];
155 data_offset += (*ndets_per_excitation_level_)[ext_level] * (3 * ext_level + 1);
160 if (max_ext_level >= 1)
162 mw_updateRatios<1>(det_offset, data_offset, data,
sign, table_matrix_deviceptr_list, nb_cols_table_matrix,
163 ratios_deviceptr_list);
167 if (max_ext_level >= 2)
169 mw_updateRatios<2>(det_offset, data_offset, data,
sign, table_matrix_deviceptr_list, nb_cols_table_matrix,
170 ratios_deviceptr_list);
174 if (max_ext_level >= 3)
176 mw_updateRatios<3>(det_offset, data_offset, data,
sign, table_matrix_deviceptr_list, nb_cols_table_matrix,
177 ratios_deviceptr_list);
181 if (max_ext_level >= 4)
183 mw_updateRatios<4>(det_offset, data_offset, data,
sign, table_matrix_deviceptr_list, nb_cols_table_matrix,
184 ratios_deviceptr_list);
188 if (max_ext_level >= 5)
190 mw_updateRatios<5>(det_offset, data_offset, data,
sign, table_matrix_deviceptr_list, nb_cols_table_matrix,
191 ratios_deviceptr_list);
195 if (max_ext_level >= 6)
197 for (
size_t iw = 0; iw < nw; iw++)
198 table_matrix_list[iw].
get().updateFrom();
199 for (
size_t ext_level = 6; ext_level <= max_ext_level; ext_level++)
203 update_offsets(ext_level);
239 table_matrix_list, ratios_list);
243 for (
size_t iw = 0; iw < ratios_list.size(); iw++)
244 ratios_list[iw].
get().updateFrom();
263 for (
size_t count = 0; count <
getNumDets(); ++count)
285 table_matrix_list, WorkSpace_list);
287 const size_t nw = WorkSpace_list.size();
289 for (
size_t iw = 0; iw < nw; iw++)
290 WorkSpace_deviceptr_list[iw] = WorkSpace_list[iw].
get().
device_data();
292 auto* WorkSpace_list_ptr = WorkSpace_deviceptr_list.
data();
293 auto* mw_grads_ptr = mw_grads.
data();
294 const size_t Grads_cols = mw_grads.
cols();
296 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) map(from:mw_grads_ptr[:mw_grads.size()]) \ 297 map(always, to:WorkSpace_list_ptr[:nw])")
298 for (uint32_t iw = 0; iw < nw; iw++)
299 for (uint32_t count = 0; count <
getNumDets; ++count)
300 mw_grads_ptr[(3 * iw + dx) * Grads_cols + count] = WorkSpace_list_ptr[iw][count];
317 for (
size_t count = 0; count <
getNumDets(); ++count)
321 int num=psi.extent(1);
322 std::vector<std::pair<int,int> >::iterator it(pairs.begin()), last(pairs.end());
328 std::vector<int>::iterator it2 = data.begin();
330 while(it2 != data.end())
350 const int nw = det_list.size();
360 phi_list.reserve(nw);
361 psiV_list.reserve(nw);
362 psiMinv_list.reserve(nw);
363 psiMinv_temp_list.reserve(nw);
364 table_matrix_list.reserve(nw);
365 TpsiM_list.reserve(nw);
366 psiM_list.reserve(nw);
367 new_ratios_to_ref_list.reserve(nw);
369 for (
size_t iw = 0; iw < nw; iw++)
373 phi_list.push_back(*
det.Phi);
374 psiV_list.push_back(
det.psiV);
375 psiMinv_list.push_back(
det.psiMinv);
376 psiM_list.push_back(
det.psiM);
377 psiMinv_temp_list.push_back(
det.psiMinv_temp);
378 new_ratios_to_ref_list.push_back(
det.new_ratios_to_ref_);
379 table_matrix_list.push_back(
det.table_matrix);
380 TpsiM_list.push_back(
det.TpsiM);
385 const int WorkingIndex = iat - det_leader.
FirstIndex;
389 for (
size_t iw = 0; iw < nw; iw++)
392 Vector<ValueType> psiV_list_host_view(psiV_list[iw].
get().data(), psiV_list[iw].
get().size());
393 det.getPhi()->evaluateValue(P_list[iw], iat, psiV_list_host_view);
397 psiV_list[iw].get().updateTo();
403 int dummy_handle = 0;
404 const auto psiMinv_rows = psiMinv_list[0].get().rows();
405 const auto psiMinv_cols = psiMinv_list[0].get().cols();
406 const auto TpsiM_cols = TpsiM_list[0].get().cols();
407 const auto psiM_cols = psiM_list[0].get().cols();
408 const auto TpsiM_rows = TpsiM_list[0].get().rows();
414 auto* psiV_list_devptr = mw_res.psiV_deviceptr_list.device_data();
415 auto* psiV_temp_list_ptr = mw_res.psiV_temp_deviceptr_list.data();
417 auto* psiMinv_list_devptr = mw_res.psiMinv_deviceptr_list.device_data();
418 auto* psiMinv_temp_list_devptr = mw_res.psiMinv_temp_deviceptr_list.device_data();
420 auto* TpsiM_list_devptr = mw_res.TpsiM_deviceptr_list.device_data();
421 auto* psiM_list_ptr = mw_res.psiM_deviceptr_list.data();
423 auto& curRatio_list = mw_res.curRatio_list;
424 curRatio_list.resize(nw);
425 auto* curRatio_list_ptr = curRatio_list.data();
427 auto& inv_curRatio_list = mw_res.inv_curRatio_list;
428 inv_curRatio_list.resize(nw);
429 auto* inv_curRatio_list_ptr = inv_curRatio_list.data();
431 auto* confgListOccup_ptr = det_leader.
refdet_occup->data();
435 psiMinv_temp_list_devptr, 1, nw);
437 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");
440 TpsiM_list_devptr, WorkingIndex, TpsiM_cols, nw);
442 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");
444 PRAGMA_OFFLOAD(
"omp target teams distribute map(always, from:curRatio_list_ptr[:nw]) \ 445 is_device_ptr(psiV_list_devptr, psiMinv_temp_list_devptr)")
446 for (uint32_t iw = 0; iw < nw; iw++)
449 PRAGMA_OFFLOAD(
"omp parallel for reduction(+ : c_ratio)")
450 for (uint32_t jc = 0; jc <
NumPtcls; jc++)
452 const size_t J = confgListOccup_ptr[jc];
453 psiV_temp_list_ptr[iw][jc] = psiV_list_devptr[iw][
J];
454 size_t ic = jc * psiMinv_cols;
455 c_ratio += (psiMinv_temp_list_devptr[iw] + WorkingIndex)[ic] * psiV_temp_list_ptr[iw][jc];
457 curRatio_list_ptr[iw] = c_ratio;
458 inv_curRatio_list_ptr[iw] =
ValueType(1) / c_ratio;
462 mw_res.psiV_temp_deviceptr_list, mw_res.psiMinv_temp_deviceptr_list,
467 for (
size_t iw = 0; iw < nw; iw++)
468 psiMinv_temp_list[iw].
get().updateFrom();
471 auto& det0_list = mw_res.cone_vec;
473 det0_list, psiMinv_temp_list, TpsiM_list, *det_leader.
detData,
475 new_ratios_to_ref_list);
478 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) is_device_ptr(TpsiM_list_devptr) \ 479 map(always, to:psiM_list_ptr[:nw])")
480 for (uint32_t iw = 0; iw < nw; iw++)
482 TpsiM_list_devptr[iw][i * TpsiM_cols + WorkingIndex] = psiM_list_ptr[iw][i + psiM_cols * WorkingIndex];
485 for (
size_t iw = 0; iw < nw; iw++)
488 det.curRatio = curRatio_list_ptr[iw];
500 Phi->evaluateValue(P, iat, psiV_host_view);
502 const int WorkingIndex = (refPtcl < 0 ? iat : refPtcl) -
FirstIndex;
516 for (
size_t i = 0; i <
NumPtcls; i++)
527 TpsiM(i, WorkingIndex) =
psiM(WorkingIndex, i);
540 Phi->evaluateVGL(P, iat, psiV_host_view, dpsiV_host_view, d2psiV_host_view);
553 for (
size_t i = 0; i <
NumPtcls; i++)
566 for (
size_t idim = 0; idim <
OHMMS_DIM; idim++)
573 for (
size_t i = 0; i <
NumPtcls; i++)
584 TpsiM(i, WorkingIndex) =
psiM(WorkingIndex, i);
598 Phi->evaluateVGL_spin(P, iat, psiV_host_view, dpsiV_host_view, d2psiV_host_view,
dspin_psiV);
611 for (
size_t i = 0; i <
NumPtcls; i++)
626 for (
size_t idim = 0; idim <
OHMMS_DIM; idim++)
633 for (
size_t i = 0; i <
NumPtcls; i++)
648 for (
size_t i = 0; i <
NumPtcls; i++)
659 TpsiM(i, WorkingIndex) =
psiM(WorkingIndex, i);
668 const int nw = det_list.size();
674 int dummy_handle = 0;
686 phi_list.reserve(nw);
687 psiV_list.reserve(nw);
688 dpsiV_list.reserve(nw);
689 d2psiV_list.reserve(nw);
690 psiV_temp_list.reserve(nw);
691 psiMinv_temp_list.reserve(nw);
692 psiMinv_list.reserve(nw);
693 psiM_list.reserve(nw);
695 TpsiM_list.reserve(nw);
696 new_ratios_to_ref_list.reserve(nw);
697 table_matrix_list.reserve(nw);
698 dpsiMinv_list.reserve(nw);
699 WorkSpace_list.reserve(nw);
702 auto& ratioGradRef_list = mw_res.ratioGradRef_list;
703 auto& det0_grad_list = mw_res.det0_grad_list;
704 ratioGradRef_list.resize(nw);
705 det0_grad_list.resize(nw);
708 const int WorkingIndex = iat - det_leader.
FirstIndex;
710 for (
size_t iw = 0; iw < nw; iw++)
714 phi_list.push_back(*
det.Phi);
715 psiV_list.push_back(
det.psiV);
716 dpsiV_list.push_back(
det.dpsiV);
717 d2psiV_list.push_back(
det.d2psiV);
718 psiV_temp_list.push_back(
det.psiV_temp);
719 psiMinv_list.push_back(
det.psiMinv);
720 psiM_list.push_back(
det.psiM);
721 psiMinv_temp_list.push_back(
det.psiMinv_temp);
722 new_ratios_to_ref_list.push_back(
det.new_ratios_to_ref_);
723 TpsiM_list.push_back(
det.TpsiM);
724 table_matrix_list.push_back(
det.table_matrix);
725 dpsiMinv_list.push_back(
det.dpsiMinv);
726 WorkSpace_list.push_back(
det.WorkSpace);
731 for (
size_t iw = 0; iw < nw; iw++)
734 Vector<ValueType> psiV_list_host_view(psiV_list[iw].
get().data(), psiV_list[iw].
get().size());
735 Vector<GradType> dpsiV_list_host_view(dpsiV_list[iw].
get().data(), dpsiV_list[iw].
get().size());
736 Vector<ValueType> d2psiV_list_host_view(d2psiV_list[iw].
get().data(), d2psiV_list[iw].
get().size());
737 det.Phi->evaluateVGL(P_list[iw], iat, psiV_list_host_view, dpsiV_list_host_view, d2psiV_list_host_view);
741 psiV_list[iw].get().updateTo();
742 dpsiV_list[iw].get().updateTo();
747 const auto psiMinv_rows = psiMinv_list[0].get().rows();
748 const auto psiMinv_cols = psiMinv_list[0].get().cols();
749 const auto TpsiM_num_cols = TpsiM_list[0].get().cols();
750 const auto psiM_num_cols = psiM_list[0].get().cols();
753 auto* psiV_list_devptr = mw_res.psiV_deviceptr_list.device_data();
754 auto* psiV_temp_list_ptr = mw_res.psiV_temp_deviceptr_list.data();
755 auto* TpsiM_list_devptr = mw_res.TpsiM_deviceptr_list.device_data();
756 auto* psiM_list_ptr = mw_res.psiM_deviceptr_list.data();
757 auto* psiMinv_list_devptr = mw_res.psiMinv_deviceptr_list.device_data();
758 auto* dpsiMinv_list_devptr = mw_res.dpsiMinv_deviceptr_list.device_data();
759 auto* psiMinv_temp_list_devptr = mw_res.psiMinv_temp_deviceptr_list.device_data();
761 auto& curRatio_list = mw_res.curRatio_list;
762 curRatio_list.resize(nw);
763 auto* curRatio_list_ptr = curRatio_list.data();
765 auto& inv_curRatio_list = mw_res.inv_curRatio_list;
766 inv_curRatio_list.resize(nw);
767 auto* inv_curRatio_list_ptr = inv_curRatio_list.data();
769 auto* det0_grad_list_ptr = det0_grad_list.data();
770 auto* confgListOccup_ptr = det_leader.
refdet_occup->data();
772 auto* dpsiV_list_ptr = mw_res.dpsiV_deviceptr_list.data();
773 auto* ratioGradRef_list_ptr = ratioGradRef_list.data();
777 psiMinv_temp_list_devptr, 1, nw);
779 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");
784 PRAGMA_OFFLOAD(
"omp target teams distribute is_device_ptr(psiV_list_devptr, psiMinv_temp_list_devptr) \ 785 map(always, from:curRatio_list_ptr[:nw])")
786 for (uint32_t iw = 0; iw < nw; iw++)
789 PRAGMA_OFFLOAD(
"omp parallel for reduction(+ : ratioGradRef_local)")
790 for (uint32_t i = 0; i <
NumPtcls; i++)
792 const size_t J = confgListOccup_ptr[i];
793 psiV_temp_list_ptr[iw][i] = psiV_list_devptr[iw][
J];
794 ratioGradRef_local += psiMinv_temp_list_devptr[iw][i * psiMinv_cols + WorkingIndex] * dpsiV_list_ptr[iw][
J];
801 ratioGradRef_list_ptr[iw][0] = ratioGradRef_local[0];
802 ratioGradRef_list_ptr[iw][1] = ratioGradRef_local[1];
803 ratioGradRef_list_ptr[iw][2] = ratioGradRef_local[2];
806 PRAGMA_OFFLOAD(
"omp parallel for reduction(+ : c_ratio)")
807 for (uint32_t jc = 0; jc < psiMinv_cols; jc += 1)
809 const size_t ic = jc * psiMinv_cols;
810 c_ratio += (psiMinv_temp_list_devptr[iw] + WorkingIndex)[ic] * psiV_temp_list_ptr[iw][jc];
812 curRatio_list_ptr[iw] = c_ratio;
813 inv_curRatio_list_ptr[iw] =
ValueType(1) / c_ratio;
817 TpsiM_list_devptr, WorkingIndex, TpsiM_num_cols, nw);
819 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");
823 mw_res.psiV_temp_deviceptr_list, mw_res.psiMinv_temp_deviceptr_list,
829 for (
size_t iw = 0; iw < nw; iw++)
830 psiMinv_temp_list[iw].
get().updateFrom();
833 auto& det0_list = mw_res.cone_vec;
835 det0_list, psiMinv_temp_list, TpsiM_list, *det_leader.
detData,
837 new_ratios_to_ref_list);
839 for (
size_t idim = 0; idim <
OHMMS_DIM; idim++)
842 dpsiMinv_list_devptr, 1, nw);
844 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");
846 PRAGMA_OFFLOAD(
"omp target teams distribute map(to: ratioGradRef_list_ptr[:nw])")
847 for (uint32_t iw = 0; iw < nw; iw++)
849 inv_curRatio_list_ptr[iw] =
ValueType(1) / ratioGradRef_list_ptr[iw][idim];
851 for (uint32_t i = 0; i <
NumPtcls; i++)
853 const size_t J = confgListOccup_ptr[i];
854 psiV_temp_list_ptr[iw][i] = dpsiV_list_ptr[iw][
J][idim];
859 mw_res.psiV_temp_deviceptr_list, mw_res.dpsiMinv_deviceptr_list,
862 PRAGMA_OFFLOAD(
"omp target teams distribute map(to:dpsiV_list_ptr[:nw], curRatio_list_ptr[:nw]) \ 863 map(always,from:det0_grad_list_ptr[:nw]) \ 864 is_device_ptr(TpsiM_list_devptr)")
865 for (uint32_t iw = 0; iw < nw; iw++)
867 det0_grad_list_ptr[iw] = ratioGradRef_list_ptr[iw][idim] / curRatio_list_ptr[iw];
869 TpsiM_list_devptr[iw][i * TpsiM_num_cols + WorkingIndex] = dpsiV_list_ptr[iw][i][idim];
873 WorkingIndex, idim, det_leader.
getNumDets(), det0_grad_list,
874 dpsiMinv_list, TpsiM_list, *det_leader.
detData,
876 table_matrix_list, mw_grads);
880 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) is_device_ptr(TpsiM_list_devptr) \ 881 map(always, to:psiM_list_ptr[:nw])")
882 for (uint32_t iw = 0; iw < nw; iw++)
884 TpsiM_list_devptr[iw][i * TpsiM_num_cols + WorkingIndex] = psiM_list_ptr[iw][i + psiM_num_cols * WorkingIndex];
887 for (
size_t iw = 0; iw < nw; iw++)
890 det.curRatio = curRatio_list[iw];
900 for (
size_t idim = 0; idim <
OHMMS_DIM; idim++)
906 for (
size_t i = 0; i <
NumPtcls; i++)
909 ratioG +=
psiMinv(i, WorkingIndex) *
dpsiM(WorkingIndex, *it)[idim];
914 TpsiM(i, WorkingIndex) =
dpsiM(WorkingIndex, i)[idim];
920 TpsiM(i, WorkingIndex) =
psiM(WorkingIndex, i);
930 for (
size_t idim = 0; idim <
OHMMS_DIM; idim++)
936 for (
size_t i = 0; i <
NumPtcls; i++)
939 ratioG +=
psiMinv(i, WorkingIndex) *
dpsiM(WorkingIndex, *it)[idim];
944 TpsiM(i, WorkingIndex) =
dpsiM(WorkingIndex, i)[idim];
953 for (
size_t i = 0; i <
NumPtcls; i++)
968 TpsiM(i, WorkingIndex) =
psiM(WorkingIndex, i);
976 const int nw = det_list.size();
978 const int WorkingIndex = iat - det_leader.
FirstIndex;
980 int dummy_handle = 0;
992 psiMinv_list.reserve(nw);
993 dpsiMinv_list.reserve(nw);
994 dpsiM_list.reserve(nw);
995 psiV_temp_list.reserve(nw);
997 table_matrix_list.reserve(nw);
998 TpsiM_list.reserve(nw);
999 psiM_list.reserve(nw);
1000 WorkSpace_list.reserve(nw);
1002 for (
size_t iw = 0; iw < nw; iw++)
1005 psiMinv_list.push_back(
det.psiMinv);
1006 dpsiMinv_list.push_back(
det.dpsiMinv);
1007 psiV_temp_list.push_back(
det.psiV_temp);
1008 dpsiM_list.push_back(
det.dpsiM);
1010 TpsiM_list.push_back(
det.TpsiM);
1011 psiM_list.push_back(
det.psiM);
1012 table_matrix_list.push_back(
det.table_matrix);
1013 WorkSpace_list.push_back(
det.WorkSpace);
1016 const auto psiMinv_rows = psiMinv_list[0].get().rows();
1017 const auto psiMinv_cols = psiMinv_list[0].get().cols();
1018 const auto TpsiM_cols = TpsiM_list[0].get().cols();
1019 const auto psiM_cols = psiM_list[0].get().cols();
1020 const auto dpsiM_cols = dpsiM_list[0].get().cols();
1021 const auto dpsiM_rows = dpsiM_list[0].get().rows();
1025 auto& ratioG_list = mw_res.curRatio_list;
1026 ratioG_list.resize(nw);
1027 auto* ratioG_list_ptr = ratioG_list.data();
1029 auto& inv_ratioG_list = mw_res.inv_curRatio_list;
1030 inv_ratioG_list.resize(nw);
1031 auto* inv_ratioG_list_ptr = inv_ratioG_list.data();
1033 auto* psiMinv_list_devptr = mw_res.psiMinv_deviceptr_list.device_data();
1034 auto* dpsiMinv_list_devptr = mw_res.dpsiMinv_deviceptr_list.device_data();
1035 auto* TpsiM_list_devptr = mw_res.TpsiM_deviceptr_list.data();
1036 auto* psiM_list_ptr = mw_res.psiM_deviceptr_list.data();
1037 auto* psiV_temp_list_ptr = mw_res.psiV_temp_deviceptr_list.data();
1038 auto* dpsiM_list_ptr = mw_res.dpsiM_deviceptr_list.data();
1039 auto* confgListOccup_ptr = det_leader.
refdet_occup->data();
1042 for (
size_t idim = 0; idim <
OHMMS_DIM; idim++)
1045 dpsiMinv_list_devptr, 1, nw);
1047 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");
1049 PRAGMA_OFFLOAD(
"omp target teams distribute map(always, to: psiV_temp_list_ptr[:nw]) \ 1050 map(always, to: dpsiM_list_ptr[:nw])")
1051 for (uint32_t iw = 0; iw < nw; iw++)
1052 for (uint32_t i = 0; i <
NumPtcls; i++)
1054 size_t J = confgListOccup_ptr[i];
1055 psiV_temp_list_ptr[iw][i] = dpsiM_list_ptr[iw][WorkingIndex * dpsiM_cols +
J][idim];
1058 PRAGMA_OFFLOAD(
"omp target teams distribute is_device_ptr(psiMinv_list_devptr) \ 1059 map(always, from: ratioG_list_ptr[:nw])")
1060 for (uint32_t iw = 0; iw < nw; iw++)
1063 PRAGMA_OFFLOAD(
"omp parallel for reduction(+ : ratioG_local)")
1064 for (uint32_t i = 0; i <
NumPtcls; i++)
1066 size_t J = confgListOccup_ptr[i];
1067 ratioG_local += psiMinv_list_devptr[iw][i * psiMinv_cols + WorkingIndex] *
1068 dpsiM_list_ptr[iw][WorkingIndex * dpsiM_cols +
J][idim];
1070 ratioG_list_ptr[iw] = ratioG_local;
1071 inv_ratioG_list_ptr[iw] =
ValueType(1) / ratioG_local;
1075 mw_res.psiV_temp_deviceptr_list, mw_res.dpsiMinv_deviceptr_list,
1078 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) map(to:dpsiM_list_ptr[:nw]) \ 1079 map(always, to: TpsiM_list_devptr[:nw])")
1080 for (uint32_t iw = 0; iw < nw; iw++)
1082 TpsiM_list_devptr[iw][i * TpsiM_cols + WorkingIndex] =
1083 dpsiM_list_ptr[iw][dpsiM_cols * WorkingIndex + i][idim];
1087 WorkingIndex, idim, det_leader.
getNumDets(), ratioG_list,
1088 dpsiMinv_list, TpsiM_list, *det_leader.
detData,
1090 table_matrix_list, mw_grads);
1094 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for map(from:TpsiM_list_devptr[:nw]) \ 1095 map(always,to:psiM_list_ptr[:nw])")
1096 for (uint32_t iw = 0; iw < nw; iw++)
1098 TpsiM_list_devptr[iw][i * TpsiM_cols + WorkingIndex] = psiM_list_ptr[iw][i + psiM_cols * WorkingIndex];
1103 const size_t det_offset,
1104 const size_t data_offset,
1111 const size_t nw = ratios_list.size();
1112 const int* it2 = data.
data() + data_offset;
1113 for (
size_t iw = 0; iw < nw; iw++)
1114 for (
size_t count = 0; count < (*ndets_per_excitation_level_)[ext_level]; ++count)
1116 size_t det_id = det_offset + count;
1117 ratios_list[iw].get()[det_id] =
sign[det_id] * ratios_list[iw].get()[0] *
1118 det_calculator.
evaluate(table_matrix_list[iw].
get(), it2 + 1 + count * (3 * ext_level + 1), ext_level);
1127 const size_t nw = ratios_deviceptr_list.
size();
1128 auto* ratios_list_ptr = ratios_deviceptr_list.
data();
1129 const auto* det0_list_ptr = det0_list.
data();
1131 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for map(always, to: det0_list_ptr[:nw])")
1132 for (uint32_t iw = 0; iw < nw; iw++)
1133 ratios_list_ptr[iw][0] = det0_list_ptr[iw];
1136 template<
unsigned EXT_LEVEL>
1138 const size_t data_offset,
1142 const size_t num_table_matrix_cols,
1145 const size_t nw = ratios_deviceptr_list.
size();
1146 const size_t size_sign =
sign.size();
1147 const size_t ndet_ext = (*ndets_per_excitation_level_)[EXT_LEVEL];
1151 auto* ratios_list_ptr = ratios_deviceptr_list.
data();
1152 const auto* sign_ptr =
sign.data();
1153 const int* data_ptr = data.
data();
1154 const auto* table_matrix_list_ptr = table_matrix_deviceptr_list.
data();
1156 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2)")
1157 for (uint32_t iw = 0; iw < nw; iw++)
1158 for (uint32_t count = 0; count < ndet_ext; ++count)
1160 size_t det_id = det_offset + count;
1161 ratios_list_ptr[iw][det_id] = sign_ptr[det_id] * ratios_list_ptr[iw][0] *
1163 (data_ptr + data_offset) + 1 + count * (3 * EXT_LEVEL + 1),
1164 num_table_matrix_cols);
1169 const int working_index,
1173 const size_t psiMinv_rows)
const 1179 const size_t nw = inv_curRatio_list.
size();
1185 int dummy_handle = 0;
1187 auto* psiV_list_devptr = psiV_deviceptr_list.
device_data();
1188 auto* psiMinv_list_devptr = psiMinv_deviceptr_list.
device_data();
1191 auto* inv_curRatio_list_devptr = inv_curRatio_list.
device_data();
1195 psiMinv_list_devptr, psiMinv_rows, psiV_list_devptr, 1, czero_ptr, workV1_list_ptr, 1, nw);
1197 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::gemv_batched failed.");
1199 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for is_device_ptr(workV1_list_ptr, inv_curRatio_list_devptr)")
1200 for (uint32_t iw = 0; iw < nw; iw++)
1201 workV1_list_ptr[iw][working_index] =
cone - inv_curRatio_list_devptr[iw];
1203 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");
1206 workV2_list_ptr, 0, 1, nw);
1208 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");
1210 success =
ompBLAS::ger_batched(dummy_handle, psiMinv_rows, psiMinv_rows, cminus_one_ptr, workV1_list_ptr, 1,
1211 workV2_list_ptr, 1, psiMinv_list_devptr, psiMinv_rows, nw);
1213 throw std::runtime_error(
"In MultiDiracDeterminant ompBLAS::ger_batched failed.");
NewTimer & evaluateGrads_timer
std::shared_ptr< OffloadVector< size_t > > refdet_occup
reference determinant occupation
OffloadMatrix< ValueType > dpsiMinv
NewTimer & updateInverse_timer
std::shared_ptr< std::vector< ci_configuration2 > > ciConfigList
use shared_ptr
Matrix< GradType > new_grads
helper functions for EinsplineSetBuilder
std::shared_ptr< OffloadVector< int > > detData
Tensor< T, D >::Type_t det(const Tensor< T, D > &a)
T dot(const T *restrict a, const T *restrict b, int n, TRES res=TRES())
dot product
const int NumPtcls
number of particles which belong to this Dirac determinant
const std::unique_ptr< SPOSet > Phi
a set of single-particle orbitals used to fill in the values of the matrix
OffloadMatrix< GradType > dpsiM
dpsiM(i,j) $= \nabla_i \psi_j(\mathbf{r}_i)$
NewTimer & calculateRatios_timer
constexpr std::complex< float > cone
pointer device_data()
Return the device_ptr matching X if this is a vector attached or owning dual space memory...
OffloadVector< ValueType > czero_vec
OffloadMatrix< ValueType > psiMinv_temp
Matrix< GradType > grads
store determinant grads (old and new)
LatticeGaussianProduct::GradType GradType
std::shared_ptr< std::vector< int > > ndets_per_excitation_level_
number of unique determinants at each excitation level (relative to reference) {1, n_singles, n_doubles, n_triples, ...}
SoA adaptor class for Vector<TinyVector<T,D> >
void mw_buildTableMatrix_calculateGradRatios(MultiDiracDetMultiWalkerResource &mw_res, int ref, int iat, int dx, int getNumDets, const OffloadVector< ValueType > &det0_grad_list, const RefVector< OffloadMatrix< ValueType >> &psiinv_list, const RefVector< OffloadMatrix< ValueType >> &psi_list, const OffloadVector< int > &data, const VectorSoaContainer< int, 2, OffloadPinnedAllocator< int >> &pairs, const OffloadVector< RealType > &sign, const RefVector< OffloadVector< ValueType >> &WorkSpace_list, const RefVector< OffloadMatrix< ValueType >> &table_matrix_list, UnpinnedOffloadMatrix< ValueType > &mw_grads)
Function to calculate the ratio of the gradients of the excited determinant to the reference determin...
void evaluateGradsWithSpin(ParticleSet &P, int iat)
evaluate the gradients of all the unique determinants with one electron moved. Used by the table meth...
VALUE calcSmallDeterminant(size_t n, const VALUE *table_matrix, const int *it, const size_t nb_cols)
OffloadVector< ValueType * > table_matrix_deviceptr_list
NewTimer & buildTable_timer
static constexpr size_t MaxSmallDet
for matrices with leading dimensions <= MaxSmallDet, compute determinant with direct expansion...
OffloadVector< ValueType * > workV2_deviceptr_list
NewTimer & table2ratios_timer
static constexpr int ReferenceDeterminant
all the unique determinants are sorted, the id of the reference det id is always 0 ...
OffloadMatrix< ValueType > TpsiM
size_type extent(int i) const
void mw_updateRatios_generic(const int ext_level, const size_t det_offset, const size_t data_offset, SmallMatrixDetCalculator< ValueType > &det_calculator, const OffloadVector< int > &data, const OffloadVector< RealType > &sign, const RefVector< OffloadMatrix< ValueType >> &table_matrix_list, const RefVector< OffloadVector< ValueType >> &ratios_list) const
update ratios with respect to the reference determinant for a given excitation level ...
OffloadVector< ValueType * > psi_deviceptr_list
void evaluateDetsAndGradsForPtclMoveWithSpin(const ParticleSet &P, int iat)
evaluate the value and gradients of all the unique determinants with one electron moved...
Specialized particle class for atomistic simulations.
static void mw_evaluateGrads(const RefVectorWithLeader< MultiDiracDeterminant > &det_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, UnpinnedOffloadMatrix< ValueType > &mw_grads)
multi walker version of mw_evaluateGrads
void buildTableMatrix_calculateRatiosValueMatrixOneParticle(int ref, const OffloadMatrix< ValueType > &psiinv, const OffloadMatrix< ValueType > &psi, const OffloadVector< int > &data, const VectorSoaContainer< int, 2, OffloadPinnedAllocator< int >> &pairs, const OffloadVector< RealType > &sign, OffloadMatrix< ValueType > &table_matrix, int iat, Matrix< ValueType > &ratios)
size_type size() const
return the current size
OffloadVector< ValueType > psiV
value of single-particle orbital for particle-by-particle update
NewTimer & evalOrbValue_timer
ompBLAS_status gemv_batched(ompBLAS_handle &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
void buildTableMatrix_calculateRatios_impl(int ref, ValueType det0, ValueType *restrict ratios, const OffloadMatrix< ValueType > &psiinv, const OffloadMatrix< ValueType > &psi, OffloadMatrix< ValueType > &table_matrix, const OffloadVector< int > &data, const VectorSoaContainer< int, 2, OffloadPinnedAllocator< int >> &pairs, const OffloadVector< RealType > &sign)
Function to calculate the ratio of the excited determinant to the reference determinant in Customized...
QTBase::ValueType ValueType
void mw_updateRatios_det0(const OffloadVector< ValueType > &det0_list, const OffloadVector< ValueType *> &ratios_deviceptr_list) const
update ratios of the reference determinant
std::shared_ptr< OffloadVector< RealType > > DetSigns
OffloadVector< ValueType > workV1
MatA::value_type DetRatioByColumn(const MatA &Minv, const VecB &newv, int colchanged)
determinant ratio with a column substitution
OffloadMatrix< ValueType > psiMinv
inverse Dirac determinant matrix of the reference det
static void mw_evaluateDetsForPtclMove(const RefVectorWithLeader< MultiDiracDeterminant > &det_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat)
multi walker version of evaluateDetsForPtclMove
NewTimer & evalOrbVGL_timer
Matrix< ValueType > spingrads
ompBLAS_status copy_batched_offset(ompBLAS_handle &handle, const int n, const T *const x[], const int x_offset, const int incx, T *const y[], const int y_offset, const int incy, const int batch_count)
copy device data from x to y with additional offset applied to array of device pointers ...
std::shared_ptr< VectorSoaContainer< int, 2, OffloadPinnedAllocator< int > > > uniquePairs
Matrix< ValueType > new_spingrads
ompBLAS_status ger_batched(ompBLAS_handle &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
ValueType curRatio
new value of the reference determinant over the old value upon a proposed move
OffloadVector< ValueType > WorkSpace
static T evaluate(T a11, T a12, T a21, T a22)
void mw_buildTableMatrix_calculateRatios_impl(MultiDiracDetMultiWalkerResource &mw_res, int ref, const OffloadVector< ValueType > &det0_list, const RefVector< OffloadMatrix< ValueType >> &psiinv_list, const RefVector< OffloadMatrix< ValueType >> &psi_list, const OffloadVector< int > &data, const VectorSoaContainer< int, 2, OffloadPinnedAllocator< int >> &pairs, const OffloadVector< RealType > &sign, const RefVector< OffloadMatrix< ValueType >> &table_matrix_list, const RefVector< OffloadVector< ValueType >> &ratios_list)
Function to calculate the ratio of the excited determinant to the reference determinant in Customized...
OMPallocator is an allocator with fused device and dualspace allocator functionality.
OffloadMatrix< ValueType > table_matrix
SmallMatrixDetCalculator< ValueType > det_calculator_
OffloadVector< ValueType > psiV_temp
OffloadVector< ValueType * > ratios_deviceptr_list
std::vector< std::reference_wrapper< T > > RefVector
NewTimer & updateRatios_timer
const int NumOrbitals
number of single-particle orbitals which belong to this Dirac determinant
void mw_buildTableMatrix_calculateRatios(MultiDiracDetMultiWalkerResource &mw_res, int ref, const OffloadVector< ValueType > &det0_list, const RefVector< OffloadMatrix< ValueType >> &psiinv_list, const RefVector< OffloadMatrix< ValueType >> &psi_list, const OffloadVector< int > &data, const VectorSoaContainer< int, 2, OffloadPinnedAllocator< int >> &pairs, const OffloadVector< RealType > &sign, const RefVector< OffloadMatrix< ValueType >> &table_matrix_list, const RefVector< OffloadVector< ValueType >> &ratios_list)
OffloadVector< ValueType > cminus_one_vec
Tensor< T, D > inverse(const Tensor< T, D > &a)
void evaluateDetsForPtclMove(const ParticleSet &P, int iat, int refPtcl=-1)
evaluate the value of all the unique determinants with one electron moved.
OffloadMatrix< ValueType > psiM
psiM(i,j) $= \psi_j(\mathbf{r}_i)$; TpsiM(i,j) $=$ psiM(j,i)
OffloadVector< ValueType > new_ratios_to_ref_
new determinant ratios with respect to the updated reference determinant upon a proposed move ...
ResourceHandle< MultiDiracDetMultiWalkerResource > mw_res_handle_
void evaluateDetsAndGradsForPtclMove(const ParticleSet &P, int iat)
evaluate the value and gradients of all the unique determinants with one electron moved...
NewTimer & evaluateDetsForPtclMove_timer
void mw_InverseUpdateByColumn(MultiDiracDetMultiWalkerResource &mw_res, const int working_index, const OffloadVector< ValueType > &curRatio_list, const OffloadVector< ValueType *> &psiV_deviceptr_list, const OffloadVector< ValueType *> &psiMinv_deviceptr_list, const size_t psiMinv_rows) const
OffloadVector< GradType > dpsiV
LatticeGaussianProduct::ValueType ValueType
OffloadVector< ValueType * > workV1_deviceptr_list
NewTimer & calculateGradRatios_timer
void buildTableMatrix_calculateRatios(int ref, const OffloadMatrix< ValueType > &psiinv, const OffloadMatrix< ValueType > &psi, const OffloadVector< int > &data, const VectorSoaContainer< int, 2, OffloadPinnedAllocator< int >> &pairs, const OffloadVector< RealType > &sign, OffloadMatrix< ValueType > &table_matrix, OffloadVector< ValueType > &ratios)
compute the ratio of the excited determinants to the reference determinant
const int FirstIndex
index of the first particle with respect to the particle set
void evaluate(Matrix< T, Alloc > &lhs, const Op &op, const Expression< RHS > &rhs)
const int LastIndex
index of the last particle with respect to the particle set
static void mw_evaluateDetsAndGradsForPtclMove(const RefVectorWithLeader< MultiDiracDeterminant > &det_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, UnpinnedOffloadMatrix< ValueType > &mw_grads)
multi walker version of mw_evaluateDetsAndGradsForPtclMove
ompBLAS_status copy_batched(ompBLAS_handle &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
copy device data from x to y
void mw_updateRatios(const size_t det_offset, const size_t data_offset, const OffloadVector< int > &data, const OffloadVector< RealType > &sign, const OffloadVector< ValueType *> &table_matrix_deviceptr_list, const size_t num_table_matrix_cols, const OffloadVector< ValueType *> &ratios_deviceptr_list) const
update ratios with respect to the reference determinant for a given excitation level ...
void evaluateGrads(ParticleSet &P, int iat)
evaluate the gradients of all the unique determinants with one electron moved. Used by the table meth...
NewTimer & evaluateDetsAndGradsForPtclMove_timer
NewTimer & transferH2D_timer
OffloadVector< ValueType * > psiinv_deviceptr_list
OffloadVector< ValueType > d2psiV
NewTimer & transferD2H_timer
OffloadVector< ValueType > workV2
void InverseUpdateByColumn(Matrix< T, ALLOC > &Minv, Vector< T, ALLOC > &newcol, Vector< T, ALLOC > &rvec, Vector< T, ALLOC > &rvecinv, int colchanged, T c_ratio)
void buildTableMatrix_calculateGradRatios(int ref, const OffloadMatrix< ValueType > &psiinv, const OffloadMatrix< ValueType > &psi, const OffloadVector< int > &data, const VectorSoaContainer< int, 2, OffloadPinnedAllocator< int >> &pairs, const OffloadVector< RealType > &sign, const ValueType &det0_grad, OffloadMatrix< ValueType > &table_matrix, int dx, int iat, Matrix< GradType > &grads)
Function to calculate the ratio of the gradients of the excited determinant to the reference determin...
int UpdateMode
current update mode