Functions
template<typename T1 , typename T2 >
T2	accumulate_n (const T1 *restrict in, size_t n, T2 res)

template<typename T1 , typename T2 >
void	copy (T1 *restrict target, const T2 *restrict source, size_t n)
	element-wise copy with conversion of each source value to T1 (loop with static_cast, not memcpy) More...

template<typename T >
void	copy (T *restrict target, const T *restrict source, size_t n)
	copy function using memcpy More...

template<typename T1 , typename T2 >
void	copy (T1 *restrict target_r, T1 *restrict target_i, const std::complex< T2 > *restrict source, size_t n)
	copy complex to two real containers More...

template<typename T >
void	accumulate_phases (const int &n, const std::complex< T > *restrict x, const std::complex< T > *restrict y, T &rN, T &iN, T &riN)

template<typename T , typename TO >
void	transpose (const T *restrict A, size_t m, size_t lda, TO *restrict B, size_t n, size_t ldb)
	transpose of A(m,n) to B(n,m) More...

template<typename T , typename TO >
void	remapCopy (size_t m, size_t n, const T *restrict A, size_t lda, TO *restrict B, size_t ldb)
	copy of A(m,n) to B(m,n) More...

template<typename T , typename TRES = T>
T	dot (const T *restrict a, const T *restrict b, int n, TRES res=TRES())
	dot product More...

template<class T , unsigned D, class TRES = T>
TinyVector< T, D >	dot (const T *a, const TinyVector< T, D > *b, int n, TinyVector< TRES, D > res=TinyVector< T, D >())
	inline dot product More...

template<class T , unsigned D, class TRES = T>
Tensor< T, D >	dot (const T *a, const Tensor< T, D > *b, int n, Tensor< TRES, D > res=Tensor< TRES, D >())
	inline dot product More...

template<typename T >
T	dot (int n, const T *restrict x, int incx, const T *restrict y, int incy)
	x*y dot product of two vectors using the same argument list for blas::dot More...

template<typename T , typename SIZET >
void	remainder (const T *restrict in, T *restrict out, SIZET n)
	mod on an array out[i]=in[i]-floor(in[i]) More...

template<typename T , typename SIZET >
void	remainder (T *restrict inout, SIZET n)

template<typename T , typename SIZET >
void	sqrt (T *restrict inout, SIZET n)

template<typename T >
void	sqrt (const T *restrict in, T *restrict out, int n)

template<typename T >
void	inv (const T *restrict in, T *restrict out, int n)

template<typename T >
void	add (int n, const T *restrict in, T *restrict out)

Function Documentation

◆ accumulate_n()

T2 qmcplusplus::simd::accumulate_n	(	const T1 *restrict	in,
		size_t	n,
		T2	res
	)

inline

Definition at line 26 of file algorithm.hpp.

References qmcplusplus::n.

Referenced by J1OrbitalSoA< FT >::computeGL(), JeeIOrbitalSoA< FT >::computeGL(), TwoBodyJastrow< FT >::computeGL(), J1Spin< FT >::computeGL(), JeeIOrbitalSoA< FT >::computeU3_engine(), TwoBodyJastrow< FT >::ratioGrad(), J1OrbitalSoA< FT >::ratioGrad(), J1Spin< FT >::ratioGrad(), J1Spin< FT >::recompute(), TwoBodyJastrow< FT >::recompute(), and J1OrbitalSoA< FT >::recompute().

 {
 #pragma omp simd reduction(+ : res)
   for (int i = 0; i < n; ++i)
     res += in[i];
   return res;
 }

◆ accumulate_phases()

void qmcplusplus::simd::accumulate_phases	(	const int &	n,
		const std::complex< T > *restrict	x,
		const std::complex< T > *restrict	y,
		T &	rN,
		T &	iN,
		T &	riN
	)

inline

Definition at line 69 of file algorithm.hpp.

References qmcplusplus::n.

 {
   for (int i = 0; i < n; ++i)
   {
     T tr = x[i].real() * y[i].real() - x[i].imag() * y[i].imag();
     T ti = x[i].real() * y[i].imag() + x[i].imag() * y[i].real();
     rN += tr * tr;
     iN += ti * ti;
     riN += tr * ti;
   } //
 }

◆ add()

void qmcplusplus::simd::add	(	int	n,
		const T *restrict	in,
		T *restrict	out
	)

inline

Definition at line 95 of file vmath.hpp.

References qmcplusplus::n.

Referenced by SPOSetInfo::add(), and ci_configuration::isSingle().

 {
   for (int i = 0; i < n; ++i)
     out[i] += in[i];
 }

◆ copy() [1/3]

void qmcplusplus::simd::copy	(	T1 *restrict	target,
		const T2 *restrict	source,
		size_t	n
	)

inline

element-wise copy that converts each source value to T1 with static_cast (this overload does not use memcpy)

Parameters

target	starting address of the target
source	starting address of the source
n	size of the data to copy

Definition at line 40 of file algorithm.hpp.

References qmcplusplus::n.

Referenced by DiracDeterminant< DU_TYPE >::acceptMove(), and DiracDeterminantBatched< PL, VT, FPVT >::acceptMove().

 {
   for (size_t i = 0; i < n; ++i)
     target[i] = static_cast<T1>(source[i]);
 }

◆ copy() [2/3]

void qmcplusplus::simd::copy	(	T *restrict	target,
		const T *restrict	source,
		size_t	n
	)

inline

copy function using memcpy

Parameters

target	starting address of the target
source	starting address of the source
n	size of the data to copy

Definition at line 52 of file algorithm.hpp.

References qmcplusplus::n.

 {
   memcpy(target, source, sizeof(T) * n);
 }

◆ copy() [3/3]

void qmcplusplus::simd::copy	(	T1 *restrict	target_r,
		T1 *restrict	target_i,
		const std::complex< T2 > *restrict	source,
		size_t	n
	)

inline

copy complex to two real containers

Definition at line 59 of file algorithm.hpp.

References qmcplusplus::n.

 {
   for (int i = 0; i < n; ++i)
   {
     *target_r++ = static_cast<T1>(source[i].real());
     *target_i++ = static_cast<T1>(source[i].imag());
   }
 }

◆ dot() [1/4]

T qmcplusplus::simd::dot	(	const T *restrict	a,
		const T *restrict	b,
		int	n,
		TRES	res = `TRES()`
	)

inline

dot product

Parameters

a	starting address of an array of type T
b	starting address of an array of type T
n	size
res	initial value, can be used to override precision in reduction

Returns: $res = \sum_i a[i] b[i]$

Definition at line 41 of file inner_product.hpp.

References qmcplusplus::n.

 {
   for (int i = 0; i < n; i++)
     res += a[i] * b[i];
   return res;
 }

◆ dot() [2/4]

TinyVector<T, D> qmcplusplus::simd::dot	(	const T *	a,
		const TinyVector< T, D > *	b,
		int	n,
		TinyVector< TRES, D >	res = `TinyVector<T, D>()`
	)

inline

inline dot product

Parameters

a	starting address of an array of type T
b	starting address of an array of type TinyVector<T,D>
n	size
res	initial value, can be used to override precision in reduction

Definition at line 55 of file inner_product.hpp.

References qmcplusplus::n.

 {
   for (int i = 0; i < n; i++)
     res += a[i] * b[i];
   return res;
 }

◆ dot() [3/4]

Tensor<T, D> qmcplusplus::simd::dot	(	const T *	a,
		const Tensor< T, D > *	b,
		int	n,
		Tensor< TRES, D >	res = `Tensor<TRES, D>()`
	)

inline

inline dot product

Parameters

a	starting address of an array of type T
b	starting address of an array of type Tensor<T, D>
n	size
res	initial value, can be used to override precision in reduction

Returns: ${\bf v} = \sum_i a[i] {\bf b}[i]$

Definition at line 70 of file inner_product.hpp.

References qmcplusplus::n.

 {
   for (int i = 0; i < n; i++)
     res += a[i] * b[i];
   return res;
 }

◆ dot() [4/4]

T qmcplusplus::simd::dot	(	int	n,
		const T *restrict	x,
		int	incx,
		const T *restrict	y,
		int	incy
	)

inline

x*y dot product of two vectors using the same argument list for blas::dot

Parameters

n	size
x	starting address of x
incx	stride of x
y	starting address of y
incy	stride of y
return	$\sum_{i=0}^{n-1} x[i \cdot incx] \, y[i \cdot incy]$ (for positive incx)

Definition at line 86 of file inner_product.hpp.

References qmcplusplus::n.

 {
   const int xmax = incx * n;
   T res          = 0.0;
   for (int ic = 0, jc = 0; ic < xmax; ic += incx, jc += incy)
     res += x[ic] * y[jc];
   return res;
 }

◆ inv()

void qmcplusplus::simd::inv	(	const T *restrict	in,
		T *restrict	out,
		int	n
	)

inline

Definition at line 65 of file vmath.hpp.

References qmcplusplus::n.

 {
   for (int i = 0; i < n; ++i)
     out[i] = 1.0 / in[i];
 }

◆ remainder() [1/2]

void qmcplusplus::simd::remainder	(	const T *restrict	in,
		T *restrict	out,
		SIZET	n
	)

inline

mod on an array out[i]=in[i]-floor(in[i])

Definition at line 38 of file vmath.hpp.

References qmcplusplus::floor(), and qmcplusplus::n.

Referenced by qmcplusplus::put2box().

 {
   for (SIZET i = 0; i < n; ++i)
     out[i] = in[i] - std::floor(in[i]);
 }

◆ remainder() [2/2]

void qmcplusplus::simd::remainder	(	T *restrict	inout,
		SIZET	n
	)

inline

Definition at line 45 of file vmath.hpp.

References qmcplusplus::floor(), and qmcplusplus::n.

 {
   for (SIZET i = 0; i < n; ++i)
     inout[i] -= std::floor(inout[i]);
 }

◆ remapCopy()

void qmcplusplus::simd::remapCopy	(	size_t	m,
		size_t	n,
		const T *restrict	A,
		size_t	lda,
		TO *restrict	B,
		size_t	ldb
	)

inline

copy of A(m,n) to B(m,n)

Parameters

A	starting address, A(m,lda)
m	number of A rows
lda	stride of A's row
B	starting address B(m,ldb)
n	number of columns copied from each row (A and B both have m rows)
ldb	stride of B's row

Blas-like interface

Definition at line 115 of file algorithm.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by DiracMatrixComputeOMPTarget< VALUE_FP >::invert_transpose(), and DiracMatrixComputeCUDA< VALUE_FP >::mw_computeInvertAndLog().

 {
   for (size_t j = 0; j < m; ++j)
     for (size_t i = 0; i < n; ++i)
       B[j * ldb + i] = A[j * lda + i];
 }

◆ sqrt() [1/2]

void qmcplusplus::simd::sqrt	(	T *restrict	inout,
		SIZET	n
	)

inline

Definition at line 52 of file vmath.hpp.

References qmcplusplus::n, and sqrt().

 {
   for (SIZET i = 0; i < n; ++i)
     inout[i] = std::sqrt(inout[i]);
 }

◆ sqrt() [2/2]

void qmcplusplus::simd::sqrt	(	const T *restrict	in,
		T *restrict	out,
		int	n
	)

inline

Definition at line 59 of file vmath.hpp.

References qmcplusplus::n.

Referenced by sqrt().

 {
   for (int i = 0; i < n; ++i)
     out[i] = std::sqrt(in[i]);
 }

◆ transpose()

void qmcplusplus::simd::transpose	(	const T *restrict	A,
		size_t	m,
		size_t	lda,
		TO *restrict	B,
		size_t	n,
		size_t	ldb
	)

inline

transpose of A(m,n) to B(n,m)

Parameters

A	starting address, A(m,lda)
m	number of A rows
lda	stride of A's row
B	starting address B(n,ldb)
n	number of B rows
ldb	stride of B's row

Blas-like interface

Definition at line 97 of file algorithm.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by DiracDeterminantWithBackflow::evaluate_SPO(), DiracMatrixComputeOMPTarget< VALUE_FP >::invert_transpose(), DiracMatrix< VALUE_FP >::invert_transpose(), DiracMatrixComputeCUDA< VALUE_FP >::invert_transpose(), qmcplusplus::testing::makeRngSpdMatrix(), DiracMatrixComputeCUDA< VALUE_FP >::mw_invertTranspose(), qmcplusplus::TEMPLATE_TEST_CASE(), qmcplusplus::TEST_CASE(), qmcplusplus::test_DiracDeterminant_delayed_update(), qmcplusplus::test_DiracDeterminant_second(), qmcplusplus::test_DiracDeterminantBatched_delayed_update(), qmcplusplus::test_DiracDeterminantBatched_second(), and qmcplusplus::MatrixOperators::transpose().

 {
   for (size_t i = 0; i < n; ++i)
     for (size_t j = 0; j < m; ++j)
       B[i * ldb + j] = A[j * lda + i];
 }

Functions

Function Documentation

◆ accumulate_n()

◆ accumulate_phases()

◆ add()

◆ copy() [1/3]

◆ copy() [2/3]

◆ copy() [3/3]

◆ dot() [1/4]

◆ dot() [2/4]

◆ dot() [3/4]

◆ dot() [4/4]

◆ inv()

◆ remainder() [1/2]

◆ remainder() [2/2]

◆ remapCopy()

◆ sqrt() [1/2]

◆ sqrt() [2/2]

◆ transpose()