23 : cuda_default_device_num(-1), cuda_device_count(0)
29 throw std::runtime_error(
"Inconsistent number of CUDA devices with the previous record!");
31 app_warning() << "More CUDA devices than the number of MPI ranks. "
32               << "Some devices will be left idle.\n"
33               << "There is potential performance issue with the GPU affinity. "
34               << "Use CUDA_VISIBLE_DEVICE or MPI launcher to expose desired devices.\n";
38 if (default_device_num < 0)
41 throw std::runtime_error(
"Inconsistent assigned CUDA devices with the previous record!");
std::ostream & app_warning()
Helper functions for EinsplineSetBuilder.
CUDADeviceManager(int &default_device_num, int &num_devices, int local_rank, int local_size)
Declaration of OutputManager class.
Handle CUDA/HIP runtime selection.
cudaErrorCheck(cudaMemcpyAsync(dev_lu.data(), lu.data(), sizeof(decltype(lu)::value_type) *lu.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")
#define cudaGetDeviceCount
int cuda_default_device_num
int determineDefaultDeviceNum(int num_devices, int rank_id, int num_ranks)
Distribute MPI ranks among devices.