Cleanup: make HIP and CUDA code more consistent

Ref D12834
This commit is contained in:
Brecht Van Lommel 2021-10-20 20:40:32 +02:00
parent 6ef8c9e646
commit 39810b3f51
2 changed files with 17 additions and 14 deletions

View File

@@ -41,13 +41,19 @@ CUDADeviceQueue::~CUDADeviceQueue()
int CUDADeviceQueue::num_concurrent_states(const size_t state_size) const
{
int num_states = max(cuda_device_->get_num_multiprocessors() *
cuda_device_->get_max_num_threads_per_multiprocessor() * 16,
1048576);
const int max_num_threads = cuda_device_->get_num_multiprocessors() *
cuda_device_->get_max_num_threads_per_multiprocessor();
int num_states = max(max_num_threads, 65536) * 16;
const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
if (factor_str) {
num_states = max((int)(num_states * atof(factor_str)), 1024);
const float factor = (float)atof(factor_str);
if (factor != 0.0f) {
num_states = max((int)(num_states * factor), 1024);
}
else {
VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
}
}
VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "

View File

@@ -41,22 +41,19 @@ HIPDeviceQueue::~HIPDeviceQueue()
int HIPDeviceQueue::num_concurrent_states(const size_t state_size) const
{
int num_states = 0;
const int max_num_threads = hip_device_->get_num_multiprocessors() *
hip_device_->get_max_num_threads_per_multiprocessor();
if (max_num_threads == 0) {
num_states = 1048576; // 65536 * 16
}
else {
num_states = max_num_threads * 16;
}
int num_states = ((max_num_threads == 0) ? 65536 : max_num_threads) * 16;
const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
if (factor_str) {
float factor = (float)atof(factor_str);
if (!factor)
const float factor = (float)atof(factor_str);
if (factor != 0.0f) {
num_states = max((int)(num_states * factor), 1024);
}
else {
VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
num_states = max((int)(num_states * factor), 1024);
}
}
VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "