Cleanup: make HIP and CUDA code more consistent

Ref D12834
2021-10-20 20:40:32 +02:00 · 2021-10-20 20:40:32 +02:00 · 39810b3f51
parent 6ef8c9e646
commit 39810b3f51
2 changed files with 17 additions and 14 deletions
--- a/intern/cycles/device/cuda/queue.cpp
+++ b/intern/cycles/device/cuda/queue.cpp
@@ -41,13 +41,19 @@ CUDADeviceQueue::~CUDADeviceQueue()

 int CUDADeviceQueue::num_concurrent_states(const size_t state_size) const
 {
-  int num_states = max(cuda_device_->get_num_multiprocessors() *
-                           cuda_device_->get_max_num_threads_per_multiprocessor() * 16,
-                       1048576);
+  const int max_num_threads = cuda_device_->get_num_multiprocessors() *
+                              cuda_device_->get_max_num_threads_per_multiprocessor();
+  int num_states = max(max_num_threads, 65536) * 16;

  const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
  if (factor_str) {
-    num_states = max((int)(num_states * atof(factor_str)), 1024);
+    const float factor = (float)atof(factor_str);
+    if (factor != 0.0f) {
+      num_states = max((int)(num_states * factor), 1024);
+    }
+    else {
+      VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
+    }
  }

  VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "
--- a/intern/cycles/device/hip/queue.cpp
+++ b/intern/cycles/device/hip/queue.cpp
@@ -41,22 +41,19 @@ HIPDeviceQueue::~HIPDeviceQueue()

 int HIPDeviceQueue::num_concurrent_states(const size_t state_size) const
 {
-  int num_states = 0;
  const int max_num_threads = hip_device_->get_num_multiprocessors() *
                              hip_device_->get_max_num_threads_per_multiprocessor();
-  if (max_num_threads == 0) {
-    num_states = 1048576;  // 65536 * 16
-  }
-  else {
-    num_states = max_num_threads * 16;
-  }
+  int num_states = ((max_num_threads == 0) ? 65536 : max_num_threads) * 16;

  const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
  if (factor_str) {
-    float factor = (float)atof(factor_str);
-    if (!factor)
+    const float factor = (float)atof(factor_str);
+    if (factor != 0.0f) {
+      num_states = max((int)(num_states * factor), 1024);
+    }
+    else {
      VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
-    num_states = max((int)(num_states * factor), 1024);
+    }
  }

  VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "