Cycles: limit number of processes compiling OpenCL kernel based on memory

The numbers here can probably be tweaked to be better, but the memory needed
for compilation is hard to predict, and this should at least avoid excessive
memory swapping.

Fixes T75064.
This commit is contained in:
Brecht Van Lommel 2020-03-25 13:11:09 +01:00
parent e5f7b31dd4
commit f48d15a861
Notes: blender-bot 2023-02-14 04:24:05 +01:00
Referenced by issue #75064, Limit Cycles OpenCL parallel compilation memory usage
4 changed files with 99 additions and 8 deletions

View File

@ -257,16 +257,16 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
/* Ordered with most complex kernels first, to reduce overall compile time. */
ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
if (requested_features.use_volume || is_preview) {
ADD_SPLIT_KERNEL_PROGRAM(do_volume);
}
ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
/* Quick kernels bundled in a single program to reduce overhead of starting
* Blender processes. */

View File

@ -23,6 +23,7 @@
# include "util/util_logging.h"
# include "util/util_md5.h"
# include "util/util_path.h"
# include "util/util_semaphore.h"
# include "util/util_system.h"
# include "util/util_time.h"
@ -390,8 +391,27 @@ static void escape_python_string(string &str)
string_replace(str, "'", "\'");
}
static int opencl_compile_process_limit()
{
  /* Heuristic cap on how many kernel compilation processes may run at once.
   *
   * Takes the total physical RAM, subtracts a fixed base amount, and divides
   * the remainder by an estimate of what one compile process needs when all
   * Cycles features are enabled. The result is never lower than one.
   *
   * The figures are somewhat arbitrary: the RAM that is actually available
   * and the memory a given kernel needs to compile are both unknown here.
   * Limiting roughly is still better than not limiting at all. */
  const int64_t bytes_per_gb = 1024LL * 1024LL * 1024LL;
  const int64_t bytes_per_process = 2 * bytes_per_gb;
  const int64_t bytes_reserved = 2 * bytes_per_gb;

  /* Query the system only once, on the first call. */
  static const int64_t bytes_total = system_physical_ram();

  const int64_t num_processes = (bytes_total - bytes_reserved) / bytes_per_process;

  /* Always allow at least one process, also when the estimate is zero or negative. */
  const int limit = (int)num_processes;
  if (limit < 1) {
    return 1;
  }
  return limit;
}
bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
{
/* Construct arguments. */
vector<string> args;
args.push_back("--background");
args.push_back("--factory-startup");
@ -419,14 +439,23 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
kernel_file_escaped.c_str(),
clbin_escaped.c_str()));
double starttime = time_dt();
/* Limit number of concurrent processes compiling. */
static thread_counting_semaphore semaphore(opencl_compile_process_limit());
semaphore.acquire();
/* Compile. */
const double starttime = time_dt();
add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
add_log(string("Build flags: ") + kernel_build_options, true);
if (!system_call_self(args) || !path_exists(clbin)) {
const bool success = system_call_self(args);
const double elapsed = time_dt() - starttime;
semaphore.release();
if (!success || !path_exists(clbin)) {
return false;
}
double elapsed = time_dt() - starttime;
add_log(
string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
false);

View File

@ -102,6 +102,7 @@ set(SRC_HEADERS
util_sky_model_data.h
util_avxf.h
util_avxb.h
util_semaphore.h
util_sseb.h
util_ssef.h
util_ssei.h

View File

@ -0,0 +1,61 @@
/*
* Copyright 2011-2020 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __UTIL_SEMAPHORE_H__
#define __UTIL_SEMAPHORE_H__
#include "util/util_thread.h"
CCL_NAMESPACE_BEGIN
/* Counting Semaphore
*
* To restrict concurrent access to a resource to a specified number
* of threads. Similar to std::counting_semaphore from C++20. */
class thread_counting_semaphore {
public:
explicit thread_counting_semaphore(const int count) : count(count)
{
}
thread_counting_semaphore(const thread_counting_semaphore &) = delete;
void acquire()
{
thread_scoped_lock lock(mutex);
while (count == 0) {
condition.wait(lock);
}
count--;
}
void release()
{
thread_scoped_lock lock(mutex);
count++;
condition.notify_one();
}
protected:
thread_mutex mutex;
thread_condition_variable condition;
int count;
};
CCL_NAMESPACE_END
#endif /* __UTIL_SEMAPHORE_H__ */