Performance: Use parallel range for ImBuf scanline processor.

The scanline processor used its own heuristic, which didn't scale well
on machines with multiple cores. Instead of using our own code, this
patch leaves it to TBB to determine how to split the scanlines over the
available threads.

Execution time of IMB_transform was 0.002123s before this change and
0.001601s with it. This change improves performance in other areas
as well, including color management conversions.

Reviewed By: zeddb

Differential Revision: https://developer.blender.org/D11578
This commit is contained in:
Jeroen Bakker 2021-06-11 15:55:09 +02:00 committed by Jeroen Bakker
parent 7b76a160a4
commit 7b30a3e98d
6 changed files with 55 additions and 78 deletions

View File

@ -69,10 +69,11 @@ static void image_buf_fill_color_slice(
}
}
static void image_buf_fill_color_thread_do(void *data_v, int start_scanline, int num_scanlines)
static void image_buf_fill_color_thread_do(void *data_v, int scanline)
{
FillColorThreadData *data = (FillColorThreadData *)data_v;
size_t offset = ((size_t)start_scanline) * data->width * 4;
const int num_scanlines = 1;
size_t offset = ((size_t)scanline) * data->width * 4;
unsigned char *rect = (data->rect != NULL) ? (data->rect + offset) : NULL;
float *rect_float = (data->rect_float != NULL) ? (data->rect_float + offset) : NULL;
image_buf_fill_color_slice(rect, rect_float, data->width, num_scanlines, data->color);
@ -197,13 +198,14 @@ typedef struct FillCheckerThreadData {
int width;
} FillCheckerThreadData;
static void image_buf_fill_checker_thread_do(void *data_v, int start_scanline, int num_scanlines)
static void image_buf_fill_checker_thread_do(void *data_v, int scanline)
{
FillCheckerThreadData *data = (FillCheckerThreadData *)data_v;
size_t offset = ((size_t)start_scanline) * data->width * 4;
size_t offset = ((size_t)scanline) * data->width * 4;
const int num_scanlines = 1;
unsigned char *rect = (data->rect != NULL) ? (data->rect + offset) : NULL;
float *rect_float = (data->rect_float != NULL) ? (data->rect_float + offset) : NULL;
image_buf_fill_checker_slice(rect, rect_float, data->width, num_scanlines, start_scanline);
image_buf_fill_checker_slice(rect, rect_float, data->width, num_scanlines, scanline);
}
void BKE_image_buf_fill_checker(unsigned char *rect, float *rect_float, int width, int height)
@ -444,16 +446,15 @@ typedef struct FillCheckerColorThreadData {
int width, height;
} FillCheckerColorThreadData;
static void checker_board_color_prepare_thread_do(void *data_v,
int start_scanline,
int num_scanlines)
static void checker_board_color_prepare_thread_do(void *data_v, int scanline)
{
FillCheckerColorThreadData *data = (FillCheckerColorThreadData *)data_v;
size_t offset = ((size_t)data->width) * start_scanline * 4;
const int num_scanlines = 1;
size_t offset = ((size_t)data->width) * scanline * 4;
unsigned char *rect = (data->rect != NULL) ? (data->rect + offset) : NULL;
float *rect_float = (data->rect_float != NULL) ? (data->rect_float + offset) : NULL;
checker_board_color_prepare_slice(
rect, rect_float, data->width, num_scanlines, start_scanline, data->height);
rect, rect_float, data->width, num_scanlines, scanline, data->height);
}
void BKE_image_buf_fill_checker_color(unsigned char *rect,

View File

@ -733,7 +733,7 @@ void IMB_processor_apply_threaded(
void(init_handle)(void *handle, int start_line, int tot_line, void *customdata),
void *(do_thread)(void *));
typedef void (*ScanlineThreadFunc)(void *custom_data, int start_scanline, int num_scanlines);
typedef void (*ScanlineThreadFunc)(void *custom_data, int scanline);
void IMB_processor_apply_threaded_scanlines(int total_scanlines,
ScanlineThreadFunc do_thread,
void *custom_data);

View File

@ -3539,12 +3539,11 @@ typedef struct PartialThreadData {
int xmin, ymin, xmax;
} PartialThreadData;
static void partial_buffer_update_rect_thread_do(void *data_v,
int start_scanline,
int num_scanlines)
static void partial_buffer_update_rect_thread_do(void *data_v, int scanline)
{
PartialThreadData *data = (PartialThreadData *)data_v;
int ymin = data->ymin + start_scanline;
int ymin = data->ymin + scanline;
const int num_scanlines = 1;
partial_buffer_update_rect(data->ibuf,
data->display_buffer,
data->linear_buffer,

View File

@ -536,13 +536,12 @@ typedef struct FloatToFloatThreadData {
int stride_from;
} FloatToFloatThreadData;
static void imb_buffer_float_from_float_thread_do(void *data_v,
int start_scanline,
int num_scanlines)
static void imb_buffer_float_from_float_thread_do(void *data_v, int scanline)
{
const int num_scanlines = 1;
FloatToFloatThreadData *data = (FloatToFloatThreadData *)data_v;
size_t offset_from = ((size_t)start_scanline) * data->stride_from * data->channels_from;
size_t offset_to = ((size_t)start_scanline) * data->stride_to * data->channels_from;
size_t offset_from = ((size_t)scanline) * data->stride_from * data->channels_from;
size_t offset_to = ((size_t)scanline) * data->stride_to * data->channels_from;
IMB_buffer_float_from_float(data->rect_to + offset_to,
data->rect_from + offset_from,
data->channels_from,

View File

@ -412,42 +412,34 @@ static void imb_transform_calc_add_y(const float transform_matrix[3][3],
typedef void (*InterpolationColorFunction)(
struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v);
BLI_INLINE void imb_transform_scanlines(const TransformUserData *user_data,
int start_scanline,
int num_scanlines,
int scanline,
InterpolationColorFunction interpolation)
{
const int width = user_data->dst->x;
float next_line_start_uv[2];
madd_v2_v2v2fl(next_line_start_uv, user_data->start_uv, user_data->add_y, start_scanline);
float uv[2];
madd_v2_v2v2fl(uv, user_data->start_uv, user_data->add_y, scanline);
unsigned char *outI = NULL;
float *outF = NULL;
pixel_from_buffer(user_data->dst, &outI, &outF, 0, start_scanline);
pixel_from_buffer(user_data->dst, &outI, &outF, 0, scanline);
for (int yi = start_scanline; yi < start_scanline + num_scanlines; yi++) {
float uv[2];
copy_v2_v2(uv, next_line_start_uv);
add_v2_v2(next_line_start_uv, user_data->add_y);
for (int xi = 0; xi < width; xi++) {
if (uv[0] >= user_data->src_crop.xmin && uv[0] < user_data->src_crop.xmax &&
uv[1] >= user_data->src_crop.ymin && uv[1] < user_data->src_crop.ymax) {
interpolation(user_data->src, outI, outF, uv[0], uv[1]);
}
add_v2_v2(uv, user_data->add_x);
if (outI) {
outI += 4;
}
if (outF) {
outF += 4;
}
for (int xi = 0; xi < width; xi++) {
if (uv[0] >= user_data->src_crop.xmin && uv[0] < user_data->src_crop.xmax &&
uv[1] >= user_data->src_crop.ymin && uv[1] < user_data->src_crop.ymax) {
interpolation(user_data->src, outI, outF, uv[0], uv[1]);
}
add_v2_v2(uv, user_data->add_x);
if (outI) {
outI += 4;
}
if (outF) {
outF += 4;
}
}
}
static void imb_transform_nearest_scanlines(void *custom_data,
int start_scanline,
int num_scanlines)
static void imb_transform_nearest_scanlines(void *custom_data, int scanline)
{
const TransformUserData *user_data = custom_data;
InterpolationColorFunction interpolation = NULL;
@ -457,12 +449,10 @@ static void imb_transform_nearest_scanlines(void *custom_data,
else {
interpolation = nearest_interpolation_color_char;
}
imb_transform_scanlines(user_data, start_scanline, num_scanlines, interpolation);
imb_transform_scanlines(user_data, scanline, interpolation);
}
static void imb_transform_bilinear_scanlines(void *custom_data,
int start_scanline,
int num_scanlines)
static void imb_transform_bilinear_scanlines(void *custom_data, int scanline)
{
const TransformUserData *user_data = custom_data;
InterpolationColorFunction interpolation = NULL;
@ -472,7 +462,7 @@ static void imb_transform_bilinear_scanlines(void *custom_data,
else if (user_data->dst->rect) {
interpolation = bilinear_interpolation_color_char;
}
imb_transform_scanlines(user_data, start_scanline, num_scanlines, interpolation);
imb_transform_scanlines(user_data, scanline, interpolation);
}
static ScanlineThreadFunc imb_transform_scanline_func(const eIMBInterpolationFilterMode filter)
@ -568,41 +558,28 @@ void IMB_processor_apply_threaded(
typedef struct ScanlineGlobalData {
void *custom_data;
ScanlineThreadFunc do_thread;
int scanlines_per_task;
int total_scanlines;
} ScanlineGlobalData;
static void processor_apply_scanline_func(TaskPool *__restrict pool, void *taskdata)
static void processor_apply_parallel(void *__restrict userdata,
const int scanline,
const TaskParallelTLS *__restrict UNUSED(tls))
{
ScanlineGlobalData *data = BLI_task_pool_user_data(pool);
int start_scanline = POINTER_AS_INT(taskdata);
int num_scanlines = min_ii(data->scanlines_per_task, data->total_scanlines - start_scanline);
data->do_thread(data->custom_data, start_scanline, num_scanlines);
ScanlineGlobalData *data = userdata;
data->do_thread(data->custom_data, scanline);
}
void IMB_processor_apply_threaded_scanlines(int total_scanlines,
ScanlineThreadFunc do_thread,
void *custom_data)
{
const int scanlines_per_task = 64;
ScanlineGlobalData data;
data.custom_data = custom_data;
data.do_thread = do_thread;
data.scanlines_per_task = scanlines_per_task;
data.total_scanlines = total_scanlines;
const int total_tasks = (total_scanlines + scanlines_per_task - 1) / scanlines_per_task;
TaskPool *task_pool = BLI_task_pool_create(&data, TASK_PRIORITY_LOW, TASK_ISOLATION_ON);
for (int i = 0, start_line = 0; i < total_tasks; i++) {
BLI_task_pool_push(
task_pool, processor_apply_scanline_func, POINTER_FROM_INT(start_line), false, NULL);
start_line += scanlines_per_task;
}
TaskParallelSettings settings;
ScanlineGlobalData data = {
.do_thread = do_thread,
.custom_data = custom_data,
};
/* work and wait until tasks are done */
BLI_task_pool_work_and_wait(task_pool);
/* Free memory. */
BLI_task_pool_free(task_pool);
BLI_parallel_range_settings_defaults(&settings);
BLI_task_parallel_range(0, total_scanlines, &data, processor_apply_parallel, &settings);
}
/** \} */

View File

@ -988,8 +988,9 @@ typedef struct RectBlendThreadData {
bool accumulate;
} RectBlendThreadData;
static void rectblend_thread_do(void *data_v, int start_scanline, int num_scanlines)
static void rectblend_thread_do(void *data_v, int scanline)
{
const int num_scanlines = 1;
RectBlendThreadData *data = (RectBlendThreadData *)data_v;
IMB_rectblend(data->dbuf,
data->obuf,
@ -999,11 +1000,11 @@ static void rectblend_thread_do(void *data_v, int start_scanline, int num_scanli
data->texmask,
data->mask_max,
data->destx,
data->desty + start_scanline,
data->desty + scanline,
data->origx,
data->origy + start_scanline,
data->origy + scanline,
data->srcx,
data->srcy + start_scanline,
data->srcy + scanline,
data->width,
num_scanlines,
data->mode,