Make scopes update multi-threaded

This commits makes scopes evaluation multithreaded by using OpenMP
for the outer loop of pixel processor. it also makes all the changes
needed for keeping performance as high as possible by keeping data
local to thread for as long as it's possible.

Reviewers: campbellbarton

Differential Revision: https://developer.blender.org/D1334
This commit is contained in:
Sergey Sharybin 2015-05-19 20:18:26 +05:00
parent 9c5995c062
commit 774e034d40
Notes: blender-bot 2023-02-14 09:00:19 +01:00
Referenced by issue #45086, Crash showing scopes
1 changed files with 77 additions and 30 deletions

View File

@ -43,6 +43,7 @@
#include "BLI_blenlib.h"
#include "BLI_math.h"
#include "BLI_utildefines.h"
#include "BLI_threads.h"
#include "BKE_colortools.h"
#include "BKE_curve.h"
@ -52,6 +53,10 @@
#include "IMB_colormanagement.h"
#include "IMB_imbuf_types.h"
#ifdef _OPENMP
# include <omp.h>
#endif
/* ********************************* color curve ********************* */
/* ***************** operations on full struct ************* */
@ -1029,11 +1034,25 @@ void BKE_histogram_update_sample_line(Histogram *hist, ImBuf *ibuf, const ColorM
void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *view_settings,
const ColorManagedDisplaySettings *display_settings)
{
#ifdef _OPENMP
const int num_threads = BLI_system_thread_count();
#else
const int num_threads = 1;
#endif
int a, y;
unsigned int nl, na, nr, ng, nb;
double divl, diva, divr, divg, divb;
unsigned char *display_buffer;
unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a;
unsigned int bin_lum[256] = {0},
bin_r[256] = {0},
bin_g[256] = {0},
bin_b[256] = {0},
bin_a[256] = {0};
unsigned int bin_lum_t[BLENDER_MAX_THREADS][256] = {{0}},
bin_r_t[BLENDER_MAX_THREADS][256] = {{0}},
bin_g_t[BLENDER_MAX_THREADS][256] = {{0}},
bin_b_t[BLENDER_MAX_THREADS][256] = {{0}},
bin_a_t[BLENDER_MAX_THREADS][256] = {{0}};
int ycc_mode = -1;
const bool is_float = (ibuf->rect_float != NULL);
void *cache_handle = NULL;
@ -1068,13 +1087,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
break;
}
/* temp table to count pix value for histogram */
bin_r = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
bin_g = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
bin_b = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
bin_a = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
bin_lum = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
/* convert to number of lines with logarithmic scale */
scopes->sample_lines = (scopes->accuracy * 0.01f) * (scopes->accuracy * 0.01f) * ibuf->y;
@ -1114,9 +1126,19 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
&cache_handle);
}
/* Keep number of threads in sync with the merge parts below. */
#pragma omp parallel for private(y) schedule(static) num_threads(num_threads) if(ibuf->y > 256)
for (y = 0; y < ibuf->y; y++) {
#ifdef _OPENMP
const int thread_idx = omp_get_thread_num();
#else
const int thread_idx = 0;
#endif
const float *rf = NULL;
unsigned char *rc = NULL;
const unsigned char *rc = NULL;
const bool do_sample_line = (y % rows_per_sample_line) == 0;
float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX},
max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
int x, c;
if (is_float)
rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;
@ -1140,27 +1162,27 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
/* check for min max */
if (ycc_mode == -1) {
for (c = 0; c < 3; c++) {
if (rgba[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = rgba[c];
if (rgba[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = rgba[c];
if (rgba[c] < min[c]) min[c] = rgba[c];
if (rgba[c] > max[c]) max[c] = rgba[c];
}
}
else {
rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
for (c = 0; c < 3; c++) {
ycc[c] *= INV_255;
if (ycc[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = ycc[c];
if (ycc[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = ycc[c];
if (ycc[c] < min[c]) min[c] = ycc[c];
if (ycc[c] > max[c]) max[c] = ycc[c];
}
}
/* increment count for histo*/
bin_lum[get_bin_float(luma)] += 1;
bin_r[get_bin_float(rgba[0])] += 1;
bin_g[get_bin_float(rgba[1])] += 1;
bin_b[get_bin_float(rgba[2])] += 1;
bin_a[get_bin_float(rgba[3])] += 1;
bin_lum_t[thread_idx][get_bin_float(luma)] += 1;
bin_r_t[thread_idx][get_bin_float(rgba[0])] += 1;
bin_g_t[thread_idx][get_bin_float(rgba[1])] += 1;
bin_b_t[thread_idx][get_bin_float(rgba[2])] += 1;
bin_a_t[thread_idx][get_bin_float(rgba[3])] += 1;
/* save sample if needed */
if (y % rows_per_sample_line == 0) {
if (do_sample_line) {
const float fx = (float)x / (float)ibuf->x;
const int savedlines = y / rows_per_sample_line;
const int idx = 2 * (ibuf->x * savedlines + x);
@ -1170,16 +1192,46 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
rf += ibuf->channels;
rc += ibuf->channels;
}
#pragma omp critical
{
for (c = 0; c < 3; c++) {
if (min[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = min[c];
if (max[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = max[c];
}
}
}
#ifdef _OPENMP
if (ibuf->y > 256) {
for (a = 0; a < num_threads; a++) {
int b;
for (b = 0; b < 256; b++) {
bin_lum[b] += bin_lum_t[a][b];
bin_r[b] += bin_r_t[a][b];
bin_g[b] += bin_g_t[a][b];
bin_b[b] += bin_b_t[a][b];
bin_a[b] += bin_a_t[a][b];
}
}
}
else
#endif
{
memcpy(bin_lum, bin_lum_t[0], sizeof(bin_lum));
memcpy(bin_r, bin_r_t[0], sizeof(bin_r));
memcpy(bin_g, bin_g_t[0], sizeof(bin_g));
memcpy(bin_b, bin_b_t[0], sizeof(bin_b));
memcpy(bin_a, bin_a_t[0], sizeof(bin_a));
}
/* test for nicer distribution even - non standard, leave it out for a while */
#if 0
for (x = 0; x < 256; x++) {
bin_lum[x] = sqrt (bin_lum[x]);
bin_r[x] = sqrt(bin_r[x]);
bin_g[x] = sqrt(bin_g[x]);
bin_b[x] = sqrt(bin_b[x]);
bin_a[x] = sqrt(bin_a[x]);
for (a = 0; a < 256; a++) {
bin_lum[a] = sqrt (bin_lum[a]);
bin_r[a] = sqrt(bin_r[a]);
bin_g[a] = sqrt(bin_g[a]);
bin_b[a] = sqrt(bin_b[a]);
bin_a[a] = sqrt(bin_a[a]);
}
#endif
@ -1205,11 +1257,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
scopes->hist.data_b[a] = bin_b[a] * divb;
scopes->hist.data_a[a] = bin_a[a] * diva;
}
MEM_freeN(bin_lum);
MEM_freeN(bin_r);
MEM_freeN(bin_g);
MEM_freeN(bin_b);
MEM_freeN(bin_a);
if (cm_processor)
IMB_colormanagement_processor_free(cm_processor);