Image viewer scopes update: OMP->BLI_task.
Gives over 50% faster scope update (from 4.5ms to 2.2ms here with SD shot)! Probably mostly due to more clever usage of thread-local data (which avoids any lock, when OMP code had a rather stupid critical section for minmax)...
This commit is contained in:
parent
d5e0e681ce
commit
f87842a73a
|
@ -43,6 +43,7 @@
|
|||
#include "BLI_blenlib.h"
|
||||
#include "BLI_math.h"
|
||||
#include "BLI_utildefines.h"
|
||||
#include "BLI_task.h"
|
||||
#include "BLI_threads.h"
|
||||
|
||||
#include "BKE_colortools.h"
|
||||
|
@ -53,10 +54,6 @@
|
|||
#include "IMB_colormanagement.h"
|
||||
#include "IMB_imbuf_types.h"
|
||||
|
||||
#ifdef _OPENMP
|
||||
# include <omp.h>
|
||||
#endif
|
||||
|
||||
/* ********************************* color curve ********************* */
|
||||
|
||||
/* ***************** operations on full struct ************* */
|
||||
|
@ -1089,31 +1086,170 @@ void BKE_histogram_update_sample_line(Histogram *hist, ImBuf *ibuf, const ColorM
|
|||
}
|
||||
|
||||
/* if view_settings, it also applies this to byte buffers */
|
||||
typedef struct ScopesUpdateData {
|
||||
Scopes *scopes;
|
||||
const ImBuf *ibuf;
|
||||
struct ColormanageProcessor *cm_processor;
|
||||
const unsigned char *display_buffer;
|
||||
const int ycc_mode;
|
||||
|
||||
unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a;
|
||||
} ScopesUpdateData;
|
||||
|
||||
typedef struct ScopesUpdateDataChunk {
|
||||
unsigned int bin_lum[256];
|
||||
unsigned int bin_r[256];
|
||||
unsigned int bin_g[256];
|
||||
unsigned int bin_b[256];
|
||||
unsigned int bin_a[256];
|
||||
float min[3], max[3];
|
||||
} ScopesUpdateDataChunk;
|
||||
|
||||
static void scopes_update_cb(void *userdata, void *userdata_chunk, const int y, const int UNUSED(threadid))
|
||||
{
|
||||
const ScopesUpdateData *data = userdata;
|
||||
|
||||
Scopes *scopes = data->scopes;
|
||||
const ImBuf *ibuf = data->ibuf;
|
||||
struct ColormanageProcessor *cm_processor = data->cm_processor;
|
||||
const unsigned char *display_buffer = data->display_buffer;
|
||||
const int ycc_mode = data->ycc_mode;
|
||||
|
||||
ScopesUpdateDataChunk *data_chunk = userdata_chunk;
|
||||
unsigned int *bin_lum = data_chunk->bin_lum;
|
||||
unsigned int *bin_r = data_chunk->bin_r;
|
||||
unsigned int *bin_g = data_chunk->bin_g;
|
||||
unsigned int *bin_b = data_chunk->bin_b;
|
||||
unsigned int *bin_a = data_chunk->bin_a;
|
||||
float *min = data_chunk->min;
|
||||
float *max = data_chunk->max;
|
||||
|
||||
const float *rf = NULL;
|
||||
const unsigned char *rc = NULL;
|
||||
const int rows_per_sample_line = ibuf->y / scopes->sample_lines;
|
||||
const int savedlines = y / rows_per_sample_line;
|
||||
const bool do_sample_line = (savedlines < scopes->sample_lines) && (y % rows_per_sample_line) == 0;
|
||||
const bool is_float = (ibuf->rect_float != NULL);
|
||||
|
||||
if (is_float)
|
||||
rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;
|
||||
else {
|
||||
rc = display_buffer + ((size_t)y) * ibuf->x * ibuf->channels;
|
||||
}
|
||||
|
||||
for (int x = 0; x < ibuf->x; x++) {
|
||||
float rgba[4], ycc[3], luma;
|
||||
|
||||
if (is_float) {
|
||||
switch (ibuf->channels) {
|
||||
case 4:
|
||||
copy_v4_v4(rgba, rf);
|
||||
IMB_colormanagement_processor_apply_v4(cm_processor, rgba);
|
||||
break;
|
||||
case 3:
|
||||
copy_v3_v3(rgba, rf);
|
||||
IMB_colormanagement_processor_apply_v3(cm_processor, rgba);
|
||||
rgba[3] = 1.0f;
|
||||
break;
|
||||
case 2:
|
||||
copy_v3_fl(rgba, rf[0]);
|
||||
rgba[3] = rf[1];
|
||||
break;
|
||||
case 1:
|
||||
copy_v3_fl(rgba, rf[0]);
|
||||
rgba[3] = 1.0f;
|
||||
break;
|
||||
default:
|
||||
BLI_assert(0);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int c = 4; c--;)
|
||||
rgba[c] = rc[c] * INV_255;
|
||||
}
|
||||
|
||||
/* we still need luma for histogram */
|
||||
luma = IMB_colormanagement_get_luminance(rgba);
|
||||
|
||||
/* check for min max */
|
||||
if (ycc_mode == -1) {
|
||||
minmax_v3v3_v3(min, max, rgba);
|
||||
}
|
||||
else {
|
||||
rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
|
||||
mul_v3_fl(ycc, INV_255);
|
||||
minmax_v3v3_v3(min, max, ycc);
|
||||
}
|
||||
/* increment count for histo*/
|
||||
bin_lum[get_bin_float(luma)]++;
|
||||
bin_r[get_bin_float(rgba[0])]++;
|
||||
bin_g[get_bin_float(rgba[1])]++;
|
||||
bin_b[get_bin_float(rgba[2])]++;
|
||||
bin_a[get_bin_float(rgba[3])]++;
|
||||
|
||||
/* save sample if needed */
|
||||
if (do_sample_line) {
|
||||
const float fx = (float)x / (float)ibuf->x;
|
||||
const int idx = 2 * (ibuf->x * savedlines + x);
|
||||
save_sample_line(scopes, idx, fx, rgba, ycc);
|
||||
}
|
||||
|
||||
rf += ibuf->channels;
|
||||
rc += ibuf->channels;
|
||||
}
|
||||
}
|
||||
|
||||
static void scopes_update_finalize(void *userdata, void *userdata_chunk)
|
||||
{
|
||||
const ScopesUpdateData *data = userdata;
|
||||
const ScopesUpdateDataChunk *data_chunk = userdata_chunk;
|
||||
|
||||
unsigned int *bin_lum = data->bin_lum;
|
||||
unsigned int *bin_r = data->bin_r;
|
||||
unsigned int *bin_g = data->bin_g;
|
||||
unsigned int *bin_b = data->bin_b;
|
||||
unsigned int *bin_a = data->bin_a;
|
||||
const unsigned int *bin_lum_c = data_chunk->bin_lum;
|
||||
const unsigned int *bin_r_c = data_chunk->bin_r;
|
||||
const unsigned int *bin_g_c = data_chunk->bin_g;
|
||||
const unsigned int *bin_b_c = data_chunk->bin_b;
|
||||
const unsigned int *bin_a_c = data_chunk->bin_a;
|
||||
|
||||
float (*minmax)[2] = data->scopes->minmax;
|
||||
const float *min = data_chunk->min;
|
||||
const float *max = data_chunk->max;
|
||||
|
||||
for (int b = 256; b--;) {
|
||||
bin_lum[b] += bin_lum_c[b];
|
||||
bin_r[b] += bin_r_c[b];
|
||||
bin_g[b] += bin_g_c[b];
|
||||
bin_b[b] += bin_b_c[b];
|
||||
bin_a[b] += bin_a_c[b];
|
||||
}
|
||||
|
||||
for (int c = 3; c--;) {
|
||||
if (min[c] < minmax[c][0])
|
||||
minmax[c][0] = min[c];
|
||||
if (max[c] > minmax[c][1])
|
||||
minmax[c][1] = max[c];
|
||||
}
|
||||
}
|
||||
|
||||
void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *view_settings,
|
||||
const ColorManagedDisplaySettings *display_settings)
|
||||
{
|
||||
#ifdef _OPENMP
|
||||
const int num_threads = BLI_system_thread_count();
|
||||
#endif
|
||||
int a, y;
|
||||
int a;
|
||||
unsigned int nl, na, nr, ng, nb;
|
||||
double divl, diva, divr, divg, divb;
|
||||
unsigned char *display_buffer;
|
||||
const unsigned char *display_buffer = NULL;
|
||||
unsigned int bin_lum[256] = {0},
|
||||
bin_r[256] = {0},
|
||||
bin_g[256] = {0},
|
||||
bin_b[256] = {0},
|
||||
bin_a[256] = {0};
|
||||
unsigned int bin_lum_t[BLENDER_MAX_THREADS][256] = {{0}},
|
||||
bin_r_t[BLENDER_MAX_THREADS][256] = {{0}},
|
||||
bin_g_t[BLENDER_MAX_THREADS][256] = {{0}},
|
||||
bin_b_t[BLENDER_MAX_THREADS][256] = {{0}},
|
||||
bin_a_t[BLENDER_MAX_THREADS][256] = {{0}};
|
||||
int ycc_mode = -1;
|
||||
const bool is_float = (ibuf->rect_float != NULL);
|
||||
void *cache_handle = NULL;
|
||||
struct ColormanageProcessor *cm_processor = NULL;
|
||||
int rows_per_sample_line;
|
||||
|
||||
if (ibuf->rect == NULL && ibuf->rect_float == NULL) return;
|
||||
|
||||
|
@ -1151,7 +1287,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
|
|||
scopes->sample_lines = ibuf->y;
|
||||
|
||||
/* scan the image */
|
||||
rows_per_sample_line = ibuf->y / scopes->sample_lines;
|
||||
for (a = 0; a < 3; a++) {
|
||||
scopes->minmax[a][0] = 25500.0f;
|
||||
scopes->minmax[a][1] = -25500.0f;
|
||||
|
@ -1177,129 +1312,21 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
|
|||
cm_processor = IMB_colormanagement_display_processor_new(view_settings, display_settings);
|
||||
}
|
||||
else {
|
||||
display_buffer = (unsigned char *)IMB_display_buffer_acquire(ibuf,
|
||||
view_settings,
|
||||
display_settings,
|
||||
&cache_handle);
|
||||
display_buffer = (const unsigned char *)IMB_display_buffer_acquire(
|
||||
ibuf, view_settings, display_settings, &cache_handle);
|
||||
}
|
||||
|
||||
/* Keep number of threads in sync with the merge parts below. */
|
||||
#pragma omp parallel for private(y) schedule(static) num_threads(num_threads) if (ibuf->y > 256)
|
||||
for (y = 0; y < ibuf->y; y++) {
|
||||
#ifdef _OPENMP
|
||||
const int thread_idx = omp_get_thread_num();
|
||||
#else
|
||||
const int thread_idx = 0;
|
||||
#endif
|
||||
const float *rf = NULL;
|
||||
const unsigned char *rc = NULL;
|
||||
const int savedlines = y / rows_per_sample_line;
|
||||
const bool do_sample_line = (savedlines < scopes->sample_lines) && (y % rows_per_sample_line) == 0;
|
||||
float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX},
|
||||
max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
|
||||
int x, c;
|
||||
if (is_float)
|
||||
rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;
|
||||
else {
|
||||
rc = display_buffer + ((size_t)y) * ibuf->x * ibuf->channels;
|
||||
}
|
||||
for (x = 0; x < ibuf->x; x++) {
|
||||
float rgba[4], ycc[3], luma;
|
||||
if (is_float) {
|
||||
ScopesUpdateData data = {
|
||||
.scopes = scopes, . ibuf = ibuf,
|
||||
.cm_processor = cm_processor, .display_buffer = display_buffer, .ycc_mode = ycc_mode,
|
||||
.bin_lum = bin_lum, .bin_r = bin_r, .bin_g = bin_g, .bin_b = bin_b, .bin_a = bin_a,
|
||||
};
|
||||
ScopesUpdateDataChunk data_chunk = {0};
|
||||
INIT_MINMAX(data_chunk.min, data_chunk.max);
|
||||
|
||||
switch (ibuf->channels) {
|
||||
case 4:
|
||||
copy_v4_v4(rgba, rf);
|
||||
IMB_colormanagement_processor_apply_v4(cm_processor, rgba);
|
||||
break;
|
||||
case 3:
|
||||
copy_v3_v3(rgba, rf);
|
||||
IMB_colormanagement_processor_apply_v3(cm_processor, rgba);
|
||||
rgba[3] = 1.0f;
|
||||
break;
|
||||
case 2:
|
||||
copy_v3_fl(rgba, rf[0]);
|
||||
rgba[3] = rf[1];
|
||||
break;
|
||||
case 1:
|
||||
copy_v3_fl(rgba, rf[0]);
|
||||
rgba[3] = 1.0f;
|
||||
break;
|
||||
default:
|
||||
BLI_assert(0);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (c = 0; c < 4; c++)
|
||||
rgba[c] = rc[c] * INV_255;
|
||||
}
|
||||
|
||||
/* we still need luma for histogram */
|
||||
luma = IMB_colormanagement_get_luminance(rgba);
|
||||
|
||||
/* check for min max */
|
||||
if (ycc_mode == -1) {
|
||||
for (c = 0; c < 3; c++) {
|
||||
if (rgba[c] < min[c]) min[c] = rgba[c];
|
||||
if (rgba[c] > max[c]) max[c] = rgba[c];
|
||||
}
|
||||
}
|
||||
else {
|
||||
rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
|
||||
for (c = 0; c < 3; c++) {
|
||||
ycc[c] *= INV_255;
|
||||
if (ycc[c] < min[c]) min[c] = ycc[c];
|
||||
if (ycc[c] > max[c]) max[c] = ycc[c];
|
||||
}
|
||||
}
|
||||
/* increment count for histo*/
|
||||
bin_lum_t[thread_idx][get_bin_float(luma)] += 1;
|
||||
bin_r_t[thread_idx][get_bin_float(rgba[0])] += 1;
|
||||
bin_g_t[thread_idx][get_bin_float(rgba[1])] += 1;
|
||||
bin_b_t[thread_idx][get_bin_float(rgba[2])] += 1;
|
||||
bin_a_t[thread_idx][get_bin_float(rgba[3])] += 1;
|
||||
|
||||
/* save sample if needed */
|
||||
if (do_sample_line) {
|
||||
const float fx = (float)x / (float)ibuf->x;
|
||||
const int idx = 2 * (ibuf->x * savedlines + x);
|
||||
save_sample_line(scopes, idx, fx, rgba, ycc);
|
||||
}
|
||||
|
||||
rf += ibuf->channels;
|
||||
rc += ibuf->channels;
|
||||
}
|
||||
#pragma omp critical
|
||||
{
|
||||
for (c = 0; c < 3; c++) {
|
||||
if (min[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = min[c];
|
||||
if (max[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = max[c];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (ibuf->y > 256) {
|
||||
for (a = 0; a < num_threads; a++) {
|
||||
int b;
|
||||
for (b = 0; b < 256; b++) {
|
||||
bin_lum[b] += bin_lum_t[a][b];
|
||||
bin_r[b] += bin_r_t[a][b];
|
||||
bin_g[b] += bin_g_t[a][b];
|
||||
bin_b[b] += bin_b_t[a][b];
|
||||
bin_a[b] += bin_a_t[a][b];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
memcpy(bin_lum, bin_lum_t[0], sizeof(bin_lum));
|
||||
memcpy(bin_r, bin_r_t[0], sizeof(bin_r));
|
||||
memcpy(bin_g, bin_g_t[0], sizeof(bin_g));
|
||||
memcpy(bin_b, bin_b_t[0], sizeof(bin_b));
|
||||
memcpy(bin_a, bin_a_t[0], sizeof(bin_a));
|
||||
}
|
||||
BLI_task_parallel_range_finalize(0, ibuf->y, &data, &data_chunk, sizeof(data_chunk),
|
||||
scopes_update_cb, scopes_update_finalize, ibuf->y > 256, false);
|
||||
|
||||
/* test for nicer distribution even - non standard, leave it out for a while */
|
||||
#if 0
|
||||
|
|
Loading…
Reference in New Issue