BLI_math_statistics: switch from OMP to BLI_task.

This time, we have over 300% speedup!

But no, this is not due to the switch to BLI_task (which 'only' gives the usual 15% speedup),
but to an enhancement of the algorithm: the flattened loop over covariance matrix items now allows
computing (usually) all items in parallel, instead of having at most 3 or 4 working threads
(with unbalanced load, even)...
This commit is contained in:
Bastien Montagne 2015-12-28 22:57:55 +01:00
parent d08e9883bd
commit 971f9e1c25
1 changed files with 64 additions and 21 deletions

View File

@ -30,12 +30,66 @@
#include "MEM_guardedalloc.h"
#include "BLI_math.h"
#include "BLI_task.h"
#include "BLI_utildefines.h"
#include "BLI_strict_flags.h"
/********************************** Covariance Matrices *********************************/
/**
 * Parameters handed (as `userdata`) to covariance_m_vn_ex_task_cb(),
 * which computes one item of the covariance matrix per call.
 */
typedef struct CovarianceData {
/* Input coordinates as a flat array: component `i` of coordinate `k` is read at `cos_vn[k * n + i]`. */
const float *cos_vn;
/* Optional center (one value per component) subtracted from every coordinate.
 * May be NULL, in which case the coordinates are used as they are. */
const float *center;
/* Output matrix as a flat array, item (row, column) is stored at `r_covmat[row * n + column]`. */
float *r_covmat;
/* Factor every summed item is multiplied by. */
float covfac;
/* Number of components per coordinate, also the number of rows/columns of the matrix. */
int n;
/* Number of coordinates stored in `cos_vn`. */
int nbr_cos_vn;
} CovarianceData;
/**
 * Task callback computing a single item of the covariance matrix.
 *
 * \param userdata: Pointer to the #CovarianceData describing input and output.
 * \param userdata_chunk: Unused.
 * \param a: Flat index of the item to compute (row is `a / n`, column is `a % n`).
 *
 * Covariance matrices are always symmetrical, so only the items of one half (column >= row)
 * are actually computed, and each of them is mirrored to the other half right away.
 * Calls for indices of that other half (column < row) return without doing anything.
 *
 * This allows using a flat loop over the n*n items with the same results as a nested one
 * over half of the matrix:
 *
 *     for (i = 0; i < n; i++) {
 *         for (j = i; j < n; j++) {
 *             ...
 *         }
 *     }
 *
 * Two different indices never write to the same matrix item.
 */
static void covariance_m_vn_ex_task_cb(void *userdata, void *UNUSED(userdata_chunk), int a)
{
	const CovarianceData *data = userdata;
	const float *center = data->center;
	float *r_covmat = data->r_covmat;
	const int n = data->n;
	const int nbr_cos_vn = data->nbr_cos_vn;

	const int i = a / n;
	const int j = a % n;

	/* Walks over the coordinates, one step of `n` floats per coordinate. */
	const float *co = data->cos_vn;
	/* Sum in a local variable rather than in `r_covmat[a]`: the result then does not depend
	 * on the previous content of the matrix, and the inner loops do not have to write to
	 * the (shared) output array at each iteration. */
	float sum = 0.0f;
	int k;

	if (j < i) {
		/* Item of the mirrored half, it is written by the call handling index `j * n + i`. */
		return;
	}

	if (center) {
		const float center_i = center[i];
		const float center_j = center[j];

		for (k = 0; k < nbr_cos_vn; k++, co += n) {
			sum += (co[i] - center_i) * (co[j] - center_j);
		}
	}
	else {
		for (k = 0; k < nbr_cos_vn; k++, co += n) {
			sum += co[i] * co[j];
		}
	}
	sum *= data->covfac;

	r_covmat[a] = sum;
	if (j != i) {
		/* Mirror result to other half... */
		r_covmat[j * n + i] = sum;
	}
}
/**
* \brief Compute the covariance matrix of given set of nD coordinates.
*
@ -60,28 +114,17 @@ void BLI_covariance_m_vn_ex(
memset(r_covmat, 0, sizeof(*r_covmat) * (size_t)(n * n));
#pragma omp parallel for default(shared) private(i, j, k) schedule(static) if ((nbr_cos_vn * n) >= 10000)
for (i = 0; i < n; i++) {
for (j = i; j < n; j++) {
r_covmat[i * n + j] = 0.0f;
if (center) {
for (k = 0; k < nbr_cos_vn; k++) {
r_covmat[i * n + j] += (cos_vn[k * n + i] - center[i]) * (cos_vn[k * n + j] - center[j]);
}
}
else {
for (k = 0; k < nbr_cos_vn; k++) {
r_covmat[i * n + j] += cos_vn[k * n + i] * cos_vn[k * n + j];
}
}
r_covmat[i * n + j] *= covfac;
}
CovarianceData data = {
.cos_vn = cos_vn, .center = center, .r_covmat = r_covmat,
.covfac = covfac, .n = n, .nbr_cos_vn = nbr_cos_vn,
};
if ((nbr_cos_vn * n * n) >= 10000) {
BLI_task_parallel_range_ex(0, n * n, &data, NULL, 0, covariance_m_vn_ex_task_cb, 0, false);
}
/* Covariance matrices are always symetrical, so we can compute only one half of it (as done above),
* and copy it to the other half! */
for (i = 1; i < n; i++) {
for (j = 0; j < i; j++) {
r_covmat[i * n + j] = r_covmat[j * n + i];
else {
for (k = 0; k < n * n; k++) {
covariance_m_vn_ex_task_cb(&data, NULL, k);
}
}
}