Meshdeform: Use SSE intrinsics in meshdeform_dynamic_bind()

This is quite a straightforward change; I don't think any extra explanation is needed.

This gives a speedup of about 15% in modifier evaluation on my laptop.

Reviewers: campbellbarton

Differential Revision: https://developer.blender.org/D836
This commit is contained in:
Sergey Sharybin 2014-10-21 11:37:29 +02:00
parent dfc4de036e
commit ba7e504448
Notes: blender-bot 2023-02-14 09:54:16 +01:00
Referenced by issue #42413, Erroneous world_matrix when object scaled
Referenced by issue #42367, Spline IK leaves a gap at the start of the bone chain
Referenced by issue #42349, Windows Render command line Cycles Crash!
1 changed file with 30 additions and 3 deletions

View File

@ -54,6 +54,9 @@
#include "MOD_util.h"
#ifdef __SSE2__
# include <emmintrin.h>
#endif
static void initData(ModifierData *md)
{
@ -134,11 +137,15 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
{
MDefCell *cell;
MDefInfluence *inf;
float gridvec[3], dvec[3], ivec[3], co[3], wx, wy, wz;
float gridvec[3], dvec[3], ivec[3], wx, wy, wz;
float weight, cageweight, totweight, *cageco;
int i, j, a, x, y, z, size;
#ifdef __SSE2__
__m128 co = _mm_setzero_ps();
#else
float co[3] = {0.0f, 0.0f, 0.0f};
#endif
zero_v3(co);
totweight = 0.0f;
size = mmd->dyngridsize;
@ -170,14 +177,30 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
for (j = 0; j < cell->totinfluence; j++, inf++) {
cageco = dco[inf->vertex];
cageweight = weight * inf->weight;
#ifdef __SSE2__
{
__m128 cageweight_r = _mm_set1_ps(cageweight);
/* This will load one extra element, this is ok because
* we ignore that part of the register anyway.
*/
__m128 cageco_r = _mm_loadu_ps(cageco);
co = _mm_add_ps(co,
_mm_mul_ps(cageco_r, cageweight_r));
}
#else
co[0] += cageweight * cageco[0];
co[1] += cageweight * cageco[1];
co[2] += cageweight * cageco[2];
#endif
totweight += cageweight;
}
}
#ifdef __SSE2__
copy_v3_v3(vec, (float*)&co);
#else
copy_v3_v3(vec, co);
#endif
return totweight;
}
@ -344,7 +367,11 @@ static void meshdeformModifier_do(
cagedm->getVertCos(cagedm, cagecos);
bindcagecos = (float(*)[3])mmd->bindcagecos;
dco = MEM_callocN(sizeof(*dco) * totcagevert, "MDefDco");
/* We allocate 1 element extra to make it possible to
* load the values to SSE registers, which are float4.
*/
dco = MEM_callocN(sizeof(*dco) * (totcagevert + 1), "MDefDco");
zero_v3(dco[totcagevert]);
for (a = 0; a < totcagevert; a++) {
/* get cage vertex in world space with binding transform */
copy_v3_v3(co, cagecos[a]);