Meshdeform: Use SSE intrinsics in meshdeform_dynamic_bind()
Quite a straightforward change; I don't think any extra explanation is needed. This gives about a 15% speedup of the modifier evaluation on my laptop. Reviewers: campbellbarton. Differential Revision: https://developer.blender.org/D836
This commit is contained in:
parent
dfc4de036e
commit
ba7e504448
Notes:
blender-bot
2023-02-14 09:54:16 +01:00
Referenced by issue #42413, Erroneous world_matrix when object scaled; referenced by issue #42367, Spline IK leaves a gap at the start of the bone chain; referenced by issue #42349, Windows Render command line Cycles Crash!
|
@ -54,6 +54,9 @@
|
|||
|
||||
#include "MOD_util.h"
|
||||
|
||||
#ifdef __SSE2__
|
||||
# include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
static void initData(ModifierData *md)
|
||||
{
|
||||
|
@ -134,11 +137,15 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
|
|||
{
|
||||
MDefCell *cell;
|
||||
MDefInfluence *inf;
|
||||
float gridvec[3], dvec[3], ivec[3], co[3], wx, wy, wz;
|
||||
float gridvec[3], dvec[3], ivec[3], wx, wy, wz;
|
||||
float weight, cageweight, totweight, *cageco;
|
||||
int i, j, a, x, y, z, size;
|
||||
#ifdef __SSE2__
|
||||
__m128 co = _mm_setzero_ps();
|
||||
#else
|
||||
float co[3] = {0.0f, 0.0f, 0.0f};
|
||||
#endif
|
||||
|
||||
zero_v3(co);
|
||||
totweight = 0.0f;
|
||||
size = mmd->dyngridsize;
|
||||
|
||||
|
@ -170,14 +177,30 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
|
|||
for (j = 0; j < cell->totinfluence; j++, inf++) {
|
||||
cageco = dco[inf->vertex];
|
||||
cageweight = weight * inf->weight;
|
||||
#ifdef __SSE2__
|
||||
{
|
||||
__m128 cageweight_r = _mm_set1_ps(cageweight);
|
||||
/* This will load one extra element, this is ok because
|
||||
* we ignore that part of the register anyway.
|
||||
*/
|
||||
__m128 cageco_r = _mm_loadu_ps(cageco);
|
||||
co = _mm_add_ps(co,
|
||||
_mm_mul_ps(cageco_r, cageweight_r));
|
||||
}
|
||||
#else
|
||||
co[0] += cageweight * cageco[0];
|
||||
co[1] += cageweight * cageco[1];
|
||||
co[2] += cageweight * cageco[2];
|
||||
#endif
|
||||
totweight += cageweight;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
copy_v3_v3(vec, (float*)&co);
|
||||
#else
|
||||
copy_v3_v3(vec, co);
|
||||
#endif
|
||||
|
||||
return totweight;
|
||||
}
|
||||
|
@ -344,7 +367,11 @@ static void meshdeformModifier_do(
|
|||
cagedm->getVertCos(cagedm, cagecos);
|
||||
bindcagecos = (float(*)[3])mmd->bindcagecos;
|
||||
|
||||
dco = MEM_callocN(sizeof(*dco) * totcagevert, "MDefDco");
|
||||
/* We allocate 1 element extra to make it possible to
|
||||
* load the values to SSE registers, which are float4.
|
||||
*/
|
||||
dco = MEM_callocN(sizeof(*dco) * (totcagevert + 1), "MDefDco");
|
||||
zero_v3(dco[totcagevert]);
|
||||
for (a = 0; a < totcagevert; a++) {
|
||||
/* get cage vertex in world space with binding transform */
|
||||
copy_v3_v3(co, cagecos[a]);
|
||||
|
|
Loading…
Reference in New Issue