Math: optimizations for 4x4 matrix inverse, multiplications.

In some heavy rigs matrix inverse can be 10% of computation time. This
reduces it to 2% by using Eigen's optimized 4x4 matrix inverse and SSE
matrix multiplication.
This commit is contained in:
Brecht Van Lommel 2018-05-31 16:36:20 +02:00
parent 719e782f2c
commit 01c75c3765
Notes: blender-bot 2023-02-14 10:14:07 +01:00
Referenced by commit 036e95bb21, Fix T57767: Pivot point broken after scaling to 0 in a dimension
Referenced by issue #57767, Pivot point broken after scaling a flat object to 0 in a dimension
Referenced by issue #55228, Performance regression of production file
7 changed files with 128 additions and 66 deletions

View File

@ -32,6 +32,7 @@ macro(BLENDER_SRC_GTEST_EX NAME SRC EXTRA_LIBS DO_ADD_TEST)
${EXTRA_LIBS}
${PLATFORM_LINKLIBS}
bf_testing_main
bf_intern_eigen
bf_intern_guardedalloc
extern_gtest
extern_gmock

View File

@ -36,10 +36,12 @@ set(SRC
intern/eigenvalues.cc
intern/linear_solver.cc
intern/matrix.cc
intern/svd.cc
intern/eigenvalues.h
intern/linear_solver.h
intern/matrix.h
intern/svd.h
)

View File

@ -29,6 +29,7 @@
#include "intern/eigenvalues.h"
#include "intern/linear_solver.h"
#include "intern/matrix.h"
#include "intern/svd.h"
#endif /* __EIGEN_C_API_H__ */

View File

@ -0,0 +1,56 @@
/*
* ***** BEGIN GPL LICENSE BLOCK *****
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* The Original Code is Copyright (C) 2015 Blender Foundation.
* All rights reserved.
*
* Contributor(s): Blender Foundation,
* Bastien Montagne
*
* ***** END GPL LICENSE BLOCK *****
*/
#ifndef __EIGEN3_MATRIX_C_API_CC__
#define __EIGEN3_MATRIX_C_API_CC__
/* Eigen gives annoying huge amount of warnings here, silence them! */
#if defined(__GNUC__) && !defined(__clang__)
# pragma GCC diagnostic ignored "-Wlogical-op"
#endif
#ifdef __EIGEN3_MATRIX_C_API_CC__ /* quiet warning */
#endif
#include <Eigen/Core>
#include <Eigen/Dense>
#include "matrix.h"
using Eigen::Map;
using Eigen::Matrix4f;
bool EIG_invert_m4_m4(float inverse[4][4], const float matrix[4][4])
{
Map<Matrix4f> M = Map<Matrix4f>((float*)matrix);
Matrix4f R;
bool invertible = true;
M.computeInverseWithCheck(R, invertible, 0.0f);
memcpy(inverse, R.data(), sizeof(float)*4*4);
return invertible;
}
#endif /* __EIGEN3_MATRIX_C_API_CC__ */

View File

@ -0,0 +1,40 @@
/*
* ***** BEGIN GPL LICENSE BLOCK *****
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* The Original Code is Copyright (C) 2015 Blender Foundation.
* All rights reserved.
*
* Contributor(s): Blender Foundation,
* Bastien Montagne
*
* ***** END GPL LICENSE BLOCK *****
*/
#ifndef __EIGEN3_MATRIX_C_API_H__
#define __EIGEN3_MATRIX_C_API_H__
#ifdef __cplusplus
extern "C" {
#endif
/* Compute the inverse of the 4x4 float matrix `matrix` and write the result
 * to `inverse`. Callable from C code.
 *
 * Returns true when the matrix was found to be invertible, false otherwise.
 * NOTE(review): what ends up in `inverse` when false is returned depends on
 * the implementation; confirm before relying on it. */
bool EIG_invert_m4_m4(float inverse[4][4], const float matrix[4][4]);
#ifdef __cplusplus
}
#endif
#endif /* __EIGEN3_MATRIX_C_API_H__ */

View File

@ -37,6 +37,7 @@
#include <Eigen/Core>
#include <Eigen/SVD>
#include <Eigen/Dense>
#include "svd.h"
@ -51,6 +52,8 @@ using Eigen::MatrixXf;
using Eigen::VectorXf;
using Eigen::Map;
using Eigen::Matrix4f;
void EIG_svd_square_matrix(const int size, const float *matrix, float *r_U, float *r_S, float *r_V)
{
/* Since our matrix is squared, we can use thinU/V. */

View File

@ -33,6 +33,8 @@
#include "BLI_strict_flags.h"
#include "eigen_capi.h"
/********************************* Init **************************************/
void zero_m2(float m[2][2])
@ -192,6 +194,25 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
BLI_assert(R != A && R != B);
/* matrix product: R[j][k] = A[j][i] . B[i][k] */
#ifdef __SSE2__
__m128 A0 = _mm_loadu_ps(A[0]);
__m128 A1 = _mm_loadu_ps(A[1]);
__m128 A2 = _mm_loadu_ps(A[2]);
__m128 A3 = _mm_loadu_ps(A[3]);
for (int i = 0; i < 4; i++) {
__m128 B0 = _mm_set1_ps(B[i][0]);
__m128 B1 = _mm_set1_ps(B[i][1]);
__m128 B2 = _mm_set1_ps(B[i][2]);
__m128 B3 = _mm_set1_ps(B[i][3]);
__m128 sum = _mm_add_ps(
_mm_add_ps(_mm_mul_ps(B0, A0), _mm_mul_ps(B1, A1)),
_mm_add_ps(_mm_mul_ps(B2, A2), _mm_mul_ps(B3, A3)));
_mm_storeu_ps(R[i], sum);
}
#else
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0] + B[0][3] * A[3][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1] + B[0][3] * A[3][1];
R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2] + B[0][3] * A[3][2];
@ -211,6 +232,7 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
R[3][1] = B[3][0] * A[0][1] + B[3][1] * A[1][1] + B[3][2] * A[2][1] + B[3][3] * A[3][1];
R[3][2] = B[3][0] * A[0][2] + B[3][1] * A[1][2] + B[3][2] * A[2][2] + B[3][3] * A[3][2];
R[3][3] = B[3][0] * A[0][3] + B[3][1] * A[1][3] + B[3][2] * A[2][3] + B[3][3] * A[3][3];
#endif
}
void mul_m4_m4_pre(float R[4][4], const float A[4][4])
@ -875,74 +897,11 @@ bool invert_m4(float m[4][4])
return success;
}
/**
 * Compute the inverse of \a mat and store it in \a inverse.
 *
 * \return the result of EIG_invert_m4_m4(): true when the matrix could be
 * inverted, false otherwise.
 */
bool invert_m4_m4(float inverse[4][4], const float mat[4][4])
{
	/* Use optimized matrix inverse from Eigen, since performance
	 * impact of this function is significant in complex rigs. */
	return EIG_invert_m4_m4(inverse, mat);
}
/****************************** Linear Algebra *******************************/