BLI_math: simplify matrix multiply logic

Improve safety and correctness of matrix multiplication by using
temporary storage if one of the inputs is also the output.

No functional changes.

Differential Revision: https://developer.blender.org/D16876
Reviewed By: Campbell Barton, Sergey Sharybin
This commit is contained in:
Chris Blackbourn 2023-01-17 11:14:52 +13:00
parent 7026096099
commit 6769acbbba
Notes: blender-bot 2023-02-14 07:25:46 +01:00
Referenced by commit d411de96f7, Fix T103936: Regression: Track To constraint broken
Referenced by issue #103936, Regression: Track To constraint broken
4 changed files with 94 additions and 148 deletions

View File

@ -83,16 +83,12 @@ void mul_m3_m4m4(float R[3][3], const float A[4][4], const float B[4][4]);
/**
* Special matrix multiplies
* - uniq: `R <-- AB`, R is neither A nor B
* - pre: `R <-- AR`
* - post: `R <-- RB`.
*/
void mul_m3_m3m3_uniq(float R[3][3], const float A[3][3], const float B[3][3]);
void mul_m3_m3_pre(float R[3][3], const float A[3][3]);
void mul_m3_m3_post(float R[3][3], const float B[3][3]);
void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4]);
void mul_m4_m4m4_db_uniq(double R[4][4], const double A[4][4], const double B[4][4]);
void mul_m4db_m4db_m4fl_uniq(double R[4][4], const double A[4][4], const float B[4][4]);
void mul_m4db_m4db_m4fl(double R[4][4], const double A[4][4], const float B[4][4]);
void mul_m4_m4_pre(float R[4][4], const float A[4][4]);
void mul_m4_m4_post(float R[4][4], const float B[4][4]);

View File

@ -257,22 +257,14 @@ void shuffle_m4(float R[4][4], const int index[4])
void mul_m4_m4m4(float R[4][4], const float A[4][4], const float B[4][4])
{
if (A == R) {
mul_m4_m4_post(R, B);
if (R == A || R == B) {
float T[4][4];
mul_m4_m4m4(T, A, B);
copy_m4_m4(R, T);
return;
}
else if (B == R) {
mul_m4_m4_pre(R, A);
}
else {
mul_m4_m4m4_uniq(R, A, B);
}
}
void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
{
BLI_assert(!ELEM(R, A, B));
/* Matrix product: `R[j][k] = A[j][i] . B[i][k]`. */
/* Matrix product: `R[j][k] = B[j][i] . A[i][k]`. */
#ifdef BLI_HAVE_SSE2
__m128 A0 = _mm_loadu_ps(A[0]);
__m128 A1 = _mm_loadu_ps(A[1]);
@ -313,39 +305,16 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
#endif
}
void mul_m4_m4m4_db_uniq(double R[4][4], const double A[4][4], const double B[4][4])
void mul_m4db_m4db_m4fl(double R[4][4], const double A[4][4], const float B[4][4])
{
BLI_assert(!ELEM(R, A, B));
if (R == A) {
double T[4][4];
mul_m4db_m4db_m4fl(T, A, B);
copy_m4_m4_db(R, T);
return;
}
/* Matrix product: `R[j][k] = A[j][i] . B[i][k]`. */
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0] + B[0][3] * A[3][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1] + B[0][3] * A[3][1];
R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2] + B[0][3] * A[3][2];
R[0][3] = B[0][0] * A[0][3] + B[0][1] * A[1][3] + B[0][2] * A[2][3] + B[0][3] * A[3][3];
R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0] + B[1][3] * A[3][0];
R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1] + B[1][3] * A[3][1];
R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2] + B[1][3] * A[3][2];
R[1][3] = B[1][0] * A[0][3] + B[1][1] * A[1][3] + B[1][2] * A[2][3] + B[1][3] * A[3][3];
R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0] + B[2][3] * A[3][0];
R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1] + B[2][3] * A[3][1];
R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2] + B[2][3] * A[3][2];
R[2][3] = B[2][0] * A[0][3] + B[2][1] * A[1][3] + B[2][2] * A[2][3] + B[2][3] * A[3][3];
R[3][0] = B[3][0] * A[0][0] + B[3][1] * A[1][0] + B[3][2] * A[2][0] + B[3][3] * A[3][0];
R[3][1] = B[3][0] * A[0][1] + B[3][1] * A[1][1] + B[3][2] * A[2][1] + B[3][3] * A[3][1];
R[3][2] = B[3][0] * A[0][2] + B[3][1] * A[1][2] + B[3][2] * A[2][2] + B[3][3] * A[3][2];
R[3][3] = B[3][0] * A[0][3] + B[3][1] * A[1][3] + B[3][2] * A[2][3] + B[3][3] * A[3][3];
}
void mul_m4db_m4db_m4fl_uniq(double R[4][4], const double A[4][4], const float B[4][4])
{
/* Remove second check since types don't match. */
BLI_assert(!ELEM(R, A /*, B */));
/* Matrix product: `R[j][k] = A[j][i] . B[i][k]`. */
/* Matrix product: `R[j][k] = B[j][i] . A[i][k]`. */
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0] + B[0][3] * A[3][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1] + B[0][3] * A[3][1];
@ -370,53 +339,32 @@ void mul_m4db_m4db_m4fl_uniq(double R[4][4], const double A[4][4], const float B
void mul_m4_m4_pre(float R[4][4], const float A[4][4])
{
BLI_assert(A != R);
float B[4][4];
copy_m4_m4(B, R);
mul_m4_m4m4_uniq(R, A, B);
mul_m4_m4m4(R, A, R);
}
void mul_m4_m4_post(float R[4][4], const float B[4][4])
{
BLI_assert(B != R);
float A[4][4];
copy_m4_m4(A, R);
mul_m4_m4m4_uniq(R, A, B);
}
void mul_m3_m3m3(float R[3][3], const float A[3][3], const float B[3][3])
{
if (A == R) {
mul_m3_m3_post(R, B);
}
else if (B == R) {
mul_m3_m3_pre(R, A);
}
else {
mul_m3_m3m3_uniq(R, A, B);
}
mul_m4_m4m4(R, R, B);
}
void mul_m3_m3_pre(float R[3][3], const float A[3][3])
{
BLI_assert(A != R);
float B[3][3];
copy_m3_m3(B, R);
mul_m3_m3m3_uniq(R, A, B);
mul_m3_m3m3(R, A, R);
}
void mul_m3_m3_post(float R[3][3], const float B[3][3])
{
BLI_assert(B != R);
float A[3][3];
copy_m3_m3(A, R);
mul_m3_m3m3_uniq(R, A, B);
mul_m3_m3m3(R, R, B);
}
void mul_m3_m3m3_uniq(float R[3][3], const float A[3][3], const float B[3][3])
void mul_m3_m3m3(float R[3][3], const float A[3][3], const float B[3][3])
{
BLI_assert(!ELEM(R, A, B));
if (R == A || R == B) {
float T[3][3];
mul_m3_m3m3(T, A, B);
copy_m3_m3(R, T);
return;
}
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1];
R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2];
@ -432,88 +380,90 @@ void mul_m3_m3m3_uniq(float R[3][3], const float A[3][3], const float B[3][3])
void mul_m4_m4m3(float R[4][4], const float A[4][4], const float B[3][3])
{
float B_[3][3], A_[4][4];
if (R == A) {
float T[4][4];
mul_m4_m4m3(T, A, B);
copy_m4_m4(R, T);
return;
}
/* copy so it works when R is the same pointer as A or B */
/* TODO: avoid copying when matrices are different */
copy_m4_m4(A_, A);
copy_m3_m3(B_, B);
R[0][0] = B_[0][0] * A_[0][0] + B_[0][1] * A_[1][0] + B_[0][2] * A_[2][0];
R[0][1] = B_[0][0] * A_[0][1] + B_[0][1] * A_[1][1] + B_[0][2] * A_[2][1];
R[0][2] = B_[0][0] * A_[0][2] + B_[0][1] * A_[1][2] + B_[0][2] * A_[2][2];
R[1][0] = B_[1][0] * A_[0][0] + B_[1][1] * A_[1][0] + B_[1][2] * A_[2][0];
R[1][1] = B_[1][0] * A_[0][1] + B_[1][1] * A_[1][1] + B_[1][2] * A_[2][1];
R[1][2] = B_[1][0] * A_[0][2] + B_[1][1] * A_[1][2] + B_[1][2] * A_[2][2];
R[2][0] = B_[2][0] * A_[0][0] + B_[2][1] * A_[1][0] + B_[2][2] * A_[2][0];
R[2][1] = B_[2][0] * A_[0][1] + B_[2][1] * A_[1][1] + B_[2][2] * A_[2][1];
R[2][2] = B_[2][0] * A_[0][2] + B_[2][1] * A_[1][2] + B_[2][2] * A_[2][2];
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1];
R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2];
R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0];
R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1];
R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2];
R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0];
R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1];
R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2];
}
void mul_m3_m3m4(float R[3][3], const float A[3][3], const float B[4][4])
{
float B_[4][4], A_[3][3];
if (R == A) {
float T[3][3];
mul_m3_m3m4(T, A, B);
copy_m3_m3(R, T);
return;
}
/* copy so it works when R is the same pointer as A or B */
/* TODO: avoid copying when matrices are different */
copy_m3_m3(A_, A);
copy_m4_m4(B_, B);
/* Matrix product: `R[j][k] = B[j][i] . A[i][k]`. */
/* R[i][j] = B_[i][k] * A_[k][j] */
R[0][0] = B_[0][0] * A_[0][0] + B_[0][1] * A_[1][0] + B_[0][2] * A_[2][0];
R[0][1] = B_[0][0] * A_[0][1] + B_[0][1] * A_[1][1] + B_[0][2] * A_[2][1];
R[0][2] = B_[0][0] * A_[0][2] + B_[0][1] * A_[1][2] + B_[0][2] * A_[2][2];
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1];
R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2];
R[1][0] = B_[1][0] * A_[0][0] + B_[1][1] * A_[1][0] + B_[1][2] * A_[2][0];
R[1][1] = B_[1][0] * A_[0][1] + B_[1][1] * A_[1][1] + B_[1][2] * A_[2][1];
R[1][2] = B_[1][0] * A_[0][2] + B_[1][1] * A_[1][2] + B_[1][2] * A_[2][2];
R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0];
R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1];
R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2];
R[2][0] = B_[2][0] * A_[0][0] + B_[2][1] * A_[1][0] + B_[2][2] * A_[2][0];
R[2][1] = B_[2][0] * A_[0][1] + B_[2][1] * A_[1][1] + B_[2][2] * A_[2][1];
R[2][2] = B_[2][0] * A_[0][2] + B_[2][1] * A_[1][2] + B_[2][2] * A_[2][2];
R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0];
R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1];
R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2];
}
void mul_m3_m4m3(float R[3][3], const float A[4][4], const float B[3][3])
{
float B_[3][3], A_[4][4];
if (R == B) {
float T[3][3];
mul_m3_m4m3(T, A, B);
copy_m3_m3(R, T);
return;
}
/* copy so it works when R is the same pointer as A or B */
/* TODO: avoid copying when matrices are different */
copy_m4_m4(A_, A);
copy_m3_m3(B_, B);
/* Matrix product: `R[j][k] = B[j][i] . A[i][k]`. */
/* R[i][j] = B[i][k] * A[k][j] */
R[0][0] = B_[0][0] * A_[0][0] + B_[0][1] * A_[1][0] + B_[0][2] * A_[2][0];
R[0][1] = B_[0][0] * A_[0][1] + B_[0][1] * A_[1][1] + B_[0][2] * A_[2][1];
R[0][2] = B_[0][0] * A_[0][2] + B_[0][1] * A_[1][2] + B_[0][2] * A_[2][2];
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1];
R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2];
R[1][0] = B_[1][0] * A_[0][0] + B_[1][1] * A_[1][0] + B_[1][2] * A_[2][0];
R[1][1] = B_[1][0] * A_[0][1] + B_[1][1] * A_[1][1] + B_[1][2] * A_[2][1];
R[1][2] = B_[1][0] * A_[0][2] + B_[1][1] * A_[1][2] + B_[1][2] * A_[2][2];
R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0];
R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1];
R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2];
R[2][0] = B_[2][0] * A_[0][0] + B_[2][1] * A_[1][0] + B_[2][2] * A_[2][0];
R[2][1] = B_[2][0] * A_[0][1] + B_[2][1] * A_[1][1] + B_[2][2] * A_[2][1];
R[2][2] = B_[2][0] * A_[0][2] + B_[2][1] * A_[1][2] + B_[2][2] * A_[2][2];
R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0];
R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1];
R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2];
}
void mul_m4_m3m4(float R[4][4], const float A[3][3], const float B[4][4])
{
float B_[4][4], A_[3][3];
if (R == B) {
float T[4][4];
mul_m4_m3m4(T, A, B);
copy_m4_m4(R, T);
return;
}
/* copy so it works when R is the same pointer as A or B */
/* TODO: avoid copying when matrices are different */
copy_m3_m3(A_, A);
copy_m4_m4(B_, B);
R[0][0] = B_[0][0] * A_[0][0] + B_[0][1] * A_[1][0] + B_[0][2] * A_[2][0];
R[0][1] = B_[0][0] * A_[0][1] + B_[0][1] * A_[1][1] + B_[0][2] * A_[2][1];
R[0][2] = B_[0][0] * A_[0][2] + B_[0][1] * A_[1][2] + B_[0][2] * A_[2][2];
R[1][0] = B_[1][0] * A_[0][0] + B_[1][1] * A_[1][0] + B_[1][2] * A_[2][0];
R[1][1] = B_[1][0] * A_[0][1] + B_[1][1] * A_[1][1] + B_[1][2] * A_[2][1];
R[1][2] = B_[1][0] * A_[0][2] + B_[1][1] * A_[1][2] + B_[1][2] * A_[2][2];
R[2][0] = B_[2][0] * A_[0][0] + B_[2][1] * A_[1][0] + B_[2][2] * A_[2][0];
R[2][1] = B_[2][0] * A_[0][1] + B_[2][1] * A_[1][1] + B_[2][2] * A_[2][1];
R[2][2] = B_[2][0] * A_[0][2] + B_[2][1] * A_[1][2] + B_[2][2] * A_[2][2];
R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0];
R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1];
R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2];
R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0];
R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1];
R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2];
R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0];
R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1];
R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2];
}
void mul_m3_m4m4(float R[3][3], const float A[4][4], const float B[4][4])
@ -1304,7 +1254,7 @@ void mul_m4_m4m4_aligned_scale(float R[4][4], const float A[4][4], const float B
mat4_to_loc_rot_size(loc_b, rot_b, size_b, B);
mul_v3_m4v3(loc_r, A, loc_b);
mul_m3_m3m3_uniq(rot_r, rot_a, rot_b);
mul_m3_m3m3(rot_r, rot_a, rot_b);
mul_v3_v3v3(size_r, size_a, size_b);
loc_rot_size_to_mat4(R, loc_r, rot_r, size_r);
@ -1320,7 +1270,7 @@ void mul_m4_m4m4_split_channels(float R[4][4], const float A[4][4], const float
mat4_to_loc_rot_size(loc_b, rot_b, size_b, B);
add_v3_v3v3(loc_r, loc_a, loc_b);
mul_m3_m3m3_uniq(rot_r, rot_a, rot_b);
mul_m3_m3m3(rot_r, rot_a, rot_b);
mul_v3_v3v3(size_r, size_a, size_b);
loc_rot_size_to_mat4(R, loc_r, rot_r, size_r);

View File

@ -2447,8 +2447,8 @@ static void lineart_object_load_single_instance(LineartData *ld,
/* Prepare the matrix used for transforming this specific object (instance). This has to be
* done before mesh boundbox check because the function needs that. */
mul_m4db_m4db_m4fl_uniq(obi->model_view_proj, ld->conf.view_projection, use_mat);
mul_m4db_m4db_m4fl_uniq(obi->model_view, ld->conf.view, use_mat);
mul_m4db_m4db_m4fl(obi->model_view_proj, ld->conf.view_projection, use_mat);
mul_m4db_m4db_m4fl(obi->model_view, ld->conf.view, use_mat);
if (!ELEM(ob->type, OB_MESH, OB_MBALL, OB_CURVES_LEGACY, OB_SURF, OB_FONT)) {
return;
@ -2524,7 +2524,7 @@ void lineart_main_load_geometries(Depsgraph *depsgraph,
}
invert_m4_m4(inv, ld->conf.cam_obmat);
mul_m4db_m4db_m4fl_uniq(result, proj, inv);
mul_m4db_m4db_m4fl(result, proj, inv);
copy_m4_m4_db(proj, result);
copy_m4_m4_db(ld->conf.view_projection, proj);

View File

@ -1226,7 +1226,7 @@ bool lineart_main_try_generate_shadow(Depsgraph *depsgraph,
proj, -ld->w, ld->w, -ld->h, ld->h, ld->conf.near_clip, ld->conf.far_clip);
}
invert_m4_m4(inv, ld->conf.cam_obmat);
mul_m4db_m4db_m4fl_uniq(result, proj, inv);
mul_m4db_m4db_m4fl(result, proj, inv);
copy_m4_m4_db(proj, result);
copy_m4_m4_db(ld->conf.view_projection, proj);
unit_m4_db(view);