Merge pull request #1693 from Azaezel/alpha41/unMangleMatricies
Some checks are pending
Linux Build / Ubuntu Latest GCC (push) Waiting to run
MacOSX Build / MacOSX Latest Clang (push) Waiting to run
Windows Build / Windows Latest MSVC (push) Waiting to run

revert some of the more experimental matrix math
This commit is contained in:
Brian Roberts 2026-03-17 14:05:12 -05:00 committed by GitHub
commit 4757f542e7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -15,8 +15,13 @@ namespace math_backend::mat44
inline float mat44_get_determinant(const float* m) inline float mat44_get_determinant(const float* m)
{ {
f32x4x4 ma = m_load(m); f32x4 r0 = v_load3_vec(m + 0); // row0 xyz
return v_extract0(m_determinant_affine(ma)); f32x4 r1 = v_load3_vec(m + 4); // row1 xyz
f32x4 r2 = v_load3_vec(m + 8); // row2 xyz
f32x4 c0 = v_cross(r1, r2); // cofactor for row0
f32x4 det = v_dot3(r0, c0); // splatted determinant
return v_extract0(det);
} }
// Matrix Scale: Float3 (assume w = 1.0f) // Matrix Scale: Float3 (assume w = 1.0f)
@ -32,51 +37,81 @@ namespace math_backend::mat44
m_store(m, ma); m_store(m, ma);
} }
inline void mat44_transform_plane_impl(const float* m, const float* scale, const float* plane, float* plane_result) inline void mat44_transform_plane_impl(const float* m, const float* scale, const float* plane, float* presult)
{ {
f32x4x4 M = m_load(m);
f32x4 plane_v = v_load(plane); f32x4 plane_v = v_load(plane);
f32x4 scale_v = v_load3_vec(scale); f32x4 scale_v = v_load3_vec(scale);
f32x4 invScale = v_rcp_nr(scale_v); f32x4 invScale = v_rcp_nr(scale_v);
// normal = plane.xyz f32x4x4 M = m_load(m);
f32x4 normal = plane_v;
// apply Inv(s) f32x4 normal = plane_v;
normal = v_mul(normal, invScale); normal = v_mul(normal, invScale);
// multiply by Inv(Tr(m)) (only the rotation part matters) //---------------------------------------------------------
f32x4 nx = v_mul(v_swizzle_singular_mask(normal, 0), M.r0); // Extract translation column (tx ty tz)
f32x4 ny = v_mul(v_swizzle_singular_mask(normal, 1), M.r1); //---------------------------------------------------------
f32x4 nz = v_mul(v_swizzle_singular_mask(normal, 2), M.r2); f32x4 shear = v_set(
v_extract0(v_swizzle_singular_mask(M.r0, 3)),
v_extract0(v_swizzle_singular_mask(M.r1, 3)),
v_extract0(v_swizzle_singular_mask(M.r2, 3)),
0.0f
);
float A = -v_extract0(v_dot3(M.r0, shear));
float B = -v_extract0(v_dot3(M.r1, shear));
float C = -v_extract0(v_dot3(M.r2, shear));
//---------------------------------------------------------
// Build columns of rotation
//---------------------------------------------------------
f32x4 col0 = v_set(
v_extract0(v_swizzle_singular_mask(M.r0, 0)),
v_extract0(v_swizzle_singular_mask(M.r1, 0)),
v_extract0(v_swizzle_singular_mask(M.r2, 0)),
0.0f
);
f32x4 col1 = v_set(
v_extract0(v_swizzle_singular_mask(M.r0, 1)),
v_extract0(v_swizzle_singular_mask(M.r1, 1)),
v_extract0(v_swizzle_singular_mask(M.r2, 1)),
0.0f
);
f32x4 col2 = v_set(
v_extract0(v_swizzle_singular_mask(M.r0, 2)),
v_extract0(v_swizzle_singular_mask(M.r1, 2)),
v_extract0(v_swizzle_singular_mask(M.r2, 2)),
0.0f
);
f32x4 nx = v_mul(v_swizzle_singular_mask(normal, 0), col0);
f32x4 ny = v_mul(v_swizzle_singular_mask(normal, 1), col1);
f32x4 nz = v_mul(v_swizzle_singular_mask(normal, 2), col2);
normal = v_add(v_add(nx, ny), nz); normal = v_add(v_add(nx, ny), nz);
normal = v_add(normal, v_set(A, B, C, 0.0f));
normal = v_normalize3(normal); normal = v_normalize3(normal);
// compute point on plane
float d = v_extract0(v_swizzle_singular_mask(plane_v, 3)); float d = v_extract0(v_swizzle_singular_mask(plane_v, 3));
f32x4 point = v_mul(plane_v, v_set1(-d)); f32x4 origNormal = plane_v;
point = v_preserve_w(point, v_set1(1.0f)); f32x4 point = v_mul(origNormal, v_set1(-d));
point = v_insert_w(point, v_set1(1.0f));
// apply scale
point = v_mul(point, scale_v); point = v_mul(point, scale_v);
// transform point by matrix
point = m_mul_vec4(M, point); point = m_mul_vec4(M, point);
// compute new plane distance
float newD = -v_extract0(v_dot3(point, normal)); float newD = -v_extract0(v_dot3(point, normal));
alignas(16) float n[4]; alignas(16) float n[4];
v_store(n, normal); v_store(n, normal);
plane_result[0] = n[0]; presult[0] = n[0];
plane_result[1] = n[1]; presult[1] = n[1];
plane_result[2] = n[2]; presult[2] = n[2];
plane_result[3] = newD; presult[3] = newD;
} }
inline void mat44_get_scale_impl(const float* m, float* s) inline void mat44_get_scale_impl(const float* m, float* s)
@ -109,92 +144,92 @@ namespace math_backend::mat44
m_store(m, ma); m_store(m, ma);
} }
// Vector Multiply: m * v (assume w = 0.0f)
inline void mat44_mul_vec3_impl(const float* m, const float* v, float* r)
{
f32x4x4 ma = m_load(m);
f32x4 va = v_load3_vec(v);
f32x4 vr = m_mul_vec3(ma, va);
v_store3(r, vr);
}
// Matrix Inverse // Matrix Inverse
inline void mat44_inverse_impl(float* m) inline void mat44_inverse_impl(float* m)
{ {
f32x4x4 ma = m_load(m); // using Cramers Rule find the Inverse
// Minv = (1/det(M)) * adjoint(M)
float det = mat44_get_determinant(m);
float invDet = 1.0f / det;
float temp[16];
temp[0] = (m[5] * m[10] - m[6] * m[9]) * invDet;
temp[1] = (m[9] * m[2] - m[10] * m[1]) * invDet;
temp[2] = (m[1] * m[6] - m[2] * m[5]) * invDet;
// Compute cofactors using cross products temp[4] = (m[6] * m[8] - m[4] * m[10]) * invDet;
f32x4x4 mTemp; temp[5] = (m[10] * m[0] - m[8] * m[2]) * invDet;
mTemp.r0 = v_cross(ma.r1, ma.r2); temp[6] = (m[2] * m[4] - m[0] * m[6]) * invDet;
mTemp.r1 = v_cross(ma.r2, ma.r0);
mTemp.r2 = v_cross(ma.r0, ma.r1);
// Determinant = dot(ma.r0, c0) temp[8] = (m[4] * m[9] - m[5] * m[8]) * invDet;
f32x4 det = v_dot3(ma.r0, mTemp.r0); temp[9] = (m[8] * m[1] - m[9] * m[0]) * invDet;
f32x4 invDet = v_rcp_nr(det); temp[10] = (m[0] * m[5] - m[1] * m[4]) * invDet;
// Scale cofactors m[0] = temp[0];
mTemp.r0 = v_mul(mTemp.r0, invDet); m[1] = temp[1];
mTemp.r1 = v_mul(mTemp.r1, invDet); m[2] = temp[2];
mTemp.r2 = v_mul(mTemp.r2, invDet);
// Store inverse 3x3 (transpose of cofactor matrix) m[4] = temp[4];
m[5] = temp[5];
m[6] = temp[6];
mTemp = m_transpose(mTemp); m[8] = temp[8];
mTemp.r3 = ma.r3; m[9] = temp[9];
m[10] = temp[10];
// ---- Translation ---- // invert the translation
temp[0] = -m[3];
// Load original translation temp[1] = -m[7];
f32x4 T = v_set(m[3], m[7], m[11], 0.0f); temp[2] = -m[11];
mat44_mul_vec3_impl(m, temp, &temp[4]);
// Compute -(Tx*ma.r0 + Ty*ma.r1 + Tz*ma.r2) m[3] = temp[4];
f32x4 result = v_mul(ma.r0, v_swizzle_singular_mask(T, 0)); m[7] = temp[5];
result = v_add(result, v_mul(ma.r1, v_swizzle_singular_mask(T, 1))); m[11] = temp[6];
result = v_add(result, v_mul(ma.r2, v_swizzle_singular_mask(T, 2)));
result = v_mul(result, v_set1(-1.0f));
m_store(m, mTemp);
// Store translation
m[3] = v_extract0(result);
m[7] = v_extract0(v_swizzle_singular_mask(result, 1));
m[11] = v_extract0(v_swizzle_singular_mask(result, 2));
} }
// Matrix Inverse // Matrix Inverse
inline void mat44_inverse_to_impl(const float* m, float* dst) inline void mat44_inverse_to_impl(const float* m, float* d)
{ {
f32x4x4 ma = m_load(m); // using Cramers Rule find the Inverse
// Minv = (1/det(M)) * adjoint(M)
float det = mat44_get_determinant(m);
// Compute cofactors using cross products float invDet = 1.0f / det;
f32x4x4 mTemp;
mTemp.r0 = v_cross(ma.r1, ma.r2);
mTemp.r1 = v_cross(ma.r2, ma.r0);
mTemp.r2 = v_cross(ma.r0, ma.r1);
// Determinant = dot(ma.r0, c0) d[0] = (m[5] * m[10] - m[6] * m[9]) * invDet;
f32x4 det = v_dot3(ma.r0, mTemp.r0); d[1] = (m[9] * m[2] - m[10] * m[1]) * invDet;
f32x4 invDet = v_rcp_nr(det); d[2] = (m[1] * m[6] - m[2] * m[5]) * invDet;
// Scale cofactors d[4] = (m[6] * m[8] - m[4] * m[10]) * invDet;
mTemp.r0 = v_mul(mTemp.r0, invDet); d[5] = (m[10] * m[0] - m[8] * m[2]) * invDet;
mTemp.r1 = v_mul(mTemp.r1, invDet); d[6] = (m[2] * m[4] - m[0] * m[6]) * invDet;
mTemp.r2 = v_mul(mTemp.r2, invDet);
// Store inverse 3x3 (transpose of cofactor matrix) d[8] = (m[4] * m[9] - m[5] * m[8]) * invDet;
d[9] = (m[8] * m[1] - m[9] * m[0]) * invDet;
d[10] = (m[0] * m[5] - m[1] * m[4]) * invDet;
mTemp = m_transpose(mTemp); // invert the translation
mTemp.r3 = ma.r3; float temp[6];
temp[0] = -m[3];
// ---- Translation ---- temp[1] = -m[7];
temp[2] = -m[11];
// Load original translation mat44_mul_vec3_impl(d, temp, &temp[3]);
f32x4 T = v_set(m[3], m[7], m[11], 0.0f); d[3] = temp[3];
d[7] = temp[4];
// Compute -(Tx*ma.r0 + Ty*ma.r1 + Tz*ma.r2) d[11] = temp[5];
f32x4 result = v_mul(ma.r0, v_swizzle_singular_mask(T, 0)); d[12] = m[12];
result = v_add(result, v_mul(ma.r1, v_swizzle_singular_mask(T, 1))); d[13] = m[13];
result = v_add(result, v_mul(ma.r2, v_swizzle_singular_mask(T, 2))); d[14] = m[14];
result = v_mul(result, v_set1(-1.0f)); d[15] = m[15];
m_store(dst, mTemp);
// Store translation
dst[3] = v_extract0(result);
dst[7] = v_extract0(v_swizzle_singular_mask(result, 1));
dst[11] = v_extract0(v_swizzle_singular_mask(result, 2));
} }
// Matrix Affine Inverse // Matrix Affine Inverse
@ -275,15 +310,6 @@ namespace math_backend::mat44
v_store3(r, vr); v_store3(r, vr);
} }
// Vector Multiply: m * v (assume w = 0.0f)
inline void mat44_mul_vec3_impl(const float* m, const float* v, float* r)
{
f32x4x4 ma = m_load(m);
f32x4 va = v_load3_vec(v);
f32x4 vr = m_mul_vec3(ma, va);
v_store3(r, vr);
}
// Vector Multiply: m * p (full [4x4] * [1x4]) // Vector Multiply: m * p (full [4x4] * [1x4])
inline void mat44_mul_float4_impl(const float* m, const float* p, float* r) inline void mat44_mul_float4_impl(const float* m, const float* p, float* r)
{ {