mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-03-23 22:29:28 +00:00
matrix work
This commit is contained in:
parent
58632d0d73
commit
11d8df4866
1 changed files with 170 additions and 113 deletions
|
|
@ -15,8 +15,13 @@ namespace math_backend::mat44
|
|||
|
||||
inline float mat44_get_determinant(const float* m)
|
||||
{
|
||||
f32x4x4 ma = m_load(m);
|
||||
return v_extract0(m_determinant_affine(ma));
|
||||
f32x4 r0 = v_load3_vec(m + 0); // row0 xyz
|
||||
f32x4 r1 = v_load3_vec(m + 4); // row1 xyz
|
||||
f32x4 r2 = v_load3_vec(m + 8); // row2 xyz
|
||||
|
||||
f32x4 c0 = v_cross(r1, r2); // cofactor for row0
|
||||
f32x4 det = v_dot3(r0, c0); // splatted determinant
|
||||
return v_extract0(det);
|
||||
}
|
||||
|
||||
// Matrix Scale: Float3 (assume w = 1.0f)
|
||||
|
|
@ -32,51 +37,66 @@ namespace math_backend::mat44
|
|||
m_store(m, ma);
|
||||
}
|
||||
|
||||
inline void mat44_transform_plane_impl(const float* m, const float* scale, const float* plane, float* plane_result)
|
||||
inline void mat44_transform_plane_impl(const float* m, const float* s, const float* p, float* presult)
|
||||
{
|
||||
f32x4 scale = v_load3_pos(s);
|
||||
f32x4 invScale = v_div(v_set1(1.0f), scale);
|
||||
|
||||
//--------------------------------------------------
|
||||
// Load affine 3x3 rows and translation
|
||||
//--------------------------------------------------
|
||||
f32x4 row0 = v_load3_vec(m + 0);
|
||||
f32x4 row1 = v_load3_vec(m + 4);
|
||||
f32x4 row2 = v_load3_vec(m + 8);
|
||||
f32x4 shear = v_set(m[3], m[7], m[11], 1.0);
|
||||
|
||||
//--------------------------------------------------
|
||||
// Compute A, B, C = -dot(row, shear)
|
||||
//--------------------------------------------------
|
||||
f32x4 A = v_mul(v_dot3(row0, shear), v_set1(-1.0f));
|
||||
f32x4 B = v_mul(v_dot3(row1, shear), v_set1(-1.0f));
|
||||
f32x4 C = v_mul(v_dot3(row2, shear), v_set1(-1.0f));
|
||||
|
||||
|
||||
f32x4x4 invTrMatrix;
|
||||
invTrMatrix.r0 = v_set(m[0], m[1], m[2], v_extract0(A));
|
||||
invTrMatrix.r1 = v_set(m[4], m[5], m[6], v_extract0(B));
|
||||
invTrMatrix.r2 = v_set(m[8], m[9], m[10], v_extract0(C));
|
||||
invTrMatrix.r3 = v_set(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
|
||||
// Apply inverse scale to upper-left 3x3
|
||||
invTrMatrix.r0 = v_mul(invTrMatrix.r0, invScale);
|
||||
invTrMatrix.r1 = v_mul(invTrMatrix.r1, invScale);
|
||||
invTrMatrix.r2 = v_mul(invTrMatrix.r2, invScale);
|
||||
|
||||
|
||||
f32x4 normal = v_load3_pos(p); // plane normal {x,y,z,1}
|
||||
f32x4 point = v_mul(normal, v_set1(-p[3])); // point = -d * normal
|
||||
|
||||
// Apply transform to normal
|
||||
f32x4 normTransformed = m_mul_vec3(invTrMatrix, normal);
|
||||
normTransformed = v_normalize3(normTransformed);
|
||||
|
||||
// transform point with original
|
||||
f32x4 scaleVec = v_load3_pos(s); // scale vector
|
||||
f32x4 pointScaled = v_mul(point, scaleVec);
|
||||
|
||||
pointScaled = v_insert_w(pointScaled, v_set1(1.0f));
|
||||
|
||||
f32x4x4 M = m_load(m);
|
||||
// Transform point
|
||||
f32x4 pointTransformed = m_mul_vec4(M, pointScaled);
|
||||
|
||||
f32x4 plane_v = v_load(plane);
|
||||
f32x4 scale_v = v_load3_vec(scale);
|
||||
f32x4 invScale = v_rcp_nr(scale_v);
|
||||
//--------------------------------------------------
|
||||
// Compute plane d = -dot(normal, transformedPoint)
|
||||
//--------------------------------------------------
|
||||
f32x4 dp = v_dot3( pointTransformed, normTransformed);
|
||||
float planeD = -v_extract0(dp);
|
||||
|
||||
// normal = plane.xyz
|
||||
f32x4 normal = plane_v;
|
||||
|
||||
// apply Inv(s)
|
||||
normal = v_mul(normal, invScale);
|
||||
|
||||
// multiply by Inv(Tr(m)) (only the rotation part matters)
|
||||
f32x4 nx = v_mul(v_swizzle_singular_mask(normal, 0), M.r0);
|
||||
f32x4 ny = v_mul(v_swizzle_singular_mask(normal, 1), M.r1);
|
||||
f32x4 nz = v_mul(v_swizzle_singular_mask(normal, 2), M.r2);
|
||||
|
||||
normal = v_add(v_add(nx, ny), nz);
|
||||
|
||||
normal = v_normalize3(normal);
|
||||
|
||||
// compute point on plane
|
||||
float d = v_extract0(v_swizzle_singular_mask(plane_v, 3));
|
||||
|
||||
f32x4 point = v_mul(plane_v, v_set1(-d));
|
||||
point = v_preserve_w(point, v_set1(1.0f));
|
||||
|
||||
// apply scale
|
||||
point = v_mul(point, scale_v);
|
||||
|
||||
// transform point by matrix
|
||||
point = m_mul_vec4(M, point);
|
||||
|
||||
// compute new plane distance
|
||||
float newD = -v_extract0(v_dot3(point, normal));
|
||||
|
||||
alignas(16) float n[4];
|
||||
v_store(n, normal);
|
||||
|
||||
plane_result[0] = n[0];
|
||||
plane_result[1] = n[1];
|
||||
plane_result[2] = n[2];
|
||||
plane_result[3] = newD;
|
||||
presult[0] = v_extract0(normTransformed);
|
||||
presult[1] = v_extract0(v_swizzle_mask(normTransformed, 1, 1, 1, 1));
|
||||
presult[2] = v_extract0(v_swizzle_mask(normTransformed, 2, 2, 2, 2));
|
||||
presult[3] = planeD;
|
||||
}
|
||||
|
||||
inline void mat44_get_scale_impl(const float* m, float* s)
|
||||
|
|
@ -109,92 +129,138 @@ namespace math_backend::mat44
|
|||
m_store(m, ma);
|
||||
}
|
||||
|
||||
// Vector Multiply: m * v (assume w = 0.0f)
|
||||
inline void mat44_mul_vec3_impl(const float* m, const float* v, float* r)
|
||||
{
|
||||
f32x4x4 ma = m_load(m);
|
||||
f32x4 va = v_load3_vec(v);
|
||||
f32x4 vr = m_mul_vec3(ma, va);
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
|
||||
// Matrix Inverse
|
||||
inline void mat44_inverse_impl(float* m)
|
||||
{
|
||||
f32x4x4 ma = m_load(m);
|
||||
//// using Cramers Rule find the Inverse
|
||||
//// Minv = (1/det(M)) * adjoint(M)
|
||||
f32x4 r0 = v_load3_vec(m + 0); // row 0: m00 m01 m02
|
||||
f32x4 r1 = v_load3_vec(m + 4); // row 1: m10 m11 m12
|
||||
f32x4 r2 = v_load3_vec(m + 8); // row 2: m20 m21 m22
|
||||
float det = mat44_get_determinant(m);
|
||||
f32x4 invDet = v_set1(1.0f / det);
|
||||
|
||||
// Compute cofactors using cross products
|
||||
f32x4x4 mTemp;
|
||||
mTemp.r0 = v_cross(ma.r1, ma.r2);
|
||||
mTemp.r1 = v_cross(ma.r2, ma.r0);
|
||||
mTemp.r2 = v_cross(ma.r0, ma.r1);
|
||||
f32x4x4 temp;
|
||||
|
||||
// Determinant = dot(ma.r0, c0)
|
||||
f32x4 det = v_dot3(ma.r0, mTemp.r0);
|
||||
f32x4 invDet = v_rcp_nr(det);
|
||||
temp.r0 = v_set(
|
||||
(m[5] * m[10] - m[6] * m[9]),
|
||||
(m[9] * m[2] - m[10] * m[1]),
|
||||
(m[1] * m[6] - m[2] * m[5]),
|
||||
0
|
||||
);
|
||||
|
||||
// Scale cofactors
|
||||
mTemp.r0 = v_mul(mTemp.r0, invDet);
|
||||
mTemp.r1 = v_mul(mTemp.r1, invDet);
|
||||
mTemp.r2 = v_mul(mTemp.r2, invDet);
|
||||
temp.r1 = v_set(
|
||||
(m[6] * m[8] - m[4] * m[10]),
|
||||
(m[10] * m[0] - m[8] * m[2]),
|
||||
(m[2] * m[4] - m[0] * m[6]),
|
||||
0
|
||||
);
|
||||
|
||||
// Store inverse 3x3 (transpose of cofactor matrix)
|
||||
temp.r2 = v_set(
|
||||
(m[4] * m[9] - m[5] * m[8]),
|
||||
(m[8] * m[1] - m[9] * m[0]),
|
||||
(m[0] * m[5] - m[1] * m[4]),
|
||||
0
|
||||
);
|
||||
|
||||
mTemp = m_transpose(mTemp);
|
||||
mTemp.r3 = ma.r3;
|
||||
temp.r0 = v_mul(temp.r0, invDet);
|
||||
temp.r1 = v_mul(temp.r1, invDet);
|
||||
temp.r2 = v_mul(temp.r2, invDet);
|
||||
|
||||
// ---- Translation ----
|
||||
// Compute new translation: -R^-1 * T
|
||||
f32x4 t = v_set(m[3], m[7], m[11], 0.0f); // row-major: last element in row
|
||||
f32x4 t_new;
|
||||
|
||||
// Load original translation
|
||||
f32x4 T = v_set(m[3], m[7], m[11], 0.0f);
|
||||
|
||||
// Compute -(Tx*ma.r0 + Ty*ma.r1 + Tz*ma.r2)
|
||||
f32x4 result = v_mul(ma.r0, v_swizzle_singular_mask(T, 0));
|
||||
result = v_add(result, v_mul(ma.r1, v_swizzle_singular_mask(T, 1)));
|
||||
result = v_add(result, v_mul(ma.r2, v_swizzle_singular_mask(T, 2)));
|
||||
result = v_mul(result, v_set1(-1.0f));
|
||||
t_new = v_set(
|
||||
-v_extract0(v_dot3(temp.r0, t)),
|
||||
-v_extract0(v_dot3(temp.r1, t)),
|
||||
-v_extract0(v_dot3(temp.r2, t)),
|
||||
0.0f
|
||||
);
|
||||
|
||||
m_store(m, mTemp);
|
||||
// Store back rotation
|
||||
m[0] = v_extract0(temp.r0); m[1] = v_extract0(v_swizzle_mask(temp.r0, 1, 1, 1, 1)); m[2] = v_extract0(v_swizzle_mask(temp.r0, 2, 2, 2, 2));
|
||||
m[4] = v_extract0(temp.r1); m[5] = v_extract0(v_swizzle_mask(temp.r1, 1, 1, 1, 1)); m[6] = v_extract0(v_swizzle_mask(temp.r1, 2, 2, 2, 2));
|
||||
m[8] = v_extract0(temp.r2); m[9] = v_extract0(v_swizzle_mask(temp.r2, 1, 1, 1, 1)); m[10] = v_extract0(v_swizzle_mask(temp.r2, 2, 2, 2, 2));
|
||||
|
||||
// Store translation
|
||||
m[3] = v_extract0(result);
|
||||
m[7] = v_extract0(v_swizzle_singular_mask(result, 1));
|
||||
m[11] = v_extract0(v_swizzle_singular_mask(result, 2));
|
||||
// Store translation
|
||||
m[3] = v_extract0(t_new);
|
||||
m[7] = v_extract0(v_swizzle_mask(t_new, 1, 1, 1, 1));
|
||||
m[11] = v_extract0(v_swizzle_mask(t_new, 2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// Matrix Inverse
|
||||
inline void mat44_inverse_to_impl(const float* m, float* dst)
|
||||
inline void mat44_inverse_to_impl(const float* m, float* d)
|
||||
{
|
||||
f32x4x4 ma = m_load(m);
|
||||
//// using Cramers Rule find the Inverse
|
||||
//// Minv = (1/det(M)) * adjoint(M)
|
||||
f32x4 r0 = v_load3_vec(m + 0); // row 0: m00 m01 m02
|
||||
f32x4 r1 = v_load3_vec(m + 4); // row 1: m10 m11 m12
|
||||
f32x4 r2 = v_load3_vec(m + 8); // row 2: m20 m21 m22
|
||||
float det = mat44_get_determinant(m);
|
||||
f32x4 invDet = v_set1(1.0f / det);
|
||||
|
||||
// Compute cofactors using cross products
|
||||
f32x4x4 mTemp;
|
||||
mTemp.r0 = v_cross(ma.r1, ma.r2);
|
||||
mTemp.r1 = v_cross(ma.r2, ma.r0);
|
||||
mTemp.r2 = v_cross(ma.r0, ma.r1);
|
||||
f32x4x4 temp;
|
||||
|
||||
// Determinant = dot(ma.r0, c0)
|
||||
f32x4 det = v_dot3(ma.r0, mTemp.r0);
|
||||
f32x4 invDet = v_rcp_nr(det);
|
||||
temp.r0 = v_set(
|
||||
(m[5] * m[10] - m[6] * m[9]),
|
||||
(m[9] * m[2] - m[10] * m[1]),
|
||||
(m[1] * m[6] - m[2] * m[5]),
|
||||
0
|
||||
);
|
||||
|
||||
// Scale cofactors
|
||||
mTemp.r0 = v_mul(mTemp.r0, invDet);
|
||||
mTemp.r1 = v_mul(mTemp.r1, invDet);
|
||||
mTemp.r2 = v_mul(mTemp.r2, invDet);
|
||||
temp.r1 = v_set(
|
||||
(m[6] * m[8] - m[4] * m[10]),
|
||||
(m[10] * m[0] - m[8] * m[2]),
|
||||
(m[2] * m[4] - m[0] * m[6]),
|
||||
0
|
||||
);
|
||||
|
||||
// Store inverse 3x3 (transpose of cofactor matrix)
|
||||
temp.r2 = v_set(
|
||||
(m[4] * m[9] - m[5] * m[8]),
|
||||
(m[8] * m[1] - m[9] * m[0]),
|
||||
(m[0] * m[5] - m[1] * m[4]),
|
||||
0
|
||||
);
|
||||
|
||||
mTemp = m_transpose(mTemp);
|
||||
mTemp.r3 = ma.r3;
|
||||
temp.r0 = v_mul(temp.r0, invDet);
|
||||
temp.r1 = v_mul(temp.r1, invDet);
|
||||
temp.r2 = v_mul(temp.r2, invDet);
|
||||
|
||||
// ---- Translation ----
|
||||
// Compute new translation: -R^-1 * T
|
||||
f32x4 t = v_set(m[3], m[7], m[11], 0.0f); // row-major: last element in row
|
||||
f32x4 t_new;
|
||||
|
||||
// Load original translation
|
||||
f32x4 T = v_set(m[3], m[7], m[11], 0.0f);
|
||||
t_new = v_set(
|
||||
-v_extract0(v_dot3(temp.r0, t)),
|
||||
-v_extract0(v_dot3(temp.r1, t)),
|
||||
-v_extract0(v_dot3(temp.r2, t)),
|
||||
0.0f
|
||||
);
|
||||
|
||||
// Compute -(Tx*ma.r0 + Ty*ma.r1 + Tz*ma.r2)
|
||||
f32x4 result = v_mul(ma.r0, v_swizzle_singular_mask(T, 0));
|
||||
result = v_add(result, v_mul(ma.r1, v_swizzle_singular_mask(T, 1)));
|
||||
result = v_add(result, v_mul(ma.r2, v_swizzle_singular_mask(T, 2)));
|
||||
result = v_mul(result, v_set1(-1.0f));
|
||||
// Store back rotation
|
||||
d[0] = v_extract0(temp.r0); d[1] = v_extract0(v_swizzle_mask(temp.r0, 1, 1, 1, 1)); d[2] = v_extract0(v_swizzle_mask(temp.r0, 2, 2, 2, 2));
|
||||
d[4] = v_extract0(temp.r1); d[5] = v_extract0(v_swizzle_mask(temp.r1, 1, 1, 1, 1)); d[6] = v_extract0(v_swizzle_mask(temp.r1, 2, 2, 2, 2));
|
||||
d[8] = v_extract0(temp.r2); d[9] = v_extract0(v_swizzle_mask(temp.r2, 1, 1, 1, 1)); d[10] = v_extract0(v_swizzle_mask(temp.r2, 2, 2, 2, 2));
|
||||
|
||||
m_store(dst, mTemp);
|
||||
|
||||
// Store translation
|
||||
dst[3] = v_extract0(result);
|
||||
dst[7] = v_extract0(v_swizzle_singular_mask(result, 1));
|
||||
dst[11] = v_extract0(v_swizzle_singular_mask(result, 2));
|
||||
// Store translation
|
||||
d[3] = v_extract0(t_new);
|
||||
d[7] = v_extract0(v_swizzle_mask(t_new, 1, 1, 1, 1));
|
||||
d[11] = v_extract0(v_swizzle_mask(t_new, 2, 2, 2, 2));
|
||||
d[12] = m[12];
|
||||
d[13] = m[13];
|
||||
d[14] = m[14];
|
||||
d[15] = m[15];
|
||||
}
|
||||
|
||||
// Matrix Affine Inverse
|
||||
|
|
@ -275,15 +341,6 @@ namespace math_backend::mat44
|
|||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
// Vector Multiply: m * v (assume w = 0.0f)
|
||||
inline void mat44_mul_vec3_impl(const float* m, const float* v, float* r)
|
||||
{
|
||||
f32x4x4 ma = m_load(m);
|
||||
f32x4 va = v_load3_vec(v);
|
||||
f32x4 vr = m_mul_vec3(ma, va);
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
// Vector Multiply: m * p (full [4x4] * [1x4])
|
||||
inline void mat44_mul_float4_impl(const float* m, const float* p, float* r)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue