mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-03-21 13:21:02 +00:00
more intrinsics
add transform plane added first batch function for mulp to intrinsics
This commit is contained in:
parent
ac6ec05690
commit
add7f2a5d7
14 changed files with 710 additions and 113 deletions
|
|
@ -32,6 +32,53 @@ namespace math_backend::mat44
|
|||
m_store(m, ma);
|
||||
}
|
||||
|
||||
inline void mat44_transform_plane_impl(const float* m, const float* scale, const float* plane, float* plane_result)
|
||||
{
|
||||
f32x4x4 M = m_load(m);
|
||||
|
||||
f32x4 plane_v = v_load(plane);
|
||||
f32x4 scale_v = v_load3_vec(scale);
|
||||
f32x4 invScale = v_rcp_nr(scale_v);
|
||||
|
||||
// normal = plane.xyz
|
||||
f32x4 normal = plane_v;
|
||||
|
||||
// apply Inv(s)
|
||||
normal = v_mul(normal, invScale);
|
||||
|
||||
// multiply by Inv(Tr(m)) (only the rotation part matters)
|
||||
f32x4 nx = v_mul(v_swizzle_singular_mask(normal, 0), M.r0);
|
||||
f32x4 ny = v_mul(v_swizzle_singular_mask(normal, 1), M.r1);
|
||||
f32x4 nz = v_mul(v_swizzle_singular_mask(normal, 2), M.r2);
|
||||
|
||||
normal = v_add(v_add(nx, ny), nz);
|
||||
|
||||
normal = v_normalize3(normal);
|
||||
|
||||
// compute point on plane
|
||||
float d = v_extract0(v_swizzle_singular_mask(plane_v, 3));
|
||||
|
||||
f32x4 point = v_mul(plane_v, v_set1(-d));
|
||||
point = v_preserve_w(point, v_set1(1.0f));
|
||||
|
||||
// apply scale
|
||||
point = v_mul(point, scale_v);
|
||||
|
||||
// transform point by matrix
|
||||
point = m_mul_vec4(M, point);
|
||||
|
||||
// compute new plane distance
|
||||
float newD = -v_extract0(v_dot3(point, normal));
|
||||
|
||||
alignas(16) float n[4];
|
||||
v_store(n, normal);
|
||||
|
||||
plane_result[0] = n[0];
|
||||
plane_result[1] = n[1];
|
||||
plane_result[2] = n[2];
|
||||
plane_result[3] = newD;
|
||||
}
|
||||
|
||||
inline void mat44_get_scale_impl(const float* m, float* s)
|
||||
{
|
||||
f32x4x4 ma = m_load(m);
|
||||
|
|
@ -341,4 +388,39 @@ namespace math_backend::mat44
|
|||
m_store(m, mo);
|
||||
}
|
||||
|
||||
//--------------------------------------------------
|
||||
// MATRIX BATCH FUNCTIONS
|
||||
//--------------------------------------------------
|
||||
|
||||
inline void mat44_batch_mul_pos3(const float* m, const float* points, size_t count, float* result)
|
||||
{
|
||||
size_t i = 0;
|
||||
f32x4x4 ma = m_load(m);
|
||||
|
||||
// AVX has 8 lanes to play with
|
||||
#if defined(MATH_SIMD_AVX2) || defined(MATH_SIMD_AVX)
|
||||
// 8-wide AVX only
|
||||
for (; i + 8 <= count; i += 8)
|
||||
{
|
||||
vec4_batch8 va = load_vec3_batch8(&points[i*3], 1.0f, false);
|
||||
vec4_batch8 vr = m_mul_pos3_batch8(ma, va);
|
||||
store_vec3_batch8(&result[i*3], vr);
|
||||
}
|
||||
#endif // MATH_SIMD_AVX2 || MATH_SIMD_AVX
|
||||
|
||||
// 4-wide
|
||||
for (; i + 4 <= count; i += 4)
|
||||
{
|
||||
vec4_batch4 va = load_vec3_batch4(&points[i * 3], 1.0f, false);
|
||||
vec4_batch4 vr = m_mul_pos3_batch4(ma, va);
|
||||
store_vec3_batch4(&result[i * 3], vr);
|
||||
}
|
||||
|
||||
for (; i < count; ++i)
|
||||
{
|
||||
size_t idx = i * 3;
|
||||
mat44_mul_pos3_impl(m, &points[idx], &result[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace math_backend::mat44
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "math/public/float3_dispatch.h"
|
||||
#include "math/public/mat44_dispatch.h"
|
||||
#include "math/mConstants.h"
|
||||
#include "math/mMatrix.h"
|
||||
#include <cmath> // for sqrtf, etc.
|
||||
|
||||
namespace math_backend::float4::dispatch
|
||||
|
|
@ -375,6 +376,88 @@ namespace math_backend::mat44::dispatch
|
|||
mresult[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15];
|
||||
};
|
||||
|
||||
gMat44.transform_plane = [](const F32* m, const F32* s, const F32* p, F32* presult) {
    // Scalar fallback: given a matrix m, a scale s and a plane equation p,
    // write the transformed plane (normal xyz followed by d) to presult.
    //
    // With T = m*s, normals are transformed by
    //   Inv(Tr(m*s)) = Inv(Tr(m)) * Inv(Tr(s))
    // where Inv(Tr(s)) = Inv(s) is the diagonal matrix (1/x, 1/y, 1/z, 1).
    // For the affine matrix m, Inv(Tr(m)) keeps the 3x3 block of m and gains
    // the extra terms A, B, C computed below.

    // Inv(s): identity with the reciprocal scale on the diagonal.
    MatrixF inverseScale(true);
    F32* inverseScaleData = inverseScale;
    for (unsigned int axis = 0; axis < 3; ++axis)
        inverseScaleData[MatrixF::idx(axis, axis)] = 1.0f / s[axis];

    // Elements of m addressed as idx(3, k) and idx(0..2, k).
    const Point3F row3(m[MatrixF::idx(3, 0)], m[MatrixF::idx(3, 1)], m[MatrixF::idx(3, 2)]);
    const Point3F row0(m[MatrixF::idx(0, 0)], m[MatrixF::idx(0, 1)], m[MatrixF::idx(0, 2)]);
    const Point3F row1(m[MatrixF::idx(1, 0)], m[MatrixF::idx(1, 1)], m[MatrixF::idx(1, 2)]);
    const Point3F row2(m[MatrixF::idx(2, 0)], m[MatrixF::idx(2, 1)], m[MatrixF::idx(2, 2)]);

    const F32 A = -mDot(row0, row3);
    const F32 B = -mDot(row1, row3);
    const F32 C = -mDot(row2, row3);

    // Inv(Tr(m)): start from identity, copy the 3x3 block of m unchanged and
    // place A, B, C at idx(0..2, 3).
    MatrixF normalXform(true);
    F32* normalXformData = normalXform;
    for (unsigned int i = 0; i < 3; ++i)
    {
        for (unsigned int j = 0; j < 3; ++j)
            normalXformData[MatrixF::idx(i, j)] = m[MatrixF::idx(i, j)];
    }
    normalXformData[MatrixF::idx(0, 3)] = A;
    normalXformData[MatrixF::idx(1, 3)] = B;
    normalXformData[MatrixF::idx(2, 3)] = C;
    normalXform.mul(inverseScale);

    // A point on the source plane must be taken from the untransformed normal,
    // so compute it before the normal is modified.
    Point3F normal(p[0], p[1], p[2]);
    Point3F onPlane = normal * -p[3];

    normalXform.mulP(normal);
    normal.normalize();

    // Scale the point, then run it through a copy of the full matrix.
    MatrixF fullXform;
    dMemcpy(fullXform, m, sizeof(F32) * 16);
    onPlane.x *= s[0];
    onPlane.y *= s[1];
    onPlane.z *= s[2];
    fullXform.mulP(onPlane);

    // Rebuild the plane from the transformed point and normal.
    PlaneF transformed(onPlane, normal);
    presult[0] = transformed.x;
    presult[1] = transformed.y;
    presult[2] = transformed.z;
    presult[3] = transformed.d;
};
|
||||
|
||||
gMat44.normalize = [](float* a) {
|
||||
F32 col0[3], col1[3], col2[3];
|
||||
// extract columns 0 and 1
|
||||
|
|
@ -404,5 +487,14 @@ namespace math_backend::mat44::dispatch
|
|||
a[10] = col2[2];
|
||||
|
||||
};
|
||||
|
||||
gMat44.batch_mul_pos3 = [](const float* m, const float* pts, size_t count, float* result_ptrs) {
    // Scalar fallback: feed each group of three floats in pts through
    // gMat44.mul_pos3, writing to the matching group in result_ptrs.
    const float* src = pts;
    float* dst = result_ptrs;
    for (size_t remaining = count; remaining > 0; --remaining)
    {
        gMat44.mul_pos3(m, src, dst);
        src += 3;
        dst += 3;
    }
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue