mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-03-19 12:20:57 +00:00
ISA backends float3 and float4 - cleanup history squash
working for both neon32 and neon64; Update math_backend.cpp; further sse simd additions; avx2 float3 added; added normalize_magnitude; added divide fast to float3, may copy to float4; move static spheremesh to drawSphere (initialize on first use) so platform has a chance to load the math backend; all float3 and float4 functions and isas completed; all options of float3 and float4 functions in isas and math_c; neon still to be done but that will be on mac. Update math_backend.cpp; mac isa neon update; added float3; restructured the classes to look more like the final version of the x86 classes; linux required changes; Update build-macos-clang.yml; Update build-macos-clang.yml; Revert "Update build-macos-clang.yml": This reverts commit 29dfc567f4. Revert "Update build-macos-clang.yml": This reverts commit 2abad2b4ca. Update CMakeLists.txt; fix macs stupid build; remove god awful rolling average from frame time tracker....; use intrinsic headers instead: each isa implementation now uses a header for that isa's intrinsic functions, these are then used in the impl files. This will make it easier for matrix functions when those are implemented. fixed comment saying 256 when it should be 512 for avx512; consolidated initializers for function tables; Update neon_intrinsics.h; fixes for some neon intrinsics: no idea if this is the best way to do these but they work at least; v_cross is especially messy at the moment, we basically just do it as a c math function, need to look into getting this done correctly
This commit is contained in:
parent
73ed502ac9
commit
67f12311d4
36 changed files with 1481 additions and 419 deletions
123
Engine/source/math/impl/float3_impl.inl
Normal file
123
Engine/source/math/impl/float3_impl.inl
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
#pragma once
|
||||
#include <cmath> // for sqrtf, etc.
|
||||
#include "../mConstants.h"
|
||||
|
||||
// Safely loads a float3 -> simd 4 lane backend
|
||||
namespace math_backend::float3
|
||||
{
|
||||
//----------------------------------------------------------
|
||||
// Add two float4 vectors: r = a + b
|
||||
inline void float3_add_impl(const float* a, const float* b, float* r)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 vb = v_load3_vec(b);
|
||||
f32x4 vr = v_add(va, vb);
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
// Subtract: r = a - b
|
||||
inline void float3_sub_impl(const float* a, const float* b, float* r)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 vb = v_load3_vec(b);
|
||||
f32x4 vr = v_sub(va, vb);
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
// Multiply element-wise: r = a * b
|
||||
inline void float3_mul_impl(const float* a, const float* b, float* r)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 vb = v_load3_vec(b);
|
||||
f32x4 vr = v_mul(va, vb);
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
// Multiply by scalar: r = a * s
|
||||
inline void float3_mul_scalar_impl(const float* a, float s, float* r)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 vs = v_set1(s);
|
||||
f32x4 vr = v_mul(va, vs);
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
// Divide element-wise: r = a / b
|
||||
inline void float3_div_impl(const float* a, const float* b, float* r)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 vb = v_load3_vec(b);
|
||||
f32x4 vr = v_div(va, vb);
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
// Divide by scalar: r = a / s
|
||||
inline void float3_div_scalar_impl(const float* a, float s, float* r)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 vs = v_set1(s);
|
||||
f32x4 vr = v_div(va, vs);
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
// Dot product: returns scalar
|
||||
inline float float3_dot_impl(const float* a, const float* b)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 vb = v_load3_vec(b);
|
||||
f32x4 vdot = v_dot3(va, vb);
|
||||
return v_extract0(vdot); // first lane is the sum of 3 elements
|
||||
}
|
||||
|
||||
// Length squared
|
||||
inline float float3_length_squared_impl(const float* a)
|
||||
{
|
||||
return float3_dot_impl(a, a);
|
||||
}
|
||||
|
||||
// Length
|
||||
inline float float3_length_impl(const float* a)
|
||||
{
|
||||
return std::sqrt(float3_length_squared_impl(a));
|
||||
}
|
||||
|
||||
// Normalize in-place
|
||||
inline void float3_normalize_impl(float* a)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 invLen = v_rsqrt_nr(v_dot3(va, va)); // fully abstracted
|
||||
f32x4 vnorm = v_mul(va, invLen);
|
||||
v_store3(a, vnorm);
|
||||
}
|
||||
|
||||
// Normalize with magnitude: r = normalize(a) * r
|
||||
inline void float3_normalize_mag_impl(float* a, float r)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
|
||||
// invLen = r / sqrt(dot(a,a)) = r * rsqrt(dot(a,a))
|
||||
f32x4 invLen = v_mul(v_set1(r), v_rsqrt_nr(v_dot3(va, va)));
|
||||
|
||||
f32x4 vnorm = v_mul(va, invLen);
|
||||
v_store(a, vnorm);
|
||||
}
|
||||
|
||||
// Linear interpolation: r = from + (to - from) * f
|
||||
inline void float3_lerp_impl(const float* from, const float* to, float f, float* r)
|
||||
{
|
||||
f32x4 vfrom = v_load3_vec(from);
|
||||
f32x4 vto = v_load3_vec(to);
|
||||
f32x4 vf = v_set1(f);
|
||||
f32x4 vr = v_add(vfrom, v_mul(vf, v_sub(vto, vfrom)));
|
||||
v_store3(r, vr);
|
||||
}
|
||||
|
||||
inline void float3_cross_impl(const float* a, const float* b, float* r)
|
||||
{
|
||||
f32x4 va = v_load3_vec(a);
|
||||
f32x4 vb = v_load3_vec(b);
|
||||
f32x4 vcross = v_cross(va, vb);
|
||||
v_store3(r, vcross);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
#include "math/public/float4_dispatch.h"
|
||||
#include "math/mConstants.h"
|
||||
#include <math.h>
|
||||
|
||||
namespace math_backend::float4::dispatch
|
||||
{
|
||||
void install_scalar()
|
||||
{
|
||||
gFloat4.add = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] + b[i];
|
||||
};
|
||||
|
||||
gFloat4.sub = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] - b[i];
|
||||
};
|
||||
|
||||
gFloat4.mul = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] * b[i];
|
||||
};
|
||||
|
||||
gFloat4.mul_scalar = [](const float* a, float s, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] * s;
|
||||
};
|
||||
|
||||
gFloat4.div = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] / b[i];
|
||||
};
|
||||
|
||||
gFloat4.div_scalar = [](const float* a, float s, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] / s;
|
||||
};
|
||||
|
||||
gFloat4.dot = [](const float* a, const float* b) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 4; i++) sum += a[i] * b[i];
|
||||
return sum;
|
||||
};
|
||||
|
||||
gFloat4.length = [](const float* a) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 4; i++) sum += a[i] * a[i];
|
||||
return sqrtf(sum);
|
||||
};
|
||||
|
||||
gFloat4.lengthSquared = [](const float* a) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 4; i++) sum += a[i] * a[i];
|
||||
return (sum);
|
||||
};
|
||||
|
||||
gFloat4.normalize = [](float* a) {
|
||||
float len = gFloat4.length(a);
|
||||
if (len > POINT_EPSILON) for (int i = 0; i < 4; i++) a[i] /= len;
|
||||
};
|
||||
|
||||
gFloat4.lerp = [](const float* from, const float* to, float f, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = from[i] + (to[i] - from[i]) * f;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -65,8 +65,8 @@ namespace math_backend::float4
|
|||
{
|
||||
f32x4 va = v_load(a);
|
||||
f32x4 vb = v_load(b);
|
||||
f32x4 vmul = v_mul(va, vb);
|
||||
return v_hadd4(vmul);
|
||||
f32x4 vdot = v_dot4(va, vb); // calls ISA-specific implementation
|
||||
return v_extract0(vdot);
|
||||
}
|
||||
|
||||
// Length squared
|
||||
|
|
@ -84,21 +84,22 @@ namespace math_backend::float4
|
|||
// Normalize in-place
|
||||
inline void float4_normalize_impl(float* a)
|
||||
{
|
||||
float len = float4_length_impl(a);
|
||||
if (len > POINT_EPSILON) // safe threshold
|
||||
{
|
||||
float4_mul_scalar_impl(a, 1.0f / len, a);
|
||||
}
|
||||
f32x4 va = v_load(a);
|
||||
f32x4 invLen = v_rsqrt_nr(v_dot4(va, va)); // fully abstracted
|
||||
f32x4 vnorm = v_mul(va, invLen);
|
||||
v_store(a, vnorm);
|
||||
}
|
||||
|
||||
// Normalize with magnitude: r = normalize(a) * r
|
||||
inline void float4_normalize_mag_impl(float* a, float r)
|
||||
{
|
||||
float len = float4_length_impl(a);
|
||||
if (len > POINT_EPSILON)
|
||||
{
|
||||
float4_mul_scalar_impl(a, r / len, a);
|
||||
}
|
||||
f32x4 va = v_load(a);
|
||||
|
||||
// invLen = r / sqrt(dot(a,a)) = r * rsqrt(dot(a,a))
|
||||
f32x4 invLen = v_mul(v_set1(r), v_rsqrt_nr(v_dot4(va, va)));
|
||||
|
||||
f32x4 vnorm = v_mul(va, invLen);
|
||||
v_store(a, vnorm);
|
||||
}
|
||||
|
||||
// Linear interpolation: r = from + (to - from) * f
|
||||
|
|
@ -111,4 +112,12 @@ namespace math_backend::float4
|
|||
v_store(r, vr);
|
||||
}
|
||||
|
||||
inline void float4_cross_impl(const float* a, const float* b, float* r)
|
||||
{
|
||||
f32x4 va = v_load(a);
|
||||
f32x4 vb = v_load(b);
|
||||
f32x4 vcross = v_cross(va, vb);
|
||||
v_store(r, vcross);
|
||||
}
|
||||
|
||||
} // namespace math_backend::float4
|
||||
|
|
|
|||
208
Engine/source/math/impl/math_c.cpp
Normal file
208
Engine/source/math/impl/math_c.cpp
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
#include "math/public/float4_dispatch.h"
|
||||
#include "math/public/float3_dispatch.h"
|
||||
#include "math/public/mat44_dispatch.h"
|
||||
#include "math/mConstants.h"
|
||||
#include <cmath> // for sqrtf, etc.
|
||||
|
||||
namespace math_backend::float4::dispatch
|
||||
{
|
||||
void install_scalar()
|
||||
{
|
||||
gFloat4.add = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] + b[i];
|
||||
};
|
||||
|
||||
gFloat4.sub = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] - b[i];
|
||||
};
|
||||
|
||||
gFloat4.mul = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] * b[i];
|
||||
};
|
||||
|
||||
gFloat4.mul_scalar = [](const float* a, float s, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] * s;
|
||||
};
|
||||
|
||||
gFloat4.div = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] / b[i];
|
||||
};
|
||||
|
||||
gFloat4.div_scalar = [](const float* a, float s, float* r) {
|
||||
float denom = 1.0f / s;
|
||||
for (int i = 0; i < 4; i++) r[i] = a[i] * denom;
|
||||
};
|
||||
|
||||
gFloat4.dot = [](const float* a, const float* b) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 4; i++) sum += a[i] * b[i];
|
||||
return sum;
|
||||
};
|
||||
|
||||
gFloat4.length = [](const float* a) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 4; i++) sum += a[i] * a[i];
|
||||
return std::sqrt(sum);
|
||||
};
|
||||
|
||||
gFloat4.lengthSquared = [](const float* a) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 4; i++) sum += a[i] * a[i];
|
||||
return (sum);
|
||||
};
|
||||
|
||||
gFloat4.normalize = [](float* a) {
|
||||
float len = gFloat4.length(a);
|
||||
if (len > POINT_EPSILON)
|
||||
{
|
||||
float denom = 1.0f / len;
|
||||
for (int i = 0; i < 4; i++)
|
||||
a[i] *= denom;
|
||||
}
|
||||
};
|
||||
|
||||
gFloat4.normalize_mag = [](float* a, float f) {
|
||||
float len = gFloat4.length(a);
|
||||
if (len > POINT_EPSILON)
|
||||
{
|
||||
float denom = f / len;
|
||||
for (int i = 0; i < 4; i++) a[i] *= denom;
|
||||
}
|
||||
};
|
||||
|
||||
gFloat4.lerp = [](const float* from, const float* to, float f, float* r) {
|
||||
for (int i = 0; i < 4; i++) r[i] = from[i] + (to[i] - from[i]) * f;
|
||||
};
|
||||
|
||||
gFloat4.cross = [](const float* a, const float* b, float* r) {
|
||||
const float ax = a[0];
|
||||
const float ay = a[1];
|
||||
const float az = a[2];
|
||||
|
||||
const float bx = b[0];
|
||||
const float by = b[1];
|
||||
const float bz = b[2];
|
||||
|
||||
r[0] = ay * bz - az * by;
|
||||
r[1] = az * bx - ax * bz;
|
||||
r[2] = ax * by - ay * bx;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
namespace math_backend::float3::dispatch
|
||||
{
|
||||
void install_scalar()
|
||||
{
|
||||
gFloat3.add = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 3; i++) r[i] = a[i] + b[i];
|
||||
};
|
||||
|
||||
gFloat3.sub = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 3; i++) r[i] = a[i] - b[i];
|
||||
};
|
||||
|
||||
gFloat3.mul = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 3; i++) r[i] = a[i] * b[i];
|
||||
};
|
||||
|
||||
gFloat3.mul_scalar = [](const float* a, float s, float* r) {
|
||||
for (int i = 0; i < 3; i++) r[i] = a[i] * s;
|
||||
};
|
||||
|
||||
gFloat3.div = [](const float* a, const float* b, float* r) {
|
||||
for (int i = 0; i < 3; i++) r[i] = a[i] / b[i];
|
||||
};
|
||||
|
||||
gFloat3.div_scalar = [](const float* a, float s, float* r) {
|
||||
float denom = 1.0f / s;
|
||||
for (int i = 0; i < 3; i++) r[i] = a[i] * denom;
|
||||
};
|
||||
|
||||
gFloat3.dot = [](const float* a, const float* b) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 3; i++) sum += a[i] * b[i];
|
||||
return sum;
|
||||
};
|
||||
|
||||
gFloat3.length = [](const float* a) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 3; i++) sum += a[i] * a[i];
|
||||
return std::sqrt(sum);
|
||||
};
|
||||
|
||||
gFloat3.lengthSquared = [](const float* a) {
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < 3; i++) sum += a[i] * a[i];
|
||||
return (sum);
|
||||
};
|
||||
|
||||
gFloat3.normalize = [](float* a) {
|
||||
float len = gFloat3.length(a);
|
||||
if (len > POINT_EPSILON)
|
||||
{
|
||||
float denom = 1.0 / len;
|
||||
for (int i = 0; i < 3; i++) a[i] *= denom;
|
||||
}
|
||||
};
|
||||
|
||||
gFloat3.normalize_mag = [](float* a, float f) {
|
||||
float len = gFloat3.length(a);
|
||||
if (len > POINT_EPSILON)
|
||||
{
|
||||
float denom = f / len;
|
||||
for (int i = 0; i < 3; i++) a[i] *= denom;
|
||||
}
|
||||
};
|
||||
|
||||
gFloat3.lerp = [](const float* from, const float* to, float f, float* r) {
|
||||
for (int i = 0; i < 3; i++) r[i] = from[i] + (to[i] - from[i]) * f;
|
||||
};
|
||||
|
||||
gFloat3.cross = [](const float* a, const float* b, float* r) {
|
||||
const float ax = a[0];
|
||||
const float ay = a[1];
|
||||
const float az = a[2];
|
||||
|
||||
const float bx = b[0];
|
||||
const float by = b[1];
|
||||
const float bz = b[2];
|
||||
|
||||
r[0] = ay * bz - az * by;
|
||||
r[1] = az * bx - ax * bz;
|
||||
r[2] = ax * by - ay * bx;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Exchanges the values of two floats.
inline void swap(float& a, float& b)
{
    const float held = b;
    b = a;
    a = held;
}
|
||||
|
||||
|
||||
namespace math_backend::mat44::dispatch
|
||||
{
|
||||
void install_scalar()
|
||||
{
|
||||
gMat44.transpose = [](float* a) {
|
||||
swap(a[1], a[4]);
|
||||
swap(a[2], a[8]);
|
||||
swap(a[3], a[12]);
|
||||
swap(a[6], a[9]);
|
||||
swap(a[7], a[13]);
|
||||
swap(a[11], a[14]);
|
||||
};
|
||||
|
||||
gMat44.scale = [](float* a, const float* s) {
|
||||
// Note, doesn't allow scaling w...
|
||||
|
||||
a[0] *= s[0]; a[1] *= s[1]; a[2] *= s[2];
|
||||
a[4] *= s[0]; a[5] *= s[1]; a[6] *= s[2];
|
||||
a[8] *= s[0]; a[9] *= s[1]; a[10] *= s[2];
|
||||
a[12] *= s[0]; a[13] *= s[1]; a[14] *= s[2];
|
||||
};
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue