neon implementation

removed some x86 intrinsic functions that were in the mat44_impl file reinstated some mMath_C functions and mMathFn ptrs trying to diagnose an issue. Had to come up with a different way to initialize the scalar table if the isa tables are not initialized yet. Mac did not like the static initialization. Had to change neon over to using explicit masks for shifting, cross product was failing during bakes and matrix calculations
2026-03-19 12:20:57 +00:00 · 2026-03-04 08:41:57 +00:00 · 2026-03-04 08:41:57 +00:00 · 0ba8d948fb
commit 0ba8d948fb
parent bb1478a8c3
10 changed files with 521 additions and 142 deletions
--- a/Engine/source/math/impl/mat44_impl.inl
+++ b/Engine/source/math/impl/mat44_impl.inl
@ -239,24 +239,6 @@ namespace math_backend::mat44
      m_store(m, r);
   }

-   inline void mat44_trs_impl(float* m, const float* t, const float* r_euler, const float* s)
-   {
-      f32x4x4 mr;
-      mat44_rotation_euler_impl((float*)&mr, r_euler[0], r_euler[1], r_euler[2]);
-
-      f32x4 vs = v_load3_vec(s); // scale xyz
-      mr.r0 = v_mul(mr.r0, vs);
-      mr.r1 = v_mul(mr.r1, vs);
-      mr.r2 = v_mul(mr.r2, vs);
-
-      mr.r0 = v_insert_w(mr.r0, _mm_set_ss(t[0]));
-      mr.r1 = v_insert_w(mr.r1, _mm_set_ss(t[1]));
-      mr.r2 = v_insert_w(mr.r2, _mm_set_ss(t[2]));
-      mr.r3 = v_set(0, 0, 0, 1.0f);
-
-      m_store(m, mr);
-   }
-
   inline void mat44_lookat_impl(float* m, const float* eye, const float* target, const float* up)
   {
      f32x4 vEye = v_load3_pos(eye);
--- a/Engine/source/math/impl/math_c.cpp
+++ b/Engine/source/math/impl/math_c.cpp
@ -318,26 +318,26 @@ namespace math_backend::mat44::dispatch
         r[2] = a[8] * b[0] + a[9] * b[1] + a[10] * b[2];
      };

-      gMat44.mul_mat44 = [](const float* a, const float* b, float* r) {
-         r[0] = a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12];
-         r[1] = a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13];
-         r[2] = a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14];
-         r[3] = a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15];
+      gMat44.mul_mat44 = [](const float* a, const float* b, float* mresult) {
+         mresult[0] = a[0]*b[0] + a[1]*b[4] + a[2]*b[8]  + a[3]*b[12];
+         mresult[1] = a[0]*b[1] + a[1]*b[5] + a[2]*b[9]  + a[3]*b[13];
+         mresult[2] = a[0]*b[2] + a[1]*b[6] + a[2]*b[10] + a[3]*b[14];
+         mresult[3] = a[0]*b[3] + a[1]*b[7] + a[2]*b[11] + a[3]*b[15];

-         r[4] = a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12];
-         r[5] = a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13];
-         r[6] = a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14];
-         r[7] = a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15];
+         mresult[4] = a[4]*b[0] + a[5]*b[4] + a[6]*b[8]  + a[7]*b[12];
+         mresult[5] = a[4]*b[1] + a[5]*b[5] + a[6]*b[9]  + a[7]*b[13];
+         mresult[6] = a[4]*b[2] + a[5]*b[6] + a[6]*b[10] + a[7]*b[14];
+         mresult[7] = a[4]*b[3] + a[5]*b[7] + a[6]*b[11] + a[7]*b[15];

-         r[8] = a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12];
-         r[9] = a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13];
-         r[10] = a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14];
-         r[11] = a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15];
+         mresult[8] = a[8]*b[0] + a[9]*b[4] + a[10]*b[8] + a[11]*b[12];
+         mresult[9] = a[8]*b[1] + a[9]*b[5] + a[10]*b[9] + a[11]*b[13];
+         mresult[10]= a[8]*b[2] + a[9]*b[6] + a[10]*b[10]+ a[11]*b[14];
+         mresult[11]= a[8]*b[3] + a[9]*b[7] + a[10]*b[11]+ a[11]*b[15];

-         r[12] = a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12];
-         r[13] = a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13];
-         r[14] = a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14];
-         r[15] = a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15];
+         mresult[12]= a[12]*b[0]+ a[13]*b[4]+ a[14]*b[8] + a[15]*b[12];
+         mresult[13]= a[12]*b[1]+ a[13]*b[5]+ a[14]*b[9] + a[15]*b[13];
+         mresult[14]= a[12]*b[2]+ a[13]*b[6]+ a[14]*b[10]+ a[15]*b[14];
+         mresult[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15];
      };

      gMat44.normalize = [](float* a) {