add normal safety

Wrap zero-length safety checks around normalization. This was already done in the scalar math, so we may as well do it here too, just in case.

Add the impl.inl files to the libraries so they can actually be found.
This commit is contained in:
marauder2k7 2026-03-04 23:49:08 +00:00
parent 5a6467d54a
commit 8c1acbd1da
7 changed files with 61 additions and 14 deletions

View file

@ -85,9 +85,8 @@ namespace math_backend::float3
// Normalize a 3-component float array in place: a = a / |a|.
// Delegates to v_normalize3 so the zero-length safety fallback lives in
// one place instead of being re-derived from rsqrt(dot) here.
inline void float3_normalize_impl(float* a)
{
   f32x4 va = v_load3_vec(a);
   f32x4 vr = v_normalize3(va);
   v_store3(a, vr);
}
// Normalize with magnitude: r = normalize(a) * r

View file

@ -183,8 +183,15 @@ namespace
// Normalize the xyz lanes of v: v * rsqrt(dot3(v, v)).
// Safety: a zero-length input would feed 0 into rsqrt and produce inf/NaN,
// so those lanes are replaced with a unit-z fallback {0,0,1,0}.
// NOTE(review): only an exact dot == 0 is caught; denormal-length vectors
// still go through rsqrt — confirm that is acceptable for callers.
inline f32x4 v_normalize3(f32x4 v)
{
   const f32x4 zero     = _mm_setzero_ps();
   const f32x4 fallback = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f); // {0,0,1,0} unit z
   f32x4 dot    = v_dot3(v, v);
   f32x4 inv    = v_rsqrt_nr(dot);
   f32x4 isZero = _mm_cmpeq_ps(dot, zero); // all-ones lane mask where dot == 0
   f32x4 norm   = _mm_mul_ps(v, inv);
   // Per-lane select: fallback where isZero, norm elsewhere (SSE4.1 blendv).
   return _mm_blendv_ps(norm, fallback, isZero);
}
// adds all 4 lanes together.

View file

@ -183,8 +183,15 @@ namespace
// Normalize the xyz lanes of v: v * rsqrt(dot3(v, v)).
// Safety: a zero-length input would feed 0 into rsqrt and produce inf/NaN,
// so those lanes are replaced with a unit-z fallback {0,0,1,0}.
// NOTE(review): only an exact dot == 0 is caught; denormal-length vectors
// still go through rsqrt — confirm that is acceptable for callers.
inline f32x4 v_normalize3(f32x4 v)
{
   const f32x4 zero     = _mm_setzero_ps();
   const f32x4 fallback = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f); // {0,0,1,0} unit z
   f32x4 dot    = v_dot3(v, v);
   f32x4 inv    = v_rsqrt_nr(dot);
   f32x4 isZero = _mm_cmpeq_ps(dot, zero); // all-ones lane mask where dot == 0
   f32x4 norm   = _mm_mul_ps(v, inv);
   // Per-lane select: fallback where isZero, norm elsewhere (SSE4.1 blendv).
   return _mm_blendv_ps(norm, fallback, isZero);
}
// adds all 4 lanes together.

View file

@ -205,8 +205,19 @@ namespace
// Normalize the xyz lanes of v: v * rsqrt(dot3(v, v)) — NEON backend.
// Safety: a zero-length input would feed 0 into rsqrt and produce inf/NaN,
// so those lanes are replaced with a unit-z fallback {0,0,1,0}.
// NOTE(review): only an exact dot == 0 is caught; denormal-length vectors
// still go through rsqrt — confirm that is acceptable for callers.
inline f32x4 v_normalize3(f32x4 v)
{
   const float32x4_t zero     = vdupq_n_f32(0.0f);
   const float32x4_t fallback = {0.0f, 0.0f, 1.0f, 0.0f}; // unit z
   f32x4 dot = v_dot3(v, v);
   // All-ones lane mask where dot == 0 (zero-length input).
   uint32x4_t isZero = vceqq_f32(dot, zero);
   f32x4 inv  = v_rsqrt_nr(dot);
   f32x4 norm = vmulq_f32(v, inv);
   // Bit-select: fallback where the mask is set, norm elsewhere.
   return vbslq_f32(isZero, fallback, norm);
}
inline f32x4 v_hadd4(f32x4 a)

View file

@ -216,8 +216,22 @@ namespace
// Normalize the xyz lanes of v: v * rsqrt(dot3(v, v)) — SSE2-only backend.
// Safety: a zero-length input would feed 0 into rsqrt and produce inf/NaN,
// so those lanes are replaced with a unit-z fallback {0,0,1,0}.
// NOTE(review): only an exact dot == 0 is caught; denormal-length vectors
// still go through rsqrt — confirm that is acceptable for callers.
inline f32x4 v_normalize3(f32x4 v)
{
   const f32x4 zero     = _mm_setzero_ps();
   const f32x4 fallback = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f); // {0,0,1,0} unit z
   f32x4 dot    = v_dot3(v, v);
   f32x4 inv    = v_rsqrt_nr(dot);
   f32x4 isZero = _mm_cmpeq_ps(dot, zero); // all-ones lane mask where dot == 0
   f32x4 norm   = _mm_mul_ps(v, inv);
   // SSE2-compatible lane select (no SSE4.1 blendv available):
   // (mask & fallback) | (~mask & norm) — the x86 equivalent of NEON's vbsl.
   f32x4 result = _mm_or_ps(
      _mm_and_ps(isZero, fallback),
      _mm_andnot_ps(isZero, norm));
   return result;
}
// adds all 4 lanes together.

View file

@ -195,8 +195,15 @@ namespace
// Normalize the xyz lanes of v: v * rsqrt(dot3(v, v)).
// Safety: a zero-length input would feed 0 into rsqrt and produce inf/NaN,
// so those lanes are replaced with a unit-z fallback {0,0,1,0}.
// NOTE(review): only an exact dot == 0 is caught; denormal-length vectors
// still go through rsqrt — confirm that is acceptable for callers.
inline f32x4 v_normalize3(f32x4 v)
{
   const f32x4 zero     = _mm_setzero_ps();
   const f32x4 fallback = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f); // {0,0,1,0} unit z
   f32x4 dot    = v_dot3(v, v);
   f32x4 inv    = v_rsqrt_nr(dot);
   f32x4 isZero = _mm_cmpeq_ps(dot, zero); // all-ones lane mask where dot == 0
   f32x4 norm   = _mm_mul_ps(v, inv);
   // Per-lane select: fallback where isZero, norm elsewhere (SSE4.1 blendv).
   return _mm_blendv_ps(norm, fallback, isZero);
}
// adds all 4 lanes together.