moar changes

The Mac implementation used _mm_div (an x86 intrinsic).
This commit is contained in:
marauder2k7 2026-02-26 16:40:01 +00:00
parent 4e7fdd167b
commit 9ebcee420f
9 changed files with 248 additions and 16 deletions

View file

@ -34,6 +34,15 @@ namespace
__m128 t2 = _mm_hadd_ps(t1, t1); // sums again: first element = a0+a1+a2+a3
return _mm_cvtss_f32(t2); // extract first element
}
// specialized dot product for AVX
float float4_dot_avx(const float* a, const float* b)
{
f32x4 va = _mm_loadu_ps(a);
f32x4 vb = _mm_loadu_ps(b);
__m128 dp = _mm_dp_ps(va, vb, 0xF1); // multiply all 4, sum all 4, lowest lane
return _mm_cvtss_f32(dp);
}
}
#include "float4_impl.inl"
@ -49,7 +58,7 @@ namespace math_backend::float4::dispatch
gFloat4.mul_scalar = float4_mul_scalar_impl;
gFloat4.div = float4_div_impl;
gFloat4.div_scalar = float4_div_scalar_impl;
gFloat4.dot = float4_dot_impl;
gFloat4.dot = float4_dot_avx;
gFloat4.length = float4_length_impl;
gFloat4.lengthSquared = float4_length_squared_impl;
gFloat4.normalize = float4_normalize_impl;