From c09d5a457902fe5fb6f678ee247b60ad67f6c96f Mon Sep 17 00:00:00 2001 From: marauder2k7 Date: Thu, 5 Mar 2026 20:04:33 +0000 Subject: [PATCH] fix batch on neon --- Engine/source/math/impl/mat44_impl.inl | 4 +-- Engine/source/math/impl/math_c.cpp | 6 ++-- Engine/source/math/isa/neon/neon_intrinsics.h | 30 +++++++++---------- Engine/source/math/public/mat44_dispatch.h | 2 +- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Engine/source/math/impl/mat44_impl.inl b/Engine/source/math/impl/mat44_impl.inl index c203ecf5e..1bf009a50 100644 --- a/Engine/source/math/impl/mat44_impl.inl +++ b/Engine/source/math/impl/mat44_impl.inl @@ -392,9 +392,9 @@ namespace math_backend::mat44 // MATRIX BATCH FUNCTIONS //-------------------------------------------------- - inline void mat44_batch_mul_pos3(const float* m, const float* points, size_t count, float* result) + inline void mat44_batch_mul_pos3(const float* m, const float* points, int count, float* result) { - size_t i = 0; + int i = 0; f32x4x4 ma = m_load(m); // AVX has 8 lanes to play with diff --git a/Engine/source/math/impl/math_c.cpp b/Engine/source/math/impl/math_c.cpp index 6d45350fa..285f43abc 100644 --- a/Engine/source/math/impl/math_c.cpp +++ b/Engine/source/math/impl/math_c.cpp @@ -488,11 +488,11 @@ namespace math_backend::mat44::dispatch }; - gMat44.batch_mul_pos3 = [](const float* m, const float* pts, size_t count, float* result_ptrs) { - size_t i = 0; + gMat44.batch_mul_pos3 = [](const float* m, const float* pts, int count, float* result_ptrs) { + int i = 0; for (; i < count; i++) { - size_t idx = i * 3; + int idx = i * 3; gMat44.mul_pos3(m, &pts[idx], &result_ptrs[idx]); } }; diff --git a/Engine/source/math/isa/neon/neon_intrinsics.h b/Engine/source/math/isa/neon/neon_intrinsics.h index a02da6391..fc93c9c97 100644 --- a/Engine/source/math/isa/neon/neon_intrinsics.h +++ b/Engine/source/math/isa/neon/neon_intrinsics.h @@ -422,9 +422,9 @@ namespace { vec4_batch4 r; - r.x = (f32x4){ ptr[9], ptr[6], ptr[3], ptr[0] }; - r.y = (f32x4){ ptr[10], ptr[7], ptr[4], ptr[1] }; - r.z = (f32x4){ ptr[11], ptr[8], ptr[5], ptr[2] }; + r.x = (f32x4){ ptr[0], ptr[3], ptr[6], ptr[9] }; + r.y = (f32x4){ ptr[1], ptr[4], ptr[7], ptr[10] }; + r.z = (f32x4){ ptr[2], ptr[5], ptr[8], ptr[11] }; if (fillW) { @@ -456,20 +456,20 @@ namespace { vec4_batch4 r; - float32x4_t m00 = vdupq_n_f32(m.r0.m128_f32[0]); - float32x4_t m01 = vdupq_n_f32(m.r0.m128_f32[1]); - float32x4_t m02 = vdupq_n_f32(m.r0.m128_f32[2]); - float32x4_t m03 = vdupq_n_f32(m.r0.m128_f32[3]); + float32x4_t m00 = vdupq_n_f32(m.r0[0]); + float32x4_t m01 = vdupq_n_f32(m.r0[1]); + float32x4_t m02 = vdupq_n_f32(m.r0[2]); + float32x4_t m03 = vdupq_n_f32(m.r0[3]); - float32x4_t m10 = vdupq_n_f32(m.r1.m128_f32[0]); - float32x4_t m11 = vdupq_n_f32(m.r1.m128_f32[1]); - float32x4_t m12 = vdupq_n_f32(m.r1.m128_f32[2]); - float32x4_t m13 = vdupq_n_f32(m.r1.m128_f32[3]); + float32x4_t m10 = vdupq_n_f32(m.r1[0]); + float32x4_t m11 = vdupq_n_f32(m.r1[1]); + float32x4_t m12 = vdupq_n_f32(m.r1[2]); + float32x4_t m13 = vdupq_n_f32(m.r1[3]); - float32x4_t m20 = vdupq_n_f32(m.r2.m128_f32[0]); - float32x4_t m21 = vdupq_n_f32(m.r2.m128_f32[1]); - float32x4_t m22 = vdupq_n_f32(m.r2.m128_f32[2]); - float32x4_t m23 = vdupq_n_f32(m.r2.m128_f32[3]); + float32x4_t m20 = vdupq_n_f32(m.r2[0]); + float32x4_t m21 = vdupq_n_f32(m.r2[1]); + float32x4_t m22 = vdupq_n_f32(m.r2[2]); + float32x4_t m23 = vdupq_n_f32(m.r2[3]); // row0 dot r.x = vaddq_f32( diff --git a/Engine/source/math/public/mat44_dispatch.h b/Engine/source/math/public/mat44_dispatch.h index 6f6ade3ab..b49ef9b84 100644 --- a/Engine/source/math/public/mat44_dispatch.h +++ b/Engine/source/math/public/mat44_dispatch.h @@ -21,7 +21,7 @@ namespace math_backend::mat44::dispatch void (*scale)(float*, const float*) = nullptr; void (*get_scale)(const float*, float*) = nullptr; - void (*batch_mul_pos3)(const float* m, const float* pts, size_t count, float* result_ptrs) = nullptr; + void (*batch_mul_pos3)(const float* m, const float* pts, int count, float* result_ptrs) = nullptr; }; // Global dispatch table