mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-03-19 12:20:57 +00:00
fix batch on neon
This commit is contained in:
parent
add7f2a5d7
commit
c09d5a4579
4 changed files with 21 additions and 21 deletions
|
|
@ -392,9 +392,9 @@ namespace math_backend::mat44
|
|||
// MATRIX BATCH FUNCTIONS
|
||||
//--------------------------------------------------
|
||||
|
||||
inline void mat44_batch_mul_pos3(const float* m, const float* points, size_t count, float* result)
|
||||
inline void mat44_batch_mul_pos3(const float* m, const float* points, int count, float* result)
|
||||
{
|
||||
size_t i = 0;
|
||||
int i = 0;
|
||||
f32x4x4 ma = m_load(m);
|
||||
|
||||
// AVX has 8 lanes to play with
|
||||
|
|
|
|||
|
|
@ -488,11 +488,11 @@ namespace math_backend::mat44::dispatch
|
|||
|
||||
};
|
||||
|
||||
gMat44.batch_mul_pos3 = [](const float* m, const float* pts, size_t count, float* result_ptrs) {
|
||||
size_t i = 0;
|
||||
gMat44.batch_mul_pos3 = [](const float* m, const float* pts, int count, float* result_ptrs) {
|
||||
int i = 0;
|
||||
for (; i < count; i++)
|
||||
{
|
||||
size_t idx = i * 3;
|
||||
int idx = i * 3;
|
||||
gMat44.mul_pos3(m, &pts[idx], &result_ptrs[idx]);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -422,9 +422,9 @@ namespace
|
|||
{
|
||||
vec4_batch4 r;
|
||||
|
||||
r.x = (f32x4){ ptr[9], ptr[6], ptr[3], ptr[0] };
|
||||
r.y = (f32x4){ ptr[10], ptr[7], ptr[4], ptr[1] };
|
||||
r.z = (f32x4){ ptr[11], ptr[8], ptr[5], ptr[2] };
|
||||
r.x = (f32x4){ ptr[0], ptr[3], ptr[6], ptr[9] };
|
||||
r.y = (f32x4){ ptr[1], ptr[4], ptr[7], ptr[10] };
|
||||
r.z = (f32x4){ ptr[2], ptr[5], ptr[8], ptr[11] };
|
||||
|
||||
if (fillW)
|
||||
{
|
||||
|
|
@ -456,20 +456,20 @@ namespace
|
|||
{
|
||||
vec4_batch4 r;
|
||||
|
||||
float32x4_t m00 = vdupq_n_f32(m.r0.m128_f32[0]);
|
||||
float32x4_t m01 = vdupq_n_f32(m.r0.m128_f32[1]);
|
||||
float32x4_t m02 = vdupq_n_f32(m.r0.m128_f32[2]);
|
||||
float32x4_t m03 = vdupq_n_f32(m.r0.m128_f32[3]);
|
||||
float32x4_t m00 = vdupq_n_f32(m.r0[0]);
|
||||
float32x4_t m01 = vdupq_n_f32(m.r0[1]);
|
||||
float32x4_t m02 = vdupq_n_f32(m.r0[2]);
|
||||
float32x4_t m03 = vdupq_n_f32(m.r0[3]);
|
||||
|
||||
float32x4_t m10 = vdupq_n_f32(m.r1.m128_f32[0]);
|
||||
float32x4_t m11 = vdupq_n_f32(m.r1.m128_f32[1]);
|
||||
float32x4_t m12 = vdupq_n_f32(m.r1.m128_f32[2]);
|
||||
float32x4_t m13 = vdupq_n_f32(m.r1.m128_f32[3]);
|
||||
float32x4_t m10 = vdupq_n_f32(m.r1[0]);
|
||||
float32x4_t m11 = vdupq_n_f32(m.r1[1]);
|
||||
float32x4_t m12 = vdupq_n_f32(m.r1[2]);
|
||||
float32x4_t m13 = vdupq_n_f32(m.r1[3]);
|
||||
|
||||
float32x4_t m20 = vdupq_n_f32(m.r2.m128_f32[0]);
|
||||
float32x4_t m21 = vdupq_n_f32(m.r2.m128_f32[1]);
|
||||
float32x4_t m22 = vdupq_n_f32(m.r2.m128_f32[2]);
|
||||
float32x4_t m23 = vdupq_n_f32(m.r2.m128_f32[3]);
|
||||
float32x4_t m20 = vdupq_n_f32(m.r2[0]);
|
||||
float32x4_t m21 = vdupq_n_f32(m.r2[1]);
|
||||
float32x4_t m22 = vdupq_n_f32(m.r2[2]);
|
||||
float32x4_t m23 = vdupq_n_f32(m.r2[3]);
|
||||
|
||||
// row0 dot
|
||||
r.x = vaddq_f32(
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ namespace math_backend::mat44::dispatch
|
|||
void (*scale)(float*, const float*) = nullptr;
|
||||
void (*get_scale)(const float*, float*) = nullptr;
|
||||
|
||||
void (*batch_mul_pos3)(const float* m, const float* pts, size_t count, float* result_ptrs) = nullptr;
|
||||
void (*batch_mul_pos3)(const float* m, const float* pts, int count, float* result_ptrs) = nullptr;
|
||||
};
|
||||
|
||||
// Global dispatch table
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue