diff --git a/Engine/source/math/isa/neon/neon_intrinsics.h b/Engine/source/math/isa/neon/neon_intrinsics.h index e118b1600..fdd291e48 100644 --- a/Engine/source/math/isa/neon/neon_intrinsics.h +++ b/Engine/source/math/isa/neon/neon_intrinsics.h @@ -59,6 +59,24 @@ namespace ); } + inline f32x4 v_swizzle_mask(f32x4 v, const int x, const int y, const int z, const int w) + { + uint8x16_t table = vreinterpretq_u8_f32(v); + + // Each float = 4 bytes, so build byte indices + uint8_t idx_data[16] = { + x * 4 + 0, x * 4 + 1, x * 4 + 2, x * 4 + 3, + y * 4 + 0, y * 4 + 1, y * 4 + 2, y * 4 + 3, + z * 4 + 0, z * 4 + 1, z * 4 + 2, z * 4 + 3, + w * 4 + 0, w * 4 + 1, w * 4 + 2, w * 4 + 3 + }; + + uint8x16_t idx = vld1q_u8(idx_data); + + uint8x16_t result = vqtbl1q_u8(table, idx); + return vreinterpretq_f32_u8(result); + } + inline f32x4 v_swizzle_singular_mask(f32x4 v, int x) { // base byte index of the float lane