diff --git a/Engine/source/math/mMathAMD.cpp b/Engine/source/math/mMathAMD.cpp deleted file mode 100644 index 2df3729ef..000000000 --- a/Engine/source/math/mMathAMD.cpp +++ /dev/null @@ -1,216 +0,0 @@ -//----------------------------------------------------------------------------- -// Copyright (c) 2012 GarageGames, LLC -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//----------------------------------------------------------------------------- - -#include "math/mMathFn.h" -#include "math/mPlane.h" -#include "math/mMatrix.h" - - -// extern void (*m_matF_x_point3F)(const F32 *m, const F32 *p, F32 *presult); -// extern void (*m_matF_x_vectorF)(const F32 *m, const F32 *v, F32 *vresult); - -/* not currently implemented. 
-void Athlon_MatrixF_x_Point3F(const F32 *m, const F32 *p, F32 *presult) -{ - m; - p; - presult; -} -*/ - -//============================================================ -// Here's the C code for MatF_x_MatF: -// note that the code below does it in a different order (optimal asm, after all!) -// -// r[0] = a[0]*b[0] + a[1]*b[4] + a[2]*b[8] + a[3]*b[12]; -// r[1] = a[0]*b[1] + a[1]*b[5] + a[2]*b[9] + a[3]*b[13]; -// r[2] = a[0]*b[2] + a[1]*b[6] + a[2]*b[10] + a[3]*b[14]; -// r[3] = a[0]*b[3] + a[1]*b[7] + a[2]*b[11] + a[3]*b[15]; -// -// r[4] = a[4]*b[0] + a[5]*b[4] + a[6]*b[8] + a[7]*b[12]; -// r[5] = a[4]*b[1] + a[5]*b[5] + a[6]*b[9] + a[7]*b[13]; -// r[6] = a[4]*b[2] + a[5]*b[6] + a[6]*b[10] + a[7]*b[14]; -// r[7] = a[4]*b[3] + a[5]*b[7] + a[6]*b[11] + a[7]*b[15]; -// -// r[8] = a[8]*b[0] + a[9]*b[4] + a[10]*b[8] + a[11]*b[12]; -// r[9] = a[8]*b[1] + a[9]*b[5] + a[10]*b[9] + a[11]*b[13]; -// r[10]= a[8]*b[2] + a[9]*b[6] + a[10]*b[10]+ a[11]*b[14]; -// r[11]= a[8]*b[3] + a[9]*b[7] + a[10]*b[11]+ a[11]*b[15]; -// -// r[12]= a[12]*b[0]+ a[13]*b[4]+ a[14]*b[8] + a[15]*b[12]; -// r[13]= a[12]*b[1]+ a[13]*b[5]+ a[14]*b[9] + a[15]*b[13]; -// r[14]= a[12]*b[2]+ a[13]*b[6]+ a[14]*b[10]+ a[15]*b[14]; -// r[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15]; -//============================================================ - -#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM) -#define ADD_3DNOW_FUNCS -// inlined version here. 
-void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result) -{ - __asm - { - femms - - mov ecx, matA - mov edx, matB - mov eax, result - - prefetch [ecx+32] ;// These may help - - prefetch [edx+32] ;// and probably don't hurt - - movq mm0,[ecx] ;// a21 | a11 - movq mm1,[ecx+8] ;// a41 | a31 - movq mm4,[edx] ;// b21 | b11 - punpckhdq mm2,mm0 ;// a21 | - movq mm5,[edx+16] ;// b22 | b12 - punpckhdq mm3,mm1 ;// a41 | - movq mm6,[edx+32] ;// b23 | b13 - punpckldq mm0,mm0 ;// a11 | a11 - punpckldq mm1,mm1 ;// a31 | a31 - pfmul mm4,mm0 ;// a11*b21 | a11*b11 - punpckhdq mm2,mm2 ;// a21 | a21 - pfmul mm0,[edx+8] ;// a11*b41 | a11*b31 - movq mm7,[edx+48] ;// b24 | b14 - pfmul mm5,mm2 ;// a21*b22 | a21*b12 - punpckhdq mm3,mm3 ;// a41 | a41 - pfmul mm2,[edx+24] ;// a21*b42 | a21*b32 - pfmul mm6,mm1 ;// a31*b23 | a31*b13 - pfadd mm5,mm4 ;// a21*b22 + a11*b21 | a21*b12 + a11*b11 - pfmul mm1,[edx+40] ;// a31*b43 | a31*b33 - pfadd mm2,mm0 ;// a21*b42 + a11*b41 | a21*b32 + a11*b31 - pfmul mm7,mm3 ;// a41*b24 | a41*b14 - pfadd mm6,mm5 ;// a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13 - pfmul mm3,[edx+56] ;// a41*b44 | a41*b34 - pfadd mm2,mm1 ;// a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33 - pfadd mm7,mm6 ;// a41*b24 + a21*b22 + a11*b21 + a31*b23 | a41*b14 + a21*b12 + a11*b11 + a31*b13 - movq mm0,[ecx+16] ;// a22 | a12 - pfadd mm3,mm2 ;// a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33 - movq mm1,[ecx+24] ;// a42 | a32 - movq [eax],mm7 ;// r21 | r11 - movq mm4,[edx] ;// b21 | b11 - movq [eax+8],mm3 ;// r41 | r31 - - punpckhdq mm2,mm0 ;// a22 | XXX - movq mm5,[edx+16] ;// b22 | b12 - punpckhdq mm3,mm1 ;// a42 | XXX - movq mm6,[edx+32] ;// b23 | b13 - punpckldq mm0,mm0 ;// a12 | a12 - punpckldq mm1,mm1 ;// a32 | a32 - pfmul mm4,mm0 ;// a12*b21 | a12*b11 - punpckhdq mm2,mm2 ;// a22 | a22 - pfmul mm0,[edx+8] ;// a12*b41 | a12*b31 - movq mm7,[edx+48] ;// b24 | b14 - pfmul mm5,mm2 ;// a22*b22 | a22*b12 - punpckhdq 
mm3,mm3 ;// a42 | a42 - pfmul mm2,[edx+24] ;// a22*b42 | a22*b32 - pfmul mm6,mm1 ;// a32*b23 | a32*b13 - pfadd mm5,mm4 ;// a12*b21 + a22*b22 | a12*b11 + a22*b12 - pfmul mm1,[edx+40] ;// a32*b43 | a32*b33 - pfadd mm2,mm0 ;// a12*b41 + a22*b42 | a12*b11 + a22*b32 - pfmul mm7,mm3 ;// a42*b24 | a42*b14 - pfadd mm6,mm5 ;// a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12 - pfmul mm3,[edx+56] ;// a42*b44 | a42*b34 - pfadd mm2,mm1 ;// a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b11 + a22*b32 - pfadd mm7,mm6 ;// a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12 - movq mm0,[ecx+32] ;// a23 | a13 - pfadd mm3,mm2 ;// a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b11 + a22*b32 - movq mm1,[ecx+40] ;// a43 | a33 - movq [eax+16],mm7 ;// r22 | r12 - movq mm4,[edx] ;// b21 | b11 - movq [eax+24],mm3 ;// r42 | r32 - - punpckhdq mm2,mm0 ;// a23 | XXX - movq mm5,[edx+16] ;// b22 | b12 - punpckhdq mm3,mm1 ;// a43 | XXX - movq mm6,[edx+32] ;// b23 | b13 - punpckldq mm0,mm0 ;// a13 | a13 - punpckldq mm1,mm1 ;// a33 | a33 - pfmul mm4,mm0 ;// a13*b21 | a13*b11 - punpckhdq mm2,mm2 ;// a23 | a23 - pfmul mm0,[edx+8] ;// a13*b41 | a13*b31 - movq mm7,[edx+48] ;// b24 | b14 - pfmul mm5,mm2 ;// a23*b22 | a23*b12 - punpckhdq mm3,mm3 ;// a43 | a43 - pfmul mm2,[edx+24] ;// a23*b42 | a23*b32 - pfmul mm6,mm1 ;// a33*b23 | a33*b13 - pfadd mm5,mm4 ;// a23*b22 + a13*b21 | a23*b12 + a13*b11 - pfmul mm1,[edx+40] ;// a33*b43 | a33*b33 - pfadd mm2,mm0 ;// a13*b41 + a23*b42 | a13*b31 + a23*b32 - pfmul mm7,mm3 ;// a43*b24 | a43*b14 - pfadd mm6,mm5 ;// a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11 - pfmul mm3,[edx+56] ;// a43*b44 | a43*b34 - pfadd mm2,mm1 ;// a33*b43*a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32 - pfadd mm7,mm6 ;// a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11 - movq mm0,[ecx+48] ;// a24 | a14 - pfadd mm3,mm2 ;// a43*b44 + a33*b43*a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32 - movq 
mm1,[ecx+56] ;// a44 | a34 - movq [eax+32],mm7 ;// r23 | r13 - movq mm4,[edx] ;// b21 | b11 - movq [eax+40],mm3 ;// r43 | r33 - - punpckhdq mm2,mm0 ;// a24 | XXX - movq mm5,[edx+16] ;// b22 | b12 - punpckhdq mm3,mm1 ;// a44 | XXX - movq mm6,[edx+32] ;// b23 | b13 - punpckldq mm0,mm0 ;// a14 | a14 - punpckldq mm1,mm1 ;// a34 | a34 - pfmul mm4,mm0 ;// a14*b21 | a14*b11 - punpckhdq mm2,mm2 ;// a24 | a24 - pfmul mm0,[edx+8] ;// a14*b41 | a14*b31 - movq mm7,[edx+48] ;// b24 | b14 - pfmul mm5,mm2 ;// a24*b22 | a24*b12 - punpckhdq mm3,mm3 ;// a44 | a44 - pfmul mm2,[edx+24] ;// a24*b 42 | a24*b32 - pfmul mm6,mm1 ;// a34*b23 | a34*b13 - pfadd mm5,mm4 ;// a14*b21 + a24*b22 | a14*b11 + a24*b12 - pfmul mm1,[edx+40] ;// a34*b43 | a34*b33 - pfadd mm2,mm0 ;// a14*b41 + a24*b 42 | a14*b31 + a24*b32 - pfmul mm7,mm3 ;// a44*b24 | a44*b14 - pfadd mm6,mm5 ;// a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12 - pfmul mm3,[edx+56] ;// a44*b44 | a44*b34 - pfadd mm2,mm1 ;// a34*b43 + a14*b41 + a24*b 42 | a34*b33 + a14*b31 + a24*b32 - pfadd mm7,mm6 ;// a44*b24 + a14*b23 + a24*b 42 | a44*b14 + a14*b31 + a24*b32 - pfadd mm3,mm2 ;// a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32 - movq [eax+48],mm7 ;// r24 | r14 - movq [eax+56],mm3 ;// r44 | r34 - femms - } -} -#elif defined(TORQUE_SUPPORTS_NASM) -#define ADD_3DNOW_FUNCS -extern "C" -{ - void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result); -} - -#endif - -void mInstall_AMD_Math() -{ -#if defined(ADD_3DNOW_FUNCS) - m_matF_x_matF = Athlon_MatrixF_x_MatrixF; -#endif - // m_matF_x_point3F = Athlon_MatrixF_x_Point3F; - // m_matF_x_vectorF = Athlon_MatrixF_x_VectorF; -} - diff --git a/Engine/source/math/mMathAMD_ASM.asm b/Engine/source/math/mMathAMD_ASM.asm deleted file mode 100644 index 74d3fa1ec..000000000 --- a/Engine/source/math/mMathAMD_ASM.asm +++ /dev/null @@ -1,177 +0,0 @@ -;----------------------------------------------------------------------------- -; Copyright (c) 2012 
GarageGames, LLC -; -; Permission is hereby granted, free of charge, to any person obtaining a copy -; of this software and associated documentation files (the "Software"), to -; deal in the Software without restriction, including without limitation the -; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -; sell copies of the Software, and to permit persons to whom the Software is -; furnished to do so, subject to the following conditions: -; -; The above copyright notice and this permission notice shall be included in -; all copies or substantial portions of the Software. -; -; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -; IN THE SOFTWARE. 
-;----------------------------------------------------------------------------- - - -segment .data - -matA dd 0 -result dd 0 -matB dd 0 - -segment .text - -%macro export_fn 1 - %ifidn __OUTPUT_FORMAT__, elf - ; No underscore needed for ELF object files - global %1 - %1: - %else - global _%1 - _%1: - %endif -%endmacro - - -%define arg(x) [esp+(x*4)] - - - -;void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result) - -export_fn Athlon_MatrixF_x_MatrixF - - mov ecx, arg(1) - mov edx, arg(2) - mov eax, arg(3) - - femms - prefetch [ecx+32] ; These may help - - prefetch [edx+32] ; and probably don't hurt - - movq mm0,[ecx] ; a21 | a11 - movq mm1,[ecx+8] ; a41 | a31 - movq mm4,[edx] ; b21 | b11 - punpckhdq mm2,mm0 ; a21 | - movq mm5,[edx+16] ; b22 | b12 - punpckhdq mm3,mm1 ; a41 | - movq mm6,[edx+32] ; b23 | b13 - punpckldq mm0,mm0 ; a11 | a11 - punpckldq mm1,mm1 ; a31 | a31 - pfmul mm4,mm0 ; a11*b21 | a11*b11 - punpckhdq mm2,mm2 ; a21 | a21 - pfmul mm0,[edx+8] ; a11*b41 | a11*b31 - movq mm7,[edx+48] ; b24 | b14 - pfmul mm5,mm2 ; a21*b22 | a21*b12 - punpckhdq mm3,mm3 ; a41 | a41 - pfmul mm2,[edx+24] ; a21*b42 | a21*b32 - pfmul mm6,mm1 ; a31*b23 | a31*b13 - pfadd mm5,mm4 ; a21*b22 + a11*b21 | a21*b12 + a11*b11 - pfmul mm1,[edx+40] ; a31*b43 | a31*b33 - pfadd mm2,mm0 ; a21*b42 + a11*b41 | a21*b32 + a11*b31 - pfmul mm7,mm3 ; a41*b24 | a41*b14 - pfadd mm6,mm5 ; a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13 - pfmul mm3,[edx+56] ; a41*b44 | a41*b34 - pfadd mm2,mm1 ; a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33 - pfadd mm7,mm6 ; a41*b24 + a21*b22 + a11*b21 + a31*b23 | a41*b14 + a21*b12 + a11*b11 + a31*b13 - movq mm0,[ecx+16] ; a22 | a12 - pfadd mm3,mm2 ; a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33 - movq mm1,[ecx+24] ; a42 | a32 - movq [eax],mm7 ; r21 | r11 - movq mm4,[edx] ; b21 | b11 - movq [eax+8],mm3 ; r41 | r31 - - punpckhdq mm2,mm0 ; a22 | XXX - movq mm5,[edx+16] ; b22 | b12 - punpckhdq mm3,mm1 ; 
a42 | XXX - movq mm6,[edx+32] ; b23 | b13 - punpckldq mm0,mm0 ; a12 | a12 - punpckldq mm1,mm1 ; a32 | a32 - pfmul mm4,mm0 ; a12*b21 | a12*b11 - punpckhdq mm2,mm2 ; a22 | a22 - pfmul mm0,[edx+8] ; a12*b41 | a12*b31 - movq mm7,[edx+48] ; b24 | b14 - pfmul mm5,mm2 ; a22*b22 | a22*b12 - punpckhdq mm3,mm3 ; a42 | a42 - pfmul mm2,[edx+24] ; a22*b42 | a22*b32 - pfmul mm6,mm1 ; a32*b23 | a32*b13 - pfadd mm5,mm4 ; a12*b21 + a22*b22 | a12*b11 + a22*b12 - pfmul mm1,[edx+40] ; a32*b43 | a32*b33 - pfadd mm2,mm0 ; a12*b41 + a22*b42 | a12*b11 + a22*b32 - pfmul mm7,mm3 ; a42*b24 | a42*b14 - pfadd mm6,mm5 ; a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12 - pfmul mm3,[edx+56] ; a42*b44 | a42*b34 - pfadd mm2,mm1 ; a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b11 + a22*b32 - pfadd mm7,mm6 ; a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12 - movq mm0,[ecx+32] ; a23 | a13 - pfadd mm3,mm2 ; a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b11 + a22*b32 - movq mm1,[ecx+40] ; a43 | a33 - movq [eax+16],mm7 ; r22 | r12 - movq mm4,[edx] ; b21 | b11 - movq [eax+24],mm3 ; r42 | r32 - - punpckhdq mm2,mm0 ; a23 | XXX - movq mm5,[edx+16] ; b22 | b12 - punpckhdq mm3,mm1 ; a43 | XXX - movq mm6,[edx+32] ; b23 | b13 - punpckldq mm0,mm0 ; a13 | a13 - punpckldq mm1,mm1 ; a33 | a33 - pfmul mm4,mm0 ; a13*b21 | a13*b11 - punpckhdq mm2,mm2 ; a23 | a23 - pfmul mm0,[edx+8] ; a13*b41 | a13*b31 - movq mm7,[edx+48] ; b24 | b14 - pfmul mm5,mm2 ; a23*b22 | a23*b12 - punpckhdq mm3,mm3 ; a43 | a43 - pfmul mm2,[edx+24] ; a23*b42 | a23*b32 - pfmul mm6,mm1 ; a33*b23 | a33*b13 - pfadd mm5,mm4 ; a23*b22 + a13*b21 | a23*b12 + a13*b11 - pfmul mm1,[edx+40] ; a33*b43 | a33*b33 - pfadd mm2,mm0 ; a13*b41 + a23*b42 | a13*b31 + a23*b32 - pfmul mm7,mm3 ; a43*b24 | a43*b14 - pfadd mm6,mm5 ; a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11 - pfmul mm3,[edx+56] ; a43*b44 | a43*b34 - pfadd mm2,mm1 ; a33*b43*a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32 - pfadd mm7,mm6 ; 
a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11 - movq mm0,[ecx+48] ; a24 | a14 - pfadd mm3,mm2 ; a43*b44 + a33*b43*a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32 - movq mm1,[ecx+56] ; a44 | a34 - movq [eax+32],mm7 ; r23 | r13 - movq mm4,[edx] ; b21 | b11 - movq [eax+40],mm3 ; r43 | r33 - - punpckhdq mm2,mm0 ; a24 | XXX - movq mm5,[edx+16] ; b22 | b12 - punpckhdq mm3,mm1 ; a44 | XXX - movq mm6,[edx+32] ; b23 | b13 - punpckldq mm0,mm0 ; a14 | a14 - punpckldq mm1,mm1 ; a34 | a34 - pfmul mm4,mm0 ; a14*b21 | a14*b11 - punpckhdq mm2,mm2 ; a24 | a24 - pfmul mm0,[edx+8] ; a14*b41 | a14*b31 - movq mm7,[edx+48] ; b24 | b14 - pfmul mm5,mm2 ; a24*b22 | a24*b12 - punpckhdq mm3,mm3 ; a44 | a44 - pfmul mm2,[edx+24] ; a24*b 42 | a24*b32 - pfmul mm6,mm1 ; a34*b23 | a34*b13 - pfadd mm5,mm4 ; a14*b21 + a24*b22 | a14*b11 + a24*b12 - pfmul mm1,[edx+40] ; a34*b43 | a34*b33 - pfadd mm2,mm0 ; a14*b41 + a24*b 42 | a14*b31 + a24*b32 - pfmul mm7,mm3 ; a44*b24 | a44*b14 - pfadd mm6,mm5 ; a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12 - pfmul mm3,[edx+56] ; a44*b44 | a44*b34 - pfadd mm2,mm1 ; a34*b43 + a14*b41 + a24*b 42 | a34*b33 + a14*b31 + a24*b32 - pfadd mm7,mm6 ; a44*b24 + a14*b23 + a24*b 42 | a44*b14 + a14*b31 + a24*b32 - pfadd mm3,mm2 ; a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32 - movq [eax+48],mm7 ; r24 | r14 - movq [eax+56],mm3 ; r44 | r34 - femms - - ret diff --git a/Engine/source/math/test/mMatrixTest.cpp b/Engine/source/math/test/mMatrixTest.cpp index beb0b1d13..b5276ce22 100644 --- a/Engine/source/math/test/mMatrixTest.cpp +++ b/Engine/source/math/test/mMatrixTest.cpp @@ -60,19 +60,6 @@ TEST(MatrixF, MultiplyImplmentations) U32 cpuProperties = Platform::SystemInfo.processor.properties; bool same; - // Test 3D NOW! 
if it is available - F32 mrAMD[16]; - if (cpuProperties & CPU_PROP_3DNOW) - { - Athlon_MatrixF_x_MatrixF(m1, m2, mrAMD); - - same = true; - for (S32 i = 0; i < 16; i++) - same &= mIsEqual(mrC[i], mrAMD[i]); - - EXPECT_TRUE(same) << "Matrix multiplication verification failed. (C vs. 3D NOW!)"; - } - // Test SSE if it is available F32 mrSSE[16]; if (cpuProperties & CPU_PROP_SSE) diff --git a/Engine/source/platform/platform.h b/Engine/source/platform/platform.h index 6073786b7..acb929a9f 100644 --- a/Engine/source/platform/platform.h +++ b/Engine/source/platform/platform.h @@ -68,18 +68,17 @@ enum ProcessorProperties CPU_PROP_C = (1<<0), ///< We should use C fallback math functions. CPU_PROP_FPU = (1<<1), ///< Has an FPU. (It better!) CPU_PROP_MMX = (1<<2), ///< Supports MMX instruction set extension. - CPU_PROP_3DNOW = (1<<3), ///< Supports AMD 3dNow! instruction set extension. - CPU_PROP_SSE = (1<<4), ///< Supports SSE instruction set extension. - CPU_PROP_SSE2 = (1<<5), ///< Supports SSE2 instruction set extension. - CPU_PROP_SSE3 = (1<<6), ///< Supports SSE3 instruction set extension. - CPU_PROP_SSE3ex = (1<<7), ///< Supports Supplemental SSE3 instruction set - CPU_PROP_SSE4_1 = (1<<8), ///< Supports SSE4_1 instruction set extension. - CPU_PROP_SSE4_2 = (1<<9), ///< Supports SSE4_2 instruction set extension. - CPU_PROP_AVX = (1<<10), ///< Supports AVX256 instruction set extension. - CPU_PROP_MP = (1<<11), ///< This is a multi-processor system. - CPU_PROP_LE = (1<<12), ///< This processor is LITTLE ENDIAN. - CPU_PROP_64bit = (1<<13), ///< This processor is 64-bit capable - CPU_PROP_NEON = (1<<14), ///< Supports the Arm Neon instruction set extension. + CPU_PROP_SSE = (1<<3), ///< Supports SSE instruction set extension. + CPU_PROP_SSE2 = (1<<4), ///< Supports SSE2 instruction set extension. + CPU_PROP_SSE3 = (1<<5), ///< Supports SSE3 instruction set extension. 
+ CPU_PROP_SSE3ex = (1<<6), ///< Supports Supplemental SSE3 instruction set + CPU_PROP_SSE4_1 = (1<<7), ///< Supports SSE4_1 instruction set extension. + CPU_PROP_SSE4_2 = (1<<8), ///< Supports SSE4_2 instruction set extension. + CPU_PROP_AVX = (1<<9), ///< Supports AVX256 instruction set extension. + CPU_PROP_MP = (1<<10), ///< This is a multi-processor system. + CPU_PROP_LE = (1<<11), ///< This processor is LITTLE ENDIAN. + CPU_PROP_64bit = (1<<12), ///< This processor is 64-bit capable + CPU_PROP_NEON = (1<<13), ///< Supports the Arm Neon instruction set extension. }; /// Processor info manager. diff --git a/Engine/source/platform/platformCPU.cpp b/Engine/source/platform/platformCPU.cpp index 8449d1daa..f3246c6ed 100644 --- a/Engine/source/platform/platformCPU.cpp +++ b/Engine/source/platform/platformCPU.cpp @@ -28,7 +28,8 @@ Signal Platform::SystemInfoReady; -void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand) +// fill the specified structure with information obtained from asm code +void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand) { if (dStricmp(vendor, "GenuineIntel") == 0) { @@ -70,4 +71,4 @@ void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor // Trigger the signal Platform::SystemInfoReady.trigger(); -} +} \ No newline at end of file diff --git a/Engine/source/platform/platformCPUCount.h b/Engine/source/platform/platformCPUCount.h index d008201b3..66a0c2e12 100644 --- a/Engine/source/platform/platformCPUCount.h +++ b/Engine/source/platform/platformCPUCount.h @@ -64,7 +64,7 @@ namespace CPUInfo EConfig CPUCount( U32& totalAvailableLogical, U32& totalAvailableCores ); } // namespace CPUInfo -void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand); +void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand); #endif // 
_TORQUE_PLATFORM_PLATFORMCOUNT_H_ diff --git a/Engine/source/platformMac/macCPU.mm b/Engine/source/platformMac/macCPU.mm index d93bcf25f..001d537a0 100644 --- a/Engine/source/platformMac/macCPU.mm +++ b/Engine/source/platformMac/macCPU.mm @@ -190,7 +190,7 @@ void Processor::init() detectCpuFeatures(procflags); Platform::SystemInfo.processor.properties = procflags; - SetProcessoInfo(Platform::SystemInfo.processor, vendor, brandString); + SetProcessorInfo(Platform::SystemInfo.processor, vendor, brandString); Con::printf("System & Processor Information:"); diff --git a/Engine/source/platformMac/macMath.mm b/Engine/source/platformMac/macMath.mm index 542fa27c8..4feefb277 100644 --- a/Engine/source/platformMac/macMath.mm +++ b/Engine/source/platformMac/macMath.mm @@ -27,8 +27,6 @@ #include "console/engineAPI.h" extern void mInstallLibrary_C(); -extern void mInstallLibrary_Vec(); -extern void mInstall_Library_SSE(); static MRandomLCG sgPlatRandom; @@ -115,7 +113,6 @@ void Math::init(U32 properties) if( properties & CPU_PROP_SSE ) { Con::printf( " Installing SSE extensions" ); - mInstall_Library_SSE(); } #endif
diff --git a/Engine/source/platformPOSIX/POSIXCPUInfo.cpp b/Engine/source/platformPOSIX/POSIXCPUInfo.cpp
index 279d23f1a..679096bd7 100644
--- a/Engine/source/platformPOSIX/POSIXCPUInfo.cpp
+++ b/Engine/source/platformPOSIX/POSIXCPUInfo.cpp
@@ -22,30 +22,230 @@
 
 #ifndef __APPLE__
 
+#include <algorithm>
+#include <cctype>
+#include <fstream>
+#include <sstream>
+#include <string>
+
 #include "platform/platform.h"
 #include "platformPOSIX/platformPOSIX.h"
-#include "console/console.h"
-#include "core/stringTable.h"
-#include "core/strings/stringFunctions.h"
-#include
-
 #include "platform/platformCPUCount.h"
+
+#include "console/console.h"
+#include "core/stringTable.h"
+#include "core/strings/stringFunctions.h"
+
 #include
 
 Platform::SystemInfo_struct Platform::SystemInfo;
 
-void Processor::init() {}
+/// Associates a token from the "flags" line of /proc/cpuinfo with the
+/// processor property bit it enables.
+struct CPUFlagProperty
+{
+   const char *flag;
+   U32 property;
+};
+
+static const CPUFlagProperty sCPUFlagProperties[] =
+{
+   { "fpu",    CPU_PROP_FPU    },
+   { "mmx",    CPU_PROP_MMX    },
+   { "sse",    CPU_PROP_SSE    },
+   { "sse2",   CPU_PROP_SSE2   },
+   { "pni",    CPU_PROP_SSE3   }, // Linux reports SSE3 as "pni"
+   { "sse3",   CPU_PROP_SSE3   },
+   { "ssse3",  CPU_PROP_SSE3ex },
+   { "sse4_1", CPU_PROP_SSE4_1 },
+   { "sse4_2", CPU_PROP_SSE4_2 },
+   { "avx",    CPU_PROP_AVX    },
+};
+
+/// Whitespace test that is safe for any char value; std::isspace must not be
+/// handed a negative value, hence the cast.
+static inline bool isSpaceChar(char c)
+{
+   return std::isspace(static_cast<unsigned char>(c)) != 0;
+}
+
+/// Strips trailing whitespace from s, in place.
+static inline void rtrim(std::string &s)
+{
+   s.erase(std::find_if_not(s.rbegin(), s.rend(), isSpaceChar).base(), s.end());
+}
+
+/// Strips leading whitespace from s, in place.
+static inline void ltrim(std::string &s)
+{
+   s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), isSpaceChar));
+}
+
+/// Converts the whitespace separated feature list of a "flags" line into the
+/// matching CPU_PROP_* bits.
+static U32 parseCPUFlags(const std::string &flagList)
+{
+   U32 properties = 0;
+
+   std::istringstream flagStream(flagList);
+   std::string flag;
+   while (flagStream >> flag)
+   {
+      for (const CPUFlagProperty &entry : sCPUFlagProperties)
+      {
+         if (flag == entry.flag)
+            properties |= entry.property;
+      }
+   }
+
+   return properties;
+}
+
+/// Fills Platform::SystemInfo.processor from /proc/cpuinfo and the cpufreq
+/// sysfs node, then passes the vendor and brand strings to SetProcessorInfo().
+static void getCPUInformation()
+{
+   Platform::SystemInfo_struct::Processor &processor = Platform::SystemInfo.processor;
+
+   std::string vendorString;
+   std::string brandString;
+
+   U32 logicalCoreCount = 0;
+   U32 physicalCoreCount = 0; // stays 0 until a "cpu cores" field is read
+
+   std::ifstream cpuInfo("/proc/cpuinfo");
+   if (cpuInfo.is_open())
+   {
+      std::string line;
+      while (std::getline(cpuInfo, line))
+      {
+         const std::string::size_type separator = line.find(':');
+
+         std::string fieldName = line.substr(0, separator);
+         rtrim(fieldName);
+
+         // Entries are separated by blank lines, one entry per logical processor
+         if (fieldName.empty())
+         {
+            ++logicalCoreCount;
+            continue;
+         }
+
+         // Not a "name : value" line, so there is no value to read
+         if (separator == std::string::npos)
+            continue;
+
+         std::string fieldValue = line.substr(separator + 1);
+         ltrim(fieldValue);
+         rtrim(fieldValue);
+
+         if (fieldName == "vendor_id")
+            vendorString = fieldValue;
+         else if (fieldName == "model name")
+            brandString = fieldValue;
+         else if (fieldName == "cpu cores")
+            physicalCoreCount = dAtoui(fieldValue.c_str());
+         else if (fieldName == "flags")
+            processor.properties |= parseCPUFlags(fieldValue);
+      }
+   }
+
+   // An unreadable or empty file still means there is at least one processor
+   if (logicalCoreCount == 0)
+      logicalCoreCount = 1;
+
+   // Without a "cpu cores" field, assume one core per logical processor instead
+   // of reporting every multi-processor machine as a hyper-threaded single core
+   if (physicalCoreCount == 0)
+      physicalCoreCount = logicalCoreCount;
+
+   processor.numLogicalProcessors = logicalCoreCount;
+   processor.numPhysicalProcessors = physicalCoreCount;
+   processor.isHyperThreaded = logicalCoreCount != physicalCoreCount;
+   processor.isMultiCore = physicalCoreCount != 1;
+   if (processor.isMultiCore)
+      processor.properties |= CPU_PROP_MP;
+
+   // Load processor base frequency; the value is divided by 1000 to get MHz
+   std::ifstream baseFrequencyStream("/sys/devices/system/cpu/cpu0/cpufreq/base_frequency");
+   U32 baseFrequencyKHz = 0;
+   if (baseFrequencyStream >> baseFrequencyKHz)
+      processor.mhz = baseFrequencyKHz / 1000;
+
+   SetProcessorInfo(processor, vendorString.c_str(), brandString.c_str());
+}
+
+/// Detects the processor and prints a summary of it to the console.
+void Processor::init()
+{
+   getCPUInformation();
+
+   Platform::SystemInfo_struct::Processor &processor = Platform::SystemInfo.processor;
+
+#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_X32)
+   // Set sane default information
+   processor.properties |= CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE;
+#elif defined(TORQUE_CPU_ARM32) || defined(TORQUE_CPU_ARM64)
+   processor.type = CPU_ArmCompatible;
+   processor.name = StringTable->insert("Unknown ARM Processor");
+   // OR the bit in so the properties found by getCPUInformation() are kept
+   processor.properties |= CPU_PROP_C;
+#else
+#warning Unsupported CPU
+#endif
+
+   // Set 64bit flag
+#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64)
+   processor.properties |= CPU_PROP_64bit;
+#endif
+
+   // Once CPU information is resolved, produce an output like Windows does
+   Con::printf("Processor Init:");
+   Con::printf(" Processor: %s", processor.name);
+   if (processor.properties & CPU_PROP_MMX)
+      Con::printf(" MMX detected" );
+   if (processor.properties & CPU_PROP_SSE)
+      Con::printf(" SSE detected" );
+   if (processor.properties & CPU_PROP_SSE2)
+      Con::printf(" SSE2 detected" );
+   if (processor.properties & CPU_PROP_SSE3)
+      Con::printf(" SSE3 detected" );
+   if (processor.properties & CPU_PROP_SSE4_1)
+      Con::printf(" SSE4.1 detected" );
+   if (processor.properties & CPU_PROP_SSE4_2)
+      Con::printf(" SSE4.2 detected" );
+   if (processor.properties & CPU_PROP_AVX)
+      Con::printf(" AVX detected" );
+   if (processor.properties & CPU_PROP_SSE3ex)
+      Con::printf(" SSE3ex detected" );
+   if (processor.properties & CPU_PROP_MP)
+      Con::printf(" MultiCore CPU detected [%i cores, %i logical]", processor.numPhysicalProcessors, processor.numLogicalProcessors);
+
+   Con::printf(" ");
+}
 
-// TODO LINUX CPUInfo::CPUCount better support
 namespace CPUInfo
 {
-   EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
+   /// Classifies the core / hyper-threading layout from the values already
+   /// stored in Platform::SystemInfo.processor; both parameters are left untouched.
+   EConfig CPUCount(U32 &logical, U32 &physical)
    {
-      PhysicalNum = TotAvailCore = 0;
-      TotAvailLogical = (int)sysconf(_SC_NPROCESSORS_ONLN);
+      // We don't set logical or physical here because it's already been determined by this point
+      if (Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors == 1)
+      {
+         return CONFIG_SingleCoreHTEnabled;
+      }
+      else if (!Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors > 1)
+      {
+         return CONFIG_MultiCoreAndHTNotCapable;
+      }
+      else if (!Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors == 1)
+      {
+         return CONFIG_SingleCoreAndHTNotCapable;
+      }
 
-      return CONFIG_SingleCoreHTDisabled;
+      return CONFIG_MultiCoreAndHTEnabled;
    }
-}; // namespace CPUInfo
+}; // namespace CPUInfo
 
 #endif
\ No newline at end of file
diff --git a/Engine/source/platformPOSIX/POSIXMath.cpp b/Engine/source/platformPOSIX/POSIXMath.cpp index ef1f089a4..8f21329a3 100644 --- a/Engine/source/platformPOSIX/POSIXMath.cpp +++ b/Engine/source/platformPOSIX/POSIXMath.cpp @@ -31,11 +31,6 @@ extern void mInstallLibrary_C(); extern void mInstallLibrary_ASM(); - -extern void mInstall_AMD_Math(); -extern void mInstall_Library_SSE(); - - //-------------------------------------- DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )" "@brief Install the math library with specified extensions.\n\n" @@ -70,10 +65,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ...
)" properties |= CPU_PROP_MMX; continue; } - if (dStricmp(*argv, "3DNOW") == 0) { - properties |= CPU_PROP_3DNOW; - continue; - } if (dStricmp(*argv, "SSE") == 0) { properties |= CPU_PROP_SSE; continue; @@ -112,18 +103,12 @@ void Math::init(U32 properties) if (properties & CPU_PROP_MMX) { Con::printf(" Installing MMX extensions"); - if (properties & CPU_PROP_3DNOW) - { - Con::printf(" Installing 3DNow extensions"); - mInstall_AMD_Math(); - } } #if !defined(__MWERKS__) || (__MWERKS__ >= 0x2400) if (properties & CPU_PROP_SSE) { Con::printf(" Installing SSE extensions"); - mInstall_Library_SSE(); } #endif //mwerks>2.4 diff --git a/Engine/source/platformWin32/winCPUInfo.cpp b/Engine/source/platformWin32/winCPUInfo.cpp index f8ea9ab2d..7b058fd8c 100644 --- a/Engine/source/platformWin32/winCPUInfo.cpp +++ b/Engine/source/platformWin32/winCPUInfo.cpp @@ -82,9 +82,6 @@ static void detectCpuFeatures(Platform::SystemInfo_struct::Processor &processor) U32 edx = cpuInfo[3]; // edx U32 ecx = cpuInfo[2]; // ecx - if (processor.type == ProcessorType::CPU_AMD) - processor.properties |= (edx & BIT_3DNOW) ? CPU_PROP_3DNOW : 0; - processor.properties |= (edx & BIT_MMX) ? CPU_PROP_MMX : 0; processor.properties |= (edx & BIT_SSE) ? CPU_PROP_SSE : 0; processor.properties |= (edx & BIT_SSE2) ? 
CPU_PROP_SSE2 : 0; @@ -137,7 +134,7 @@ void Processor::init() dMemset(brand, 0, sizeof(brand)); getBrand(brand); - SetProcessoInfo(Platform::SystemInfo.processor, vendor, brand); + SetProcessorInfo(Platform::SystemInfo.processor, vendor, brand); detectCpuFeatures(Platform::SystemInfo.processor); U32 mhz = 1000; // default if it can't be found @@ -165,8 +162,6 @@ void Processor::init() Con::printf(" Processor: %s", Platform::SystemInfo.processor.name); if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX) Con::printf(" MMX detected" ); - if (Platform::SystemInfo.processor.properties & CPU_PROP_3DNOW) - Con::printf(" 3DNow detected" ); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE) Con::printf(" SSE detected" ); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2) diff --git a/Engine/source/platformWin32/winMath.cpp b/Engine/source/platformWin32/winMath.cpp index b273d25f0..44b215301 100644 --- a/Engine/source/platformWin32/winMath.cpp +++ b/Engine/source/platformWin32/winMath.cpp @@ -29,8 +29,6 @@ extern void mInstallLibrary_C(); extern void mInstallLibrary_ASM(); -extern void mInstall_AMD_Math(); -extern void mInstall_Library_SSE(); //-------------------------------------- DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )" @@ -40,7 +38,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )" " - 'C' Enable the C math routines. C routines are always enabled.\n\n" " - 'FPU' Enable floating point unit routines.\n\n" " - 'MMX' Enable MMX math routines.\n\n" - " - '3DNOW' Enable 3dNow! math routines.\n\n" " - 'SSE' Enable SSE math routines.\n\n" "@ingroup Math") @@ -72,10 +69,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... 
)" properties |= CPU_PROP_MMX; continue; } - if (dStricmp(str, "3DNOW") == 0) { - properties |= CPU_PROP_3DNOW; - continue; - } if (dStricmp(str, "SSE") == 0) { properties |= CPU_PROP_SSE; continue; @@ -116,17 +109,11 @@ void Math::init(U32 properties) if (properties & CPU_PROP_MMX) { Con::printf(" Installing MMX extensions"); - if (properties & CPU_PROP_3DNOW) - { - Con::printf(" Installing 3DNow extensions"); - mInstall_AMD_Math(); - } } if (properties & CPU_PROP_SSE) { Con::printf(" Installing SSE extensions"); - mInstall_Library_SSE(); } Con::printf(" ");