diff --git a/Engine/source/math/mMathAMD.cpp b/Engine/source/math/mMathAMD.cpp deleted file mode 100644 index 2df3729ef..000000000 --- a/Engine/source/math/mMathAMD.cpp +++ /dev/null @@ -1,216 +0,0 @@ -//----------------------------------------------------------------------------- -// Copyright (c) 2012 GarageGames, LLC -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//----------------------------------------------------------------------------- - -#include "math/mMathFn.h" -#include "math/mPlane.h" -#include "math/mMatrix.h" - - -// extern void (*m_matF_x_point3F)(const F32 *m, const F32 *p, F32 *presult); -// extern void (*m_matF_x_vectorF)(const F32 *m, const F32 *v, F32 *vresult); - -/* not currently implemented. -void Athlon_MatrixF_x_Point3F(const F32 *m, const F32 *p, F32 *presult) -{ - m; - p; - presult; -} -*/ - -//============================================================ -// Here's the C code for MatF_x_MatF: -// note that the code below does it in a different order (optimal asm, after all!) -// -// r[0] = a[0]*b[0] + a[1]*b[4] + a[2]*b[8] + a[3]*b[12]; -// r[1] = a[0]*b[1] + a[1]*b[5] + a[2]*b[9] + a[3]*b[13]; -// r[2] = a[0]*b[2] + a[1]*b[6] + a[2]*b[10] + a[3]*b[14]; -// r[3] = a[0]*b[3] + a[1]*b[7] + a[2]*b[11] + a[3]*b[15]; -// -// r[4] = a[4]*b[0] + a[5]*b[4] + a[6]*b[8] + a[7]*b[12]; -// r[5] = a[4]*b[1] + a[5]*b[5] + a[6]*b[9] + a[7]*b[13]; -// r[6] = a[4]*b[2] + a[5]*b[6] + a[6]*b[10] + a[7]*b[14]; -// r[7] = a[4]*b[3] + a[5]*b[7] + a[6]*b[11] + a[7]*b[15]; -// -// r[8] = a[8]*b[0] + a[9]*b[4] + a[10]*b[8] + a[11]*b[12]; -// r[9] = a[8]*b[1] + a[9]*b[5] + a[10]*b[9] + a[11]*b[13]; -// r[10]= a[8]*b[2] + a[9]*b[6] + a[10]*b[10]+ a[11]*b[14]; -// r[11]= a[8]*b[3] + a[9]*b[7] + a[10]*b[11]+ a[11]*b[15]; -// -// r[12]= a[12]*b[0]+ a[13]*b[4]+ a[14]*b[8] + a[15]*b[12]; -// r[13]= a[12]*b[1]+ a[13]*b[5]+ a[14]*b[9] + a[15]*b[13]; -// r[14]= a[12]*b[2]+ a[13]*b[6]+ a[14]*b[10]+ a[15]*b[14]; -// r[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15]; -//============================================================ - -#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM) -#define ADD_3DNOW_FUNCS -// inlined version here. -void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result) -{ - __asm - { - femms - - mov ecx, matA - mov edx, matB - mov eax, result - - prefetch [ecx+32] ;// These may help - - prefetch [edx+32] ;// and probably don't hurt - - movq mm0,[ecx] ;// a21 | a11 - movq mm1,[ecx+8] ;// a41 | a31 - movq mm4,[edx] ;// b21 | b11 - punpckhdq mm2,mm0 ;// a21 | - movq mm5,[edx+16] ;// b22 | b12 - punpckhdq mm3,mm1 ;// a41 | - movq mm6,[edx+32] ;// b23 | b13 - punpckldq mm0,mm0 ;// a11 | a11 - punpckldq mm1,mm1 ;// a31 | a31 - pfmul mm4,mm0 ;// a11*b21 | a11*b11 - punpckhdq mm2,mm2 ;// a21 | a21 - pfmul mm0,[edx+8] ;// a11*b41 | a11*b31 - movq mm7,[edx+48] ;// b24 | b14 - pfmul mm5,mm2 ;// a21*b22 | a21*b12 - punpckhdq mm3,mm3 ;// a41 | a41 - pfmul mm2,[edx+24] ;// a21*b42 | a21*b32 - pfmul mm6,mm1 ;// a31*b23 | a31*b13 - pfadd mm5,mm4 ;// a21*b22 + a11*b21 | a21*b12 + a11*b11 - pfmul mm1,[edx+40] ;// a31*b43 | a31*b33 - pfadd mm2,mm0 ;// a21*b42 + a11*b41 | a21*b32 + a11*b31 - pfmul mm7,mm3 ;// a41*b24 | a41*b14 - pfadd mm6,mm5 ;// a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13 - pfmul mm3,[edx+56] ;// a41*b44 | a41*b34 - pfadd mm2,mm1 ;// a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33 - pfadd mm7,mm6 ;// a41*b24 + a21*b22 + a11*b21 + a31*b23 | a41*b14 + a21*b12 + a11*b11 + a31*b13 - movq mm0,[ecx+16] ;// a22 | a12 - pfadd mm3,mm2 ;// a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33 - movq mm1,[ecx+24] ;// a42 | a32 - movq [eax],mm7 ;// r21 | r11 - movq mm4,[edx] ;// b21 | b11 - movq [eax+8],mm3 ;// r41 | r31 - - punpckhdq mm2,mm0 ;// a22 | XXX - movq mm5,[edx+16] ;// b22 | b12 - punpckhdq mm3,mm1 ;// a42 | XXX - movq mm6,[edx+32] ;// b23 | b13 - punpckldq mm0,mm0 ;// a12 | a12 - punpckldq mm1,mm1 ;// a32 | a32 - pfmul mm4,mm0 ;// a12*b21 | a12*b11 - punpckhdq mm2,mm2 ;// a22 | a22 - pfmul mm0,[edx+8] ;// a12*b41 | a12*b31 - movq mm7,[edx+48] ;// b24 | b14 - pfmul mm5,mm2 ;// a22*b22 | a22*b12 - punpckhdq mm3,mm3 ;// a42 | a42 - pfmul mm2,[edx+24] ;// a22*b42 | a22*b32 - pfmul mm6,mm1 ;// a32*b23 | a32*b13 - pfadd mm5,mm4 ;// a12*b21 + a22*b22 | a12*b11 + a22*b12 - pfmul mm1,[edx+40] ;// a32*b43 | a32*b33 - pfadd mm2,mm0 ;// a12*b41 + a22*b42 | a12*b11 + a22*b32 - pfmul mm7,mm3 ;// a42*b24 | a42*b14 - pfadd mm6,mm5 ;// a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12 - pfmul mm3,[edx+56] ;// a42*b44 | a42*b34 - pfadd mm2,mm1 ;// a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b11 + a22*b32 - pfadd mm7,mm6 ;// a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12 - movq mm0,[ecx+32] ;// a23 | a13 - pfadd mm3,mm2 ;// a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b11 + a22*b32 - movq mm1,[ecx+40] ;// a43 | a33 - movq [eax+16],mm7 ;// r22 | r12 - movq mm4,[edx] ;// b21 | b11 - movq [eax+24],mm3 ;// r42 | r32 - - punpckhdq mm2,mm0 ;// a23 | XXX - movq mm5,[edx+16] ;// b22 | b12 - punpckhdq mm3,mm1 ;// a43 | XXX - movq mm6,[edx+32] ;// b23 | b13 - punpckldq mm0,mm0 ;// a13 | a13 - punpckldq mm1,mm1 ;// a33 | a33 - pfmul mm4,mm0 ;// a13*b21 | a13*b11 - punpckhdq mm2,mm2 ;// a23 | a23 - pfmul mm0,[edx+8] ;// a13*b41 | a13*b31 - movq mm7,[edx+48] ;// b24 | b14 - pfmul mm5,mm2 ;// a23*b22 | a23*b12 - punpckhdq mm3,mm3 ;// a43 | a43 - pfmul mm2,[edx+24] ;// a23*b42 | a23*b32 - pfmul mm6,mm1 ;// a33*b23 | a33*b13 - pfadd mm5,mm4 ;// a23*b22 + a13*b21 | a23*b12 + a13*b11 - pfmul mm1,[edx+40] ;// a33*b43 | a33*b33 - pfadd mm2,mm0 ;// a13*b41 + a23*b42 | a13*b31 + a23*b32 - pfmul mm7,mm3 ;// a43*b24 | a43*b14 - pfadd mm6,mm5 ;// a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11 - pfmul mm3,[edx+56] ;// a43*b44 | a43*b34 - pfadd mm2,mm1 ;// a33*b43*a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32 - pfadd mm7,mm6 ;// a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11 - movq mm0,[ecx+48] ;// a24 | a14 - pfadd mm3,mm2 ;// a43*b44 + a33*b43*a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32 - movq mm1,[ecx+56] ;// a44 | a34 - movq [eax+32],mm7 ;// r23 | r13 - movq mm4,[edx] ;// b21 | b11 - movq [eax+40],mm3 ;// r43 | r33 - - punpckhdq mm2,mm0 ;// a24 | XXX - movq mm5,[edx+16] ;// b22 | b12 - punpckhdq mm3,mm1 ;// a44 | XXX - movq mm6,[edx+32] ;// b23 | b13 - punpckldq mm0,mm0 ;// a14 | a14 - punpckldq mm1,mm1 ;// a34 | a34 - pfmul mm4,mm0 ;// a14*b21 | a14*b11 - punpckhdq mm2,mm2 ;// a24 | a24 - pfmul mm0,[edx+8] ;// a14*b41 | a14*b31 - movq mm7,[edx+48] ;// b24 | b14 - pfmul mm5,mm2 ;// a24*b22 | a24*b12 - punpckhdq mm3,mm3 ;// a44 | a44 - pfmul mm2,[edx+24] ;// a24*b 42 | a24*b32 - pfmul mm6,mm1 ;// a34*b23 | a34*b13 - pfadd mm5,mm4 ;// a14*b21 + a24*b22 | a14*b11 + a24*b12 - pfmul mm1,[edx+40] ;// a34*b43 | a34*b33 - pfadd mm2,mm0 ;// a14*b41 + a24*b 42 | a14*b31 + a24*b32 - pfmul mm7,mm3 ;// a44*b24 | a44*b14 - pfadd mm6,mm5 ;// a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12 - pfmul mm3,[edx+56] ;// a44*b44 | a44*b34 - pfadd mm2,mm1 ;// a34*b43 + a14*b41 + a24*b 42 | a34*b33 + a14*b31 + a24*b32 - pfadd mm7,mm6 ;// a44*b24 + a14*b23 + a24*b 42 | a44*b14 + a14*b31 + a24*b32 - pfadd mm3,mm2 ;// a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32 - movq [eax+48],mm7 ;// r24 | r14 - movq [eax+56],mm3 ;// r44 | r34 - femms - } -} -#elif defined(TORQUE_SUPPORTS_NASM) -#define ADD_3DNOW_FUNCS -extern "C" -{ - void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result); -} - -#endif - -void mInstall_AMD_Math() -{ -#if defined(ADD_3DNOW_FUNCS) - m_matF_x_matF = Athlon_MatrixF_x_MatrixF; -#endif - // m_matF_x_point3F = Athlon_MatrixF_x_Point3F; - // m_matF_x_vectorF = Athlon_MatrixF_x_VectorF; -} - diff --git a/Engine/source/math/mMathAMD_ASM.asm b/Engine/source/math/mMathAMD_ASM.asm deleted file mode 100644 index 74d3fa1ec..000000000 --- a/Engine/source/math/mMathAMD_ASM.asm +++ /dev/null @@ -1,177 +0,0 @@ -;----------------------------------------------------------------------------- -; Copyright (c) 2012 GarageGames, LLC -; -; Permission is hereby granted, free of charge, to any person obtaining a copy -; of this software and associated documentation files (the "Software"), to -; deal in the Software without restriction, including without limitation the -; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -; sell copies of the Software, and to permit persons to whom the Software is -; furnished to do so, subject to the following conditions: -; -; The above copyright notice and this permission notice shall be included in -; all copies or substantial portions of the Software. -; -; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -; IN THE SOFTWARE. -;----------------------------------------------------------------------------- - - -segment .data - -matA dd 0 -result dd 0 -matB dd 0 - -segment .text - -%macro export_fn 1 - %ifidn __OUTPUT_FORMAT__, elf - ; No underscore needed for ELF object files - global %1 - %1: - %else - global _%1 - _%1: - %endif -%endmacro - - -%define arg(x) [esp+(x*4)] - - - -;void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result) - -export_fn Athlon_MatrixF_x_MatrixF - - mov ecx, arg(1) - mov edx, arg(2) - mov eax, arg(3) - - femms - prefetch [ecx+32] ; These may help - - prefetch [edx+32] ; and probably don't hurt - - movq mm0,[ecx] ; a21 | a11 - movq mm1,[ecx+8] ; a41 | a31 - movq mm4,[edx] ; b21 | b11 - punpckhdq mm2,mm0 ; a21 | - movq mm5,[edx+16] ; b22 | b12 - punpckhdq mm3,mm1 ; a41 | - movq mm6,[edx+32] ; b23 | b13 - punpckldq mm0,mm0 ; a11 | a11 - punpckldq mm1,mm1 ; a31 | a31 - pfmul mm4,mm0 ; a11*b21 | a11*b11 - punpckhdq mm2,mm2 ; a21 | a21 - pfmul mm0,[edx+8] ; a11*b41 | a11*b31 - movq mm7,[edx+48] ; b24 | b14 - pfmul mm5,mm2 ; a21*b22 | a21*b12 - punpckhdq mm3,mm3 ; a41 | a41 - pfmul mm2,[edx+24] ; a21*b42 | a21*b32 - pfmul mm6,mm1 ; a31*b23 | a31*b13 - pfadd mm5,mm4 ; a21*b22 + a11*b21 | a21*b12 + a11*b11 - pfmul mm1,[edx+40] ; a31*b43 | a31*b33 - pfadd mm2,mm0 ; a21*b42 + a11*b41 | a21*b32 + a11*b31 - pfmul mm7,mm3 ; a41*b24 | a41*b14 - pfadd mm6,mm5 ; a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13 - pfmul mm3,[edx+56] ; a41*b44 | a41*b34 - pfadd mm2,mm1 ; a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33 - pfadd mm7,mm6 ; a41*b24 + a21*b22 + a11*b21 + a31*b23 | a41*b14 + a21*b12 + a11*b11 + a31*b13 - movq mm0,[ecx+16] ; a22 | a12 - pfadd mm3,mm2 ; a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33 - movq mm1,[ecx+24] ; a42 | a32 - movq [eax],mm7 ; r21 | r11 - movq mm4,[edx] ; b21 | b11 - movq [eax+8],mm3 ; r41 | r31 - - punpckhdq mm2,mm0 ; a22 | XXX - movq mm5,[edx+16] ; b22 | b12 - punpckhdq mm3,mm1 ; a42 | XXX - movq mm6,[edx+32] ; b23 | b13 - punpckldq mm0,mm0 ; a12 | a12 - punpckldq mm1,mm1 ; a32 | a32 - pfmul mm4,mm0 ; a12*b21 | a12*b11 - punpckhdq mm2,mm2 ; a22 | a22 - pfmul mm0,[edx+8] ; a12*b41 | a12*b31 - movq mm7,[edx+48] ; b24 | b14 - pfmul mm5,mm2 ; a22*b22 | a22*b12 - punpckhdq mm3,mm3 ; a42 | a42 - pfmul mm2,[edx+24] ; a22*b42 | a22*b32 - pfmul mm6,mm1 ; a32*b23 | a32*b13 - pfadd mm5,mm4 ; a12*b21 + a22*b22 | a12*b11 + a22*b12 - pfmul mm1,[edx+40] ; a32*b43 | a32*b33 - pfadd mm2,mm0 ; a12*b41 + a22*b42 | a12*b11 + a22*b32 - pfmul mm7,mm3 ; a42*b24 | a42*b14 - pfadd mm6,mm5 ; a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12 - pfmul mm3,[edx+56] ; a42*b44 | a42*b34 - pfadd mm2,mm1 ; a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b11 + a22*b32 - pfadd mm7,mm6 ; a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12 - movq mm0,[ecx+32] ; a23 | a13 - pfadd mm3,mm2 ; a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b11 + a22*b32 - movq mm1,[ecx+40] ; a43 | a33 - movq [eax+16],mm7 ; r22 | r12 - movq mm4,[edx] ; b21 | b11 - movq [eax+24],mm3 ; r42 | r32 - - punpckhdq mm2,mm0 ; a23 | XXX - movq mm5,[edx+16] ; b22 | b12 - punpckhdq mm3,mm1 ; a43 | XXX - movq mm6,[edx+32] ; b23 | b13 - punpckldq mm0,mm0 ; a13 | a13 - punpckldq mm1,mm1 ; a33 | a33 - pfmul mm4,mm0 ; a13*b21 | a13*b11 - punpckhdq mm2,mm2 ; a23 | a23 - pfmul mm0,[edx+8] ; a13*b41 | a13*b31 - movq mm7,[edx+48] ; b24 | b14 - pfmul mm5,mm2 ; a23*b22 | a23*b12 - punpckhdq mm3,mm3 ; a43 | a43 - pfmul mm2,[edx+24] ; a23*b42 | a23*b32 - pfmul mm6,mm1 ; a33*b23 | a33*b13 - pfadd mm5,mm4 ; a23*b22 + a13*b21 | a23*b12 + a13*b11 - pfmul mm1,[edx+40] ; a33*b43 | a33*b33 - pfadd mm2,mm0 ; a13*b41 + a23*b42 | a13*b31 + a23*b32 - pfmul mm7,mm3 ; a43*b24 | a43*b14 - pfadd mm6,mm5 ; a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11 - pfmul mm3,[edx+56] ; a43*b44 | a43*b34 - pfadd mm2,mm1 ; a33*b43*a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32 - pfadd mm7,mm6 ; a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11 - movq mm0,[ecx+48] ; a24 | a14 - pfadd mm3,mm2 ; a43*b44 + a33*b43*a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32 - movq mm1,[ecx+56] ; a44 | a34 - movq [eax+32],mm7 ; r23 | r13 - movq mm4,[edx] ; b21 | b11 - movq [eax+40],mm3 ; r43 | r33 - - punpckhdq mm2,mm0 ; a24 | XXX - movq mm5,[edx+16] ; b22 | b12 - punpckhdq mm3,mm1 ; a44 | XXX - movq mm6,[edx+32] ; b23 | b13 - punpckldq mm0,mm0 ; a14 | a14 - punpckldq mm1,mm1 ; a34 | a34 - pfmul mm4,mm0 ; a14*b21 | a14*b11 - punpckhdq mm2,mm2 ; a24 | a24 - pfmul mm0,[edx+8] ; a14*b41 | a14*b31 - movq mm7,[edx+48] ; b24 | b14 - pfmul mm5,mm2 ; a24*b22 | a24*b12 - punpckhdq mm3,mm3 ; a44 | a44 - pfmul mm2,[edx+24] ; a24*b 42 | a24*b32 - pfmul mm6,mm1 ; a34*b23 | a34*b13 - pfadd mm5,mm4 ; a14*b21 + a24*b22 | a14*b11 + a24*b12 - pfmul mm1,[edx+40] ; a34*b43 | a34*b33 - pfadd mm2,mm0 ; a14*b41 + a24*b 42 | a14*b31 + a24*b32 - pfmul mm7,mm3 ; a44*b24 | a44*b14 - pfadd mm6,mm5 ; a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12 - pfmul mm3,[edx+56] ; a44*b44 | a44*b34 - pfadd mm2,mm1 ; a34*b43 + a14*b41 + a24*b 42 | a34*b33 + a14*b31 + a24*b32 - pfadd mm7,mm6 ; a44*b24 + a14*b23 + a24*b 42 | a44*b14 + a14*b31 + a24*b32 - pfadd mm3,mm2 ; a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32 - movq [eax+48],mm7 ; r24 | r14 - movq [eax+56],mm3 ; r44 | r34 - femms - - ret diff --git a/Engine/source/math/test/mMatrixTest.cpp b/Engine/source/math/test/mMatrixTest.cpp index beb0b1d13..b5276ce22 100644 --- a/Engine/source/math/test/mMatrixTest.cpp +++ b/Engine/source/math/test/mMatrixTest.cpp @@ -60,19 +60,6 @@ TEST(MatrixF, MultiplyImplmentations) U32 cpuProperties = Platform::SystemInfo.processor.properties; bool same; - // Test 3D NOW! if it is available - F32 mrAMD[16]; - if (cpuProperties & CPU_PROP_3DNOW) - { - Athlon_MatrixF_x_MatrixF(m1, m2, mrAMD); - - same = true; - for (S32 i = 0; i < 16; i++) - same &= mIsEqual(mrC[i], mrAMD[i]); - - EXPECT_TRUE(same) << "Matrix multiplication verification failed. (C vs. 3D NOW!)"; - } - // Test SSE if it is available F32 mrSSE[16]; if (cpuProperties & CPU_PROP_SSE) diff --git a/Engine/source/platform/platform.h b/Engine/source/platform/platform.h index 7158c5163..acb929a9f 100644 --- a/Engine/source/platform/platform.h +++ b/Engine/source/platform/platform.h @@ -55,50 +55,11 @@ /// @note These enums must be globally scoped so that they work with the inline assembly enum ProcessorType { - // x86 CPU_X86Compatible, - CPU_Intel_Unknown, - CPU_Intel_486, - CPU_Intel_Pentium, - CPU_Intel_PentiumMMX, - CPU_Intel_PentiumPro, - CPU_Intel_PentiumII, - CPU_Intel_PentiumCeleron, - CPU_Intel_PentiumIII, - CPU_Intel_Pentium4, - CPU_Intel_PentiumM, - CPU_Intel_Core, - CPU_Intel_Core2, - CPU_Intel_Corei7Xeon, // Core i7 or Xeon - CPU_AMD_K6, - CPU_AMD_K6_2, - CPU_AMD_K6_3, - CPU_AMD_Athlon, - CPU_AMD_Phenom, - CPU_AMD_PhenomII, - CPU_AMD_Bulldozer, - CPU_AMD_Unknown, - CPU_Cyrix_6x86, - CPU_Cyrix_MediaGX, - CPU_Cyrix_6x86MX, - CPU_Cyrix_GXm, ///< Media GX w/ MMX - CPU_Cyrix_Unknown, - - // PowerPC - CPU_PowerPC_Unknown, - CPU_PowerPC_601, - CPU_PowerPC_603, - CPU_PowerPC_603e, - CPU_PowerPC_603ev, - CPU_PowerPC_604, - CPU_PowerPC_604e, - CPU_PowerPC_604ev, - CPU_PowerPC_G3, - CPU_PowerPC_G4, - CPU_PowerPC_G4_7450, - CPU_PowerPC_G4_7455, - CPU_PowerPC_G4_7447, - CPU_PowerPC_G5, + CPU_ArmCompatible, + CPU_Intel, + CPU_AMD, + CPU_Apple }; /// Properties for CPU. @@ -107,17 +68,17 @@ enum ProcessorProperties CPU_PROP_C = (1<<0), ///< We should use C fallback math functions. CPU_PROP_FPU = (1<<1), ///< Has an FPU. (It better!) CPU_PROP_MMX = (1<<2), ///< Supports MMX instruction set extension. - CPU_PROP_3DNOW = (1<<3), ///< Supports AMD 3dNow! instruction set extension. - CPU_PROP_SSE = (1<<4), ///< Supports SSE instruction set extension. - CPU_PROP_RDTSC = (1<<5), ///< Supports Read Time Stamp Counter op. - CPU_PROP_SSE2 = (1<<6), ///< Supports SSE2 instruction set extension. - CPU_PROP_SSE3 = (1<<7), ///< Supports SSE3 instruction set extension. - CPU_PROP_SSE3xt = (1<<8), ///< Supports extended SSE3 instruction set - CPU_PROP_SSE4_1 = (1<<9), ///< Supports SSE4_1 instruction set extension. - CPU_PROP_SSE4_2 = (1<<10), ///< Supports SSE4_2 instruction set extension. - CPU_PROP_MP = (1<<11), ///< This is a multi-processor system. - CPU_PROP_LE = (1<<12), ///< This processor is LITTLE ENDIAN. - CPU_PROP_64bit = (1<<13), ///< This processor is 64-bit capable + CPU_PROP_SSE = (1<<3), ///< Supports SSE instruction set extension. + CPU_PROP_SSE2 = (1<<4), ///< Supports SSE2 instruction set extension. + CPU_PROP_SSE3 = (1<<5), ///< Supports SSE3 instruction set extension. + CPU_PROP_SSE3ex = (1<<6), ///< Supports Supplemental SSE3 instruction set + CPU_PROP_SSE4_1 = (1<<7), ///< Supports SSE4_1 instruction set extension. + CPU_PROP_SSE4_2 = (1<<8), ///< Supports SSE4_2 instruction set extension. + CPU_PROP_AVX = (1<<9), ///< Supports AVX256 instruction set extension. + CPU_PROP_MP = (1<<10), ///< This is a multi-processor system. + CPU_PROP_LE = (1<<11), ///< This processor is LITTLE ENDIAN. + CPU_PROP_64bit = (1<<12), ///< This processor is 64-bit capable + CPU_PROP_NEON = (1<<13), ///< Supports the Arm Neon instruction set extension. }; /// Processor info manager. @@ -336,7 +297,6 @@ namespace Platform bool isHyperThreaded; U32 numLogicalProcessors; U32 numPhysicalProcessors; - U32 numAvailableCores; U32 properties; // CPU type specific enum } processor; }; diff --git a/Engine/source/platform/platformCPU.cpp b/Engine/source/platform/platformCPU.cpp index 4eaa33b8d..f3246c6ed 100644 --- a/Engine/source/platform/platformCPU.cpp +++ b/Engine/source/platform/platformCPU.cpp @@ -28,264 +28,47 @@ Signal Platform::SystemInfoReady; -enum CPUFlags -{ - // EDX Register flags - BIT_FPU = BIT(0), - BIT_RDTSC = BIT(4), - BIT_MMX = BIT(23), - BIT_SSE = BIT(25), - BIT_SSE2 = BIT(26), - BIT_3DNOW = BIT(31), - - // These use a different value for comparison than the above flags (ECX Register) - BIT_SSE3 = BIT(0), - BIT_SSE3xt = BIT(9), - BIT_SSE4_1 = BIT(19), - BIT_SSE4_2 = BIT(20), -}; - // fill the specified structure with information obtained from asm code -void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, - char* vendor, U32 processor, U32 properties, U32 properties2) +void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand) { - Platform::SystemInfo.processor.properties |= (properties & BIT_FPU) ? CPU_PROP_FPU : 0; - Platform::SystemInfo.processor.properties |= (properties & BIT_RDTSC) ? CPU_PROP_RDTSC : 0; - Platform::SystemInfo.processor.properties |= (properties & BIT_MMX) ? CPU_PROP_MMX : 0; - if (dStricmp(vendor, "GenuineIntel") == 0) { - pInfo.properties |= (properties & BIT_SSE) ? CPU_PROP_SSE : 0; - pInfo.properties |= (properties & BIT_SSE2) ? CPU_PROP_SSE2 : 0; - pInfo.properties |= (properties2 & BIT_SSE3) ? CPU_PROP_SSE3 : 0; - pInfo.properties |= (properties2 & BIT_SSE3xt) ? CPU_PROP_SSE3xt : 0; - pInfo.properties |= (properties2 & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0; - pInfo.properties |= (properties2 & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0; - - pInfo.type = CPU_Intel_Unknown; - // switch on processor family code - switch ((processor >> 8) & 0x0f) - { - case 4: - pInfo.type = CPU_Intel_486; - pInfo.name = StringTable->insert("Intel 486 class"); - break; - - // Pentium Family - case 5: - // switch on processor model code - switch ((processor >> 4) & 0xf) - { - case 1: - case 2: - case 3: - pInfo.type = CPU_Intel_Pentium; - pInfo.name = StringTable->insert("Intel Pentium"); - break; - case 4: - pInfo.type = CPU_Intel_PentiumMMX; - pInfo.name = StringTable->insert("Intel Pentium MMX"); - break; - default: - pInfo.type = CPU_Intel_Pentium; - pInfo.name = StringTable->insert( "Intel (unknown)" ); - break; - } - break; - - // Pentium Pro/II/II family - case 6: - { - U32 extendedModel = ( processor & 0xf0000 ) >> 16; - // switch on processor model code - switch ((processor >> 4) & 0xf) - { - case 1: - pInfo.type = CPU_Intel_PentiumPro; - pInfo.name = StringTable->insert("Intel Pentium Pro"); - break; - case 3: - case 5: - pInfo.type = CPU_Intel_PentiumII; - pInfo.name = StringTable->insert("Intel Pentium II"); - break; - case 6: - pInfo.type = CPU_Intel_PentiumCeleron; - pInfo.name = StringTable->insert("Intel Pentium Celeron"); - break; - case 7: - case 8: - case 11: - pInfo.type = CPU_Intel_PentiumIII; - pInfo.name = StringTable->insert("Intel Pentium III"); - break; - case 0xA: - if( extendedModel == 1) - { - pInfo.type = CPU_Intel_Corei7Xeon; - pInfo.name = StringTable->insert( "Intel Core i7 / Xeon" ); - } - else - { - pInfo.type = CPU_Intel_PentiumIII; - pInfo.name = StringTable->insert( "Intel Pentium III Xeon" ); - } - break; - case 0xD: - if( extendedModel == 1 ) - { - pInfo.type = CPU_Intel_Corei7Xeon; - pInfo.name = StringTable->insert( "Intel Core i7 / Xeon" ); - } - else - { - pInfo.type = CPU_Intel_PentiumM; - pInfo.name = StringTable->insert( "Intel Pentium/Celeron M" ); - } - break; - case 0xE: - pInfo.type = CPU_Intel_Core; - pInfo.name = StringTable->insert( "Intel Core" ); - break; - case 0xF: - pInfo.type = CPU_Intel_Core2; - pInfo.name = StringTable->insert( "Intel Core 2" ); - break; - default: - pInfo.type = CPU_Intel_PentiumPro; - pInfo.name = StringTable->insert( "Intel (unknown)" ); - break; - } - break; - } - - // Pentium4 Family - case 0xf: - pInfo.type = CPU_Intel_Pentium4; - pInfo.name = StringTable->insert( "Intel Pentium 4" ); - break; - - default: - pInfo.type = CPU_Intel_Unknown; - pInfo.name = StringTable->insert( "Intel (unknown)" ); - break; - } + pInfo.type = CPU_Intel; + pInfo.name = StringTable->insert(brand ? brand : "Intel (Unknown)"); } //-------------------------------------- + else if (dStricmp(vendor, "AuthenticAMD") == 0) + { + pInfo.name = StringTable->insert(brand ? brand : "AMD (unknown)"); + pInfo.type = CPU_AMD; + } + else if (dStricmp(vendor, "Apple") == 0) + { + pInfo.name = StringTable->insert(brand ? brand : "Apple (unknown)"); + pInfo.type = CPU_Apple; + } else - if (dStricmp(vendor, "AuthenticAMD") == 0) - { - // AthlonXP processors support SSE - pInfo.properties |= (properties & BIT_SSE) ? CPU_PROP_SSE : 0; - pInfo.properties |= ( properties & BIT_SSE2 ) ? CPU_PROP_SSE2 : 0; - pInfo.properties |= (properties & BIT_3DNOW) ? CPU_PROP_3DNOW : 0; - // Phenom and PhenomII support SSE3, SSE4a - pInfo.properties |= ( properties2 & BIT_SSE3 ) ? CPU_PROP_SSE3 : 0; - pInfo.properties |= ( properties2 & BIT_SSE4_1 ) ? CPU_PROP_SSE4_1 : 0; - // switch on processor family code - switch ((processor >> 8) & 0xf) - { - // K6 Family - case 5: - // switch on processor model code - switch ((processor >> 4) & 0xf) - { - case 0: - case 1: - case 2: - case 3: - pInfo.type = CPU_AMD_K6_3; - pInfo.name = StringTable->insert("AMD K5"); - break; - case 4: - case 5: - case 6: - case 7: - pInfo.type = CPU_AMD_K6; - pInfo.name = StringTable->insert("AMD K6"); - break; - case 8: - pInfo.type = CPU_AMD_K6_2; - pInfo.name = StringTable->insert("AMD K6-2"); - break; - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - pInfo.type = CPU_AMD_K6_3; - pInfo.name = StringTable->insert("AMD K6-3"); - break; - } - break; - - // Athlon Family - case 6: - pInfo.type = CPU_AMD_Athlon; - pInfo.name = StringTable->insert("AMD Athlon"); - break; - - // Phenom Family - case 15: - pInfo.type = CPU_AMD_Phenom; - pInfo.name = StringTable->insert("AMD Phenom"); - break; - - // Phenom II Family - case 16: - pInfo.type = CPU_AMD_PhenomII; - pInfo.name = StringTable->insert("AMD Phenom II"); - break; - - // Bulldozer Family - case 17: - pInfo.type = CPU_AMD_Bulldozer; - pInfo.name = StringTable->insert("AMD Bulldozer"); - break; - - default: - pInfo.type = CPU_AMD_Unknown; - pInfo.name = StringTable->insert("AMD (unknown)"); - break; - } - } - //-------------------------------------- - else - if (dStricmp(vendor, "CyrixInstead") == 0) - { - switch (processor) - { - case 0x520: - pInfo.type = CPU_Cyrix_6x86; - pInfo.name = StringTable->insert("Cyrix 6x86"); - break; - case 0x440: - pInfo.type = CPU_Cyrix_MediaGX; - pInfo.name = StringTable->insert("Cyrix Media GX"); - break; - case 0x600: - pInfo.type = CPU_Cyrix_6x86MX; - pInfo.name = StringTable->insert("Cyrix 6x86mx/MII"); - break; - case 0x540: - pInfo.type = CPU_Cyrix_GXm; - pInfo.name = StringTable->insert("Cyrix GXm"); - break; - default: - pInfo.type = CPU_Cyrix_Unknown; - pInfo.name = StringTable->insert("Cyrix (unknown)"); - break; - } - } - + { +#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64) + + pInfo.name = StringTable->insert(brand ? brand : "x86 Compatible (unknown)"); + pInfo.type = CPU_X86Compatible; + +#elif defined(TORQUE_CPU_ARM64) + pInfo.name = StringTable->insert(brand ? brand : "Arm Compatible (unknown)"); + pInfo.type = CPU_ArmCompatible; + +#else +#error "Unknown CPU Architecture" + +#endif + } + // Get multithreading caps. - - CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numAvailableCores, pInfo.numPhysicalProcessors ); + CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numPhysicalProcessors ); pInfo.isHyperThreaded = CPUInfo::isHyperThreaded( config ); pInfo.isMultiCore = CPUInfo::isMultiCore( config ); // Trigger the signal Platform::SystemInfoReady.trigger(); -} +} \ No newline at end of file diff --git a/Engine/source/platform/platformCPUCount.cpp b/Engine/source/platform/platformCPUCount.cpp deleted file mode 100644 index a3fe99d67..000000000 --- a/Engine/source/platform/platformCPUCount.cpp +++ /dev/null @@ -1,657 +0,0 @@ -// Original code is: -// Copyright (c) 2005 Intel Corporation -// All Rights Reserved -// -// CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform -// The three forms of HW multithreading are: Multi-processor, Multi-core, and -// HyperThreading Technology. -// This application enumerates all the logical processors enabled by OS and BIOS, -// determine the HW topology of these enabled logical processors in the system -// using information provided by CPUID instruction. -// A multi-processing system can support any combination of the three forms of HW -// multi-threading support. The relevant topology can be identified using a -// three level decomposition of the "initial APIC ID" into -// Package_id, core_id, and SMT_id. Such decomposition provides a three-level map of -// the topology of hardware resources and -// allow multi-threaded software to manage shared hardware resources in -// the platform to reduce resource contention - -// Multicore detection algorithm for processor and cache topology requires -// all leaf functions of CPUID instructions be available. System administrator -// must ensure BIOS settings is not configured to restrict CPUID functionalities. -//------------------------------------------------------------------------------------------------- - -#if defined(TORQUE_OS_LINUX) || defined(LINUX) - -// TODO GCC code don't compile on Release with optimizations, mover code to platform layer - -#else - -#include "platform/platform.h" -#include "platform/platformCPUCount.h" - -#if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX) - -#ifdef TORQUE_OS_LINUX -// The Linux source code listing can be compiled using Linux kernel verison 2.6 -// or higher (e.g. RH 4AS-2.8 using GCC 3.4.4). -// Due to syntax variances of Linux affinity APIs with earlier kernel versions -// and dependence on glibc library versions, compilation on Linux environment -// with older kernels and compilers may require kernel patches or compiler upgrades. - -#include -#include -#include -#include -#define DWORD unsigned long -#elif defined( TORQUE_OS_WIN ) -#include -#elif defined( TORQUE_OS_MAC ) -# include -# include -#else -#error Not implemented on platform. -#endif -#include -#include - -namespace CPUInfo { - -#define HWD_MT_BIT 0x10000000 // EDX[28] Bit 28 is set if HT or multi-core is supported -#define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical - // processors per physical processor when execute cpuid with - // eax set to 1 -#define NUM_CORE_BITS 0xFC000000 // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one - // per physical processor when execute cpuid with - // eax set to 4. - - -#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique - // initial APIC ID for the processor this code is running on. - - - #ifndef TORQUE_OS_MAC - static U32 CpuIDSupported(void); - static U32 find_maskwidth(unsigned int); - static U32 HWD_MTSupported(void); - static U32 MaxLogicalProcPerPhysicalProc(void); - static U32 MaxCorePerPhysicalProc(void); - static U8 GetAPIC_ID(void); - static U8 GetNzbSubID(U8, U8, U8); - #endif - - static char g_s3Levels[2048]; - -#ifndef TORQUE_OS_MAC - - // - // CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return - // the maximum supported standard function. - // - static U32 CpuIDSupported(void) - { - U32 maxInputValue = 0; - // If CPUID instruction is supported -#ifdef TORQUE_COMPILER_GCC - try - { - // call cpuid with eax = 0 - asm - ( - "pushl %%ebx\n\t" - "xorl %%eax,%%eax\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=a" (maxInputValue) - : - : "%ecx", "%edx" - ); - } - catch (...) - { - return(0); // cpuid instruction is unavailable - } -#elif defined( TORQUE_COMPILER_VISUALC ) - try - { - // call cpuid with eax = 0 - __asm - { - xor eax, eax - cpuid - mov maxInputValue, eax - } - } - catch (...) - { - // cpuid instruction is unavailable - } -#else -# error Not implemented. -#endif - - return maxInputValue; - } - - - - // - // Function returns the maximum cores per physical package. Note that the number of - // AVAILABLE cores per physical to be used by an application might be less than this - // maximum value. - // - - static U32 MaxCorePerPhysicalProc(void) - { - - U32 Regeax = 0; - - if (!HWD_MTSupported()) return (U32) 1; // Single core -#ifdef TORQUE_COMPILER_GCC - { - asm - ( - "pushl %ebx\n\t" - "xorl %eax, %eax\n\t" - "cpuid\n\t" - "cmpl $4, %eax\n\t" // check if cpuid supports leaf 4 - "jl .single_core\n\t" // Single core - "movl $4, %eax\n\t" - "movl $0, %ecx\n\t" // start with index = 0; Leaf 4 reports - "popl %ebx\n\t" - ); // at least one valid cache level - asm - ( - "cpuid" - : "=a" (Regeax) - : - : "%ecx", "%edx" - ); - asm - ( - "jmp .multi_core\n" - ".single_core:\n\t" - "xor %eax, %eax\n" - ".multi_core:" - ); - } -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - xor eax, eax - cpuid - cmp eax, 4 // check if cpuid supports leaf 4 - jl single_core // Single core - mov eax, 4 - mov ecx, 0 // start with index = 0; Leaf 4 reports - cpuid // at least one valid cache level - mov Regeax, eax - jmp multi_core - -single_core: - xor eax, eax - -multi_core: - - } -#else -# error Not implemented. -#endif - return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1; - - } - - - - // - // The function returns 0 when the hardware multi-threaded bit is not set. - // - static U32 HWD_MTSupported(void) - { - - - U32 Regedx = 0; - - - if ((CpuIDSupported() >= 1)) - { -#ifdef TORQUE_COMPILER_GCC - asm - ( - "pushl %%ebx\n\t" - "movl $1,%%eax\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=d" (Regedx) - : - : "%eax","%ecx" - ); -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - mov eax, 1 - cpuid - mov Regedx, edx - } -#else -# error Not implemented. -#endif - } - - return (Regedx & HWD_MT_BIT); - - - } - - - - // - // Function returns the maximum logical processors per physical package. Note that the number of - // AVAILABLE logical processors per physical to be used by an application might be less than this - // maximum value. - // - static U32 MaxLogicalProcPerPhysicalProc(void) - { - - U32 Regebx = 0; - - if (!HWD_MTSupported()) return (U32) 1; -#ifdef TORQUE_COMPILER_GCC - asm - ( - "movl $1,%%eax\n\t" - "cpuid" - : "=b" (Regebx) - : - : "%eax","%ecx","%edx" - ); -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - mov eax, 1 - cpuid - mov Regebx, ebx - } -#else -# error Not implemented. -#endif - return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16); - - } - - - static U8 GetAPIC_ID(void) - { - - U32 Regebx = 0; -#ifdef TORQUE_COMPILER_GCC - asm - ( - "movl $1, %%eax\n\t" - "cpuid" - : "=b" (Regebx) - : - : "%eax","%ecx","%edx" - ); - -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - mov eax, 1 - cpuid - mov Regebx, ebx - } -#else -# error Not implemented. -#endif - - return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24); - - } - - // - // Determine the width of the bit field that can represent the value count_item. - // - U32 find_maskwidth(U32 CountItem) - { - U32 MaskWidth, - count = CountItem; -#ifdef TORQUE_COMPILER_GCC - asm - ( -#ifdef __x86_64__ // define constant to compile - "push %%rcx\n\t" // under 64-bit Linux - "push %%rax\n\t" -#else - "pushl %%ecx\n\t" - "pushl %%eax\n\t" -#endif - // "movl $count, %%eax\n\t" //done by Assembler below - "xorl %%ecx, %%ecx" - // "movl %%ecx, MaskWidth\n\t" //done by Assembler below - : "=c" (MaskWidth) - : "a" (count) - // : "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler - //to put them back when we are done - ); - asm - ( - "decl %%eax\n\t" - "bsrw %%ax,%%cx\n\t" - "jz next\n\t" - "incw %%cx\n\t" - // "movl %%ecx, MaskWidth\n" //done by Assembler below - : "=c" (MaskWidth) - : - ); - asm - ( - "next:\n\t" -#ifdef __x86_64__ - "pop %rax\n\t" - "pop %rcx" -#else - "popl %eax\n\t" - "popl %ecx" -#endif - ); - -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - mov eax, count - mov ecx, 0 - mov MaskWidth, ecx - dec eax - bsr cx, ax - jz next - inc cx - mov MaskWidth, ecx -next: - - } -#else -# error Not implemented. -#endif - return MaskWidth; - } - - - // - // Extract the subset of bit field from the 8-bit value FullID. It returns the 8-bit sub ID value - // - static U8 GetNzbSubID(U8 FullID, - U8 MaxSubIDValue, - U8 ShiftCount) - { - U32 MaskWidth; - U8 MaskBits; - - MaskWidth = find_maskwidth((U32) MaxSubIDValue); - MaskBits = (0xff << ShiftCount) ^ - ((U8) (0xff << (ShiftCount + MaskWidth))); - - return (FullID & MaskBits); - } - -#endif - - - // - // - // - EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum) - { - EConfig StatusFlag = CONFIG_UserConfigIssue; - - g_s3Levels[0] = 0; - TotAvailCore = 1; - PhysicalNum = 1; - - U32 numLPEnabled = 0; - S32 MaxLPPerCore = 1; - -#ifdef TORQUE_OS_MAC - - //FIXME: This isn't a proper port but more or less just some sneaky cheating - // to get around having to mess with yet another crap UNIX-style API. Seems - // like there isn't a way to do this that's working across all OSX incarnations - // and machine configurations anyway. - - S32 numCPUs; - S32 numPackages; - - // Get the number of CPUs. - - size_t len = sizeof( numCPUs ); - if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 ) - return CONFIG_UserConfigIssue; - - // Get the number of packages. - len = sizeof( numPackages ); - if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 ) - return CONFIG_UserConfigIssue; - - TotAvailCore = numCPUs; - TotAvailLogical = numCPUs; - PhysicalNum = numPackages; -#else - - U32 dwAffinityMask; - S32 j = 0; - U8 apicID, PackageIDMask; - U8 tblPkgID[256], tblCoreID[256], tblSMTID[256]; - char tmp[256]; - -#ifdef TORQUE_OS_LINUX - //we need to make sure that this process is allowed to run on - //all of the logical processors that the OS itself can run on. - //A process could acquire/inherit affinity settings that restricts the - // current process to run on a subset of all logical processor visible to OS. - - // Linux doesn't easily allow us to look at the Affinity Bitmask directly, - // but it does provide an API to test affinity maskbits of the current process - // against each logical processor visible under OS. - S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many - //CPUs are currently enabled. - - //this will tell us which processors this process can run on. - cpu_set_t allowedCPUs; - sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs); - - for (S32 i = 0; i < sysNumProcs; i++ ) - { - if ( CPU_ISSET(i, &allowedCPUs) == 0 ) - return CONFIG_UserConfigIssue; - } -#elif defined( TORQUE_OS_WIN ) - DWORD dwProcessAffinity, dwSystemAffinity; - GetProcessAffinityMask(GetCurrentProcess(), - &dwProcessAffinity, - &dwSystemAffinity); - if (dwProcessAffinity != dwSystemAffinity) // not all CPUs are enabled - return CONFIG_UserConfigIssue; -#else -# error Not implemented. -#endif - - // Assume that cores within a package have the SAME number of - // logical processors. Also, values returned by - // MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have - // to be power of 2. - - MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc(); - dwAffinityMask = 1; - -#ifdef TORQUE_OS_LINUX - cpu_set_t currentCPU; - while ( j < sysNumProcs ) - { - CPU_ZERO(¤tCPU); - CPU_SET(j, ¤tCPU); - if ( sched_setaffinity (0, sizeof(currentCPU), ¤tCPU) == 0 ) - { - sleep(0); // Ensure system to switch to the right CPU -#elif defined( TORQUE_OS_WIN ) - while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity) - { - if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask)) - { - Sleep(0); // Ensure system to switch to the right CPU -#else -# error Not implemented. -#endif - apicID = GetAPIC_ID(); - - - // Store SMT ID and core ID of each logical processor - // Shift vlaue for SMT ID is 0 - // Shift value for core ID is the mask width for maximum logical - // processors per core - - tblSMTID[j] = GetNzbSubID(apicID, MaxLPPerCore, 0); - U8 maxCorePPP = MaxCorePerPhysicalProc(); - U8 maskWidth = find_maskwidth(MaxLPPerCore); - tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth); - - // Extract package ID, assume single cluster. - // Shift value is the mask width for max Logical per package - - PackageIDMask = (unsigned char) (0xff << - find_maskwidth(MaxLogicalProcPerPhysicalProc())); - - tblPkgID[j] = apicID & PackageIDMask; - sprintf(tmp," AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d, SMT ID = %d\n", - dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]); - dStrcat(g_s3Levels, tmp, 2048); - - numLPEnabled ++; // Number of available logical processors in the system. - - } // if - - j++; - dwAffinityMask = 1 << j; - } // while - - // restore the affinity setting to its original state -#ifdef TORQUE_OS_LINUX - sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs); - sleep(0); -#elif defined( TORQUE_OS_WIN ) - SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity); - Sleep(0); -#else -# error Not implemented. -#endif - TotAvailLogical = numLPEnabled; - - // - // Count available cores (TotAvailCore) in the system - // - U8 CoreIDBucket[256]; - DWORD ProcessorMask, pCoreMask[256]; - U32 i, ProcessorNum; - - CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0]; - ProcessorMask = 1; - pCoreMask[0] = ProcessorMask; - - for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++) - { - ProcessorMask <<= 1; - for (i = 0; i < TotAvailCore; i++) - { - // Comparing bit-fields of logical processors residing in different packages - // Assuming the bit-masks are the same on all processors in the system. - if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i]) - { - pCoreMask[i] |= ProcessorMask; - break; - } - - } // for i - - if (i == TotAvailCore) // did not match any bucket. Start a new one. - { - CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]; - pCoreMask[i] = ProcessorMask; - - TotAvailCore++; // Number of available cores in the system - - } - - } // for ProcessorNum - - - // - // Count physical processor (PhysicalNum) in the system - // - U8 PackageIDBucket[256]; - DWORD pPackageMask[256]; - - PackageIDBucket[0] = tblPkgID[0]; - ProcessorMask = 1; - pPackageMask[0] = ProcessorMask; - - for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++) - { - ProcessorMask <<= 1; - for (i = 0; i < PhysicalNum; i++) - { - // Comparing bit-fields of logical processors residing in different packages - // Assuming the bit-masks are the same on all processors in the system. - if (tblPkgID[ProcessorNum]== PackageIDBucket[i]) - { - pPackageMask[i] |= ProcessorMask; - break; - } - - } // for i - - if (i == PhysicalNum) // did not match any bucket. Start a new one. - { - PackageIDBucket[i] = tblPkgID[ProcessorNum]; - pPackageMask[i] = ProcessorMask; - - PhysicalNum++; // Total number of physical processors in the system - - } - - } // for ProcessorNum -#endif - - // - // Check to see if the system is multi-core - // Check if the system is hyper-threading - // - if (TotAvailCore > PhysicalNum) - { - // Multi-core - if (MaxLPPerCore == 1) - StatusFlag = CONFIG_MultiCoreAndHTNotCapable; - else if (numLPEnabled > TotAvailCore) - StatusFlag = CONFIG_MultiCoreAndHTEnabled; - else StatusFlag = CONFIG_MultiCoreAndHTDisabled; - - } - else - { - // Single-core - if (MaxLPPerCore == 1) - StatusFlag = CONFIG_SingleCoreAndHTNotCapable; - else if (numLPEnabled > TotAvailCore) - StatusFlag = CONFIG_SingleCoreHTEnabled; - else StatusFlag = CONFIG_SingleCoreHTDisabled; - - - } - - - - return StatusFlag; - } - -} // namespace CPUInfo -#endif - -#endif diff --git a/Engine/source/platform/platformCPUCount.h b/Engine/source/platform/platformCPUCount.h index 2ee07c2eb..66a0c2e12 100644 --- a/Engine/source/platform/platformCPUCount.h +++ b/Engine/source/platform/platformCPUCount.h @@ -29,13 +29,10 @@ namespace CPUInfo { enum EConfig { - CONFIG_UserConfigIssue, CONFIG_SingleCoreHTEnabled, - CONFIG_SingleCoreHTDisabled, CONFIG_SingleCoreAndHTNotCapable, CONFIG_MultiCoreAndHTNotCapable, CONFIG_MultiCoreAndHTEnabled, - CONFIG_MultiCoreAndHTDisabled, }; inline bool isMultiCore( EConfig config ) @@ -44,7 +41,6 @@ namespace CPUInfo { case CONFIG_MultiCoreAndHTNotCapable: case CONFIG_MultiCoreAndHTEnabled: - case CONFIG_MultiCoreAndHTDisabled: return true; default: @@ -65,11 +61,10 @@ namespace CPUInfo } } - EConfig CPUCount( U32& totalAvailableLogical, - U32& totalAvailableCores, - U32& numPhysical ); - + EConfig CPUCount( U32& totalAvailableLogical, U32& totalAvailableCores ); } // namespace CPUInfo +void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand); + #endif // _TORQUE_PLATFORM_PLATFORMCOUNT_H_ diff --git a/Engine/source/platform/platformCPUInfo.asm b/Engine/source/platform/platformCPUInfo.asm deleted file mode 100644 index bce39d220..000000000 --- a/Engine/source/platform/platformCPUInfo.asm +++ /dev/null @@ -1,128 +0,0 @@ -;----------------------------------------------------------------------------- -; Copyright (c) 2012 GarageGames, LLC -; -; Permission is hereby granted, free of charge, to any person obtaining a copy -; of this software and associated documentation files (the "Software"), to -; deal in the Software without restriction, including without limitation the -; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -; sell copies of the Software, and to permit persons to whom the Software is -; furnished to do so, subject to the following conditions: -; -; The above copyright notice and this permission notice shall be included in -; all copies or substantial portions of the Software. -; -; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -; IN THE SOFTWARE. -;----------------------------------------------------------------------------- - - -segment .text - -; syntax: export_fn -%macro export_fn 1 - %ifidn __OUTPUT_FORMAT__, elf - ; No underscore needed for ELF object files - global %1 - %1: - %else - global _%1 - _%1: - %endif -%endmacro - -; push registers -%macro pushreg 0 -; pushad - push ebx - push ebp - push esi - push edi -%endmacro - -; pop registers -%macro popreg 0 - pop edi - pop esi - pop ebp - pop ebx -; popad -%endmacro - -; void detectX86CPUInfo(char *vendor, U32 *processor, U32 *properties); -export_fn detectX86CPUInfo - push ebp - mov ebp, esp - - pushreg - - push edx - push ecx - pushfd - pushfd ; save EFLAGS to stack - pop eax ; move EFLAGS into EAX - mov ebx, eax - xor eax, 0x200000 ; flip bit 21 - push eax - popfd ; restore EFLAGS - pushfd - pop eax - cmp eax, ebx - jz EXIT ; doesn't support CPUID instruction - - ; - ; get vendor information using CPUID eax == 0 - xor eax, eax - cpuid - - ; store the vendor tag (12 bytes in ebx, edx, ecx) in the first parameter, - ; which should be a char[13] - push eax ; save eax - mov eax, [ebp+8] ; store the char* address in eax - mov [eax], ebx ; move ebx into the first 4 bytes - add eax, 4 ; advance the char* 4 bytes - mov [eax], edx ; move edx into the next 4 bytes - add eax, 4 ; advance the char* 4 bytes - mov [eax], ecx ; move ecx into the last 4 bytes - pop eax ; restore eax - - ; get generic extended CPUID info - mov eax, 1 - cpuid ; eax=1, so cpuid queries feature information - - and eax, 0x0fff3fff - push ecx - mov ecx, [ebp+12] - mov [ecx], eax ; just store the model bits in processor param - mov ecx, [ebp+16] - mov [ecx], edx ; set properties param - pop ecx - - ; want to check for 3DNow(tm). - ; need to see if extended cpuid functions present. - mov eax, 0x80000000 - cpuid - cmp eax, 0x80000000 - jbe MAYBE_3DLATER - mov eax, 0x80000001 - cpuid - ; 3DNow if bit 31 set -> put bit in our properties - and edx, 0x80000000 - push eax - mov eax, [ebp+16] - or [eax], edx - pop eax -MAYBE_3DLATER: -EXIT: - popfd - pop ecx - pop edx - - popreg - - pop ebp - ret diff --git a/Engine/source/platform/threads/threadPool.cpp b/Engine/source/platform/threads/threadPool.cpp index 86402522f..1cad50dda 100644 --- a/Engine/source/platform/threads/threadPool.cpp +++ b/Engine/source/platform/threads/threadPool.cpp @@ -322,10 +322,9 @@ ThreadPool::ThreadPool( const char* name, U32 numThreads ) // Platform::SystemInfo will not yet have been initialized. U32 numLogical = 0; - U32 numPhysical = 0; U32 numCores = 0; - CPUInfo::CPUCount( numLogical, numCores, numPhysical ); + CPUInfo::CPUCount( numLogical, numCores ); const U32 baseCount = getMax( numLogical, numCores ); mNumThreads = (baseCount > 0) ? baseCount : 2; diff --git a/Engine/source/platformMac/macCPU.mm b/Engine/source/platformMac/macCPU.mm index 24a1a8a62..001d537a0 100644 --- a/Engine/source/platformMac/macCPU.mm +++ b/Engine/source/platformMac/macCPU.mm @@ -35,15 +35,6 @@ // we now have to use NSProcessInfo #import -//recently removed in Xcode 8 - most likely don't need these anymore -#ifndef CPUFAMILY_INTEL_YONAH -#define CPUFAMILY_INTEL_YONAH 0x73d67300 -#endif - -#ifndef CPUFAMILY_INTEL_MEROM -#define CPUFAMILY_INTEL_MEROM 0x426f69ef -#endif - // Original code by Sean O'Brien (http://www.garagegames.com/community/forums/viewthread/81815). @@ -89,204 +80,183 @@ int _getSysCTLvalue(const char key[], T * dest) { Platform::SystemInfo_struct Platform::SystemInfo; -#define BASE_MHZ_SPEED 0 -//TODO update cpu list +#define BASE_MHZ_SPEED 1000 +#define BASE_APPLE_SILICON_MHZ_SPEED 3200 + +static void detectCpuFeatures(U32 &procflags) +{ + // Now we can directly query the system about a litany of "Optional" processor capabilities + // and determine the status by using BOTH the 'err' value and the 'lraw' value. If we request + // a non-existant feature from SYSCTL(), the 'err' result will be -1; 0 denotes it exists + // >>>> BUT <<<<< + // it may not be supported, only defined. Thus we need to check 'lraw' to determine if it's + // actually supported/implemented by the processor: 0 = no, 1 = yes, others are undefined. + + int err; + U32 lraw; + + // All Cpus have fpu + procflags = CPU_PROP_C | CPU_PROP_FPU; + +#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64) + + // List of chip-specific features + err = _getSysCTLvalue("hw.optional.mmx", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_MMX; + err = _getSysCTLvalue("hw.optional.sse", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE; + err = _getSysCTLvalue("hw.optional.sse2", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE2; + err = _getSysCTLvalue("hw.optional.sse3", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE3; + err = _getSysCTLvalue("hw.optional.supplementalsse3", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE3ex; + err = _getSysCTLvalue("hw.optional.sse4_1", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE4_1; + err = _getSysCTLvalue("hw.optional.sse4_2", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE4_2; + err = _getSysCTLvalue("hw.optional.avx1_0", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_AVX; + +#elif defined(TORQUE_CPU_ARM64) + + err = _getSysCTLvalue("hw.optional.neon", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_NEON; + +#endif + + err = _getSysCTLvalue("hw.ncpu", &lraw); + if ((err==0)&&(lraw>1)) + procflags |= CPU_PROP_MP; + err = _getSysCTLvalue("hw.cpu64bit_capable", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_64bit; + err = _getSysCTLvalue("hw.byteorder", &lraw); + if ((err==0)&&(lraw==1234)) + procflags |= CPU_PROP_LE; +} + void Processor::init() { - U32 procflags; + U32 procflags = 0; int err, cpufam, cputype, cpusub; char buf[255]; U32 lraw; U64 llraw; - - Con::printf( "System & Processor Information:" ); - // Gestalt has been deprecated since Mac OSX Mountain Lion and has stopped working on - // Mac OSX Yosemite. we have to use NSProcessInfo now. // Availability: Mac OS 10.2 or greater. NSString *osVersionStr = [[NSProcessInfo processInfo] operatingSystemVersionString]; - Con::printf( " OSX Version: %s", [osVersionStr UTF8String]); - - err = _getSysCTLstring("kern.ostype", buf, sizeof(buf)); - if (err) - Con::printf( " Unable to determine OS type\n" ); - else - Con::printf( " Mac OS Kernel name: %s", buf); - - err = _getSysCTLstring("kern.osrelease", buf, sizeof(buf)); - if (err) - Con::printf( " Unable to determine OS release number\n" ); - else - Con::printf( " Mac OS Kernel version: %s", buf ); - + + S32 ramMB; err = _getSysCTLvalue("hw.memsize", &llraw); if (err) - Con::printf( " Unable to determine amount of physical RAM\n" ); + ramMB = 512; else - Con::printf( " Physical memory installed: %d MB", (llraw >> 20)); + ramMB = llraw >> 20; - err = _getSysCTLvalue("hw.usermem", &lraw); - if (err) - Con::printf( " Unable to determine available user address space\n"); - else - Con::printf( " Addressable user memory: %d MB", (lraw >> 20)); - - //////////////////////////////// - // Values for the Family Type, CPU Type and CPU Subtype are defined in the - // SDK files for the Mach Kernel ==> mach/machine.h - //////////////////////////////// - - // CPU Family, Type, and Subtype - cpufam = 0; - cputype = 0; - cpusub = 0; - err = _getSysCTLvalue("hw.cpufamily", &lraw); - if (err) - Con::printf( " Unable to determine 'family' of CPU\n"); - else { - cpufam = (int) lraw; - err = _getSysCTLvalue("hw.cputype", &lraw); - if (err) - Con::printf( " Unable to determine CPU type\n"); - else { - cputype = (int) lraw; - err = _getSysCTLvalue("hw.cpusubtype", &lraw); - if (err) - Con::printf( " Unable to determine CPU subtype\n"); - else - cpusub = (int) lraw; - // If we've made it this far, - Con::printf( " Installed processor ID: Family 0x%08x Type %d Subtype %d",cpufam, cputype,cpusub); - } - } - - // The Gestalt version was known to have issues with some Processor Upgrade cards - // but it is uncertain whether this version has similar issues. + char brandString[256]; + err = _getSysCTLstring("machdep.cpu.brand_string", brandString, sizeof(brandString)); + if (err) + brandString[0] = '\0'; + + char vendor[256]; + err = _getSysCTLstring("machdep.cpu.vendor", vendor, sizeof(vendor)); + if (err) + vendor[0] = '\0'; + + // Note: hw.cpufrequency seems to be missing on the M1. For Apple Silicon, + // we will assume the base frequency of the M1 which is 3.2ghz err = _getSysCTLvalue("hw.cpufrequency", &llraw); if (err) { +#if defined(TORQUE_CPU_ARM64) + llraw = BASE_APPLE_SILICON_MHZ_SPEED; +#else llraw = BASE_MHZ_SPEED; - Con::printf( " Unable to determine CPU Frequency. Defaulting to %d MHz\n", llraw); +#endif } else { llraw /= 1000000; - Con::printf( " Installed processor clock frequency: %d MHz", llraw); } Platform::SystemInfo.processor.mhz = (unsigned int)llraw; - // Here's one that the original version of this routine couldn't do -- number - // of processors (cores) - U32 ncpu = 1; - err = _getSysCTLvalue("hw.ncpu", &lraw); - if (err) - Con::printf( " Unable to determine number of processor cores\n"); - else - { - ncpu = lraw; - Con::printf( " Installed/available processor cores: %d", lraw); - } - - // Now use CPUFAM to determine and then store the processor type - // and 'friendly name' in GG-accessible structure. Note that since - // we have access to the Family code, the Type and Subtypes are useless. - // - // NOTE: Even this level of detail is almost assuredly not needed anymore - // and the Optional Capability flags (further down) should be more than enough. - switch(cpufam) - { - case CPUFAMILY_INTEL_YONAH: - Platform::SystemInfo.processor.type = CPU_Intel_Core; - if( ncpu == 2 ) - Platform::SystemInfo.processor.name = StringTable->insert("Intel Core Duo"); - else - Platform::SystemInfo.processor.name = StringTable->insert("Intel Core"); - break; - case CPUFAMILY_INTEL_PENRYN: - case CPUFAMILY_INTEL_MEROM: - Platform::SystemInfo.processor.type = CPU_Intel_Core2; - if( ncpu == 4 ) - Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Quad"); - else - Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Duo"); - break; - - case CPUFAMILY_INTEL_NEHALEM: - Platform::SystemInfo.processor.type = CPU_Intel_Core2; - Platform::SystemInfo.processor.name = StringTable->insert( "Intel 'Nehalem' Core Processor" ); - break; - - default: - // explain why we can't get the processor type. - Con::warnf( " Unknown Processor (family, type, subtype): 0x%x\t%d %d", cpufam, cputype, cpusub); - // for now, identify it as an x86 processor, because Apple is moving to Intel chips... - Platform::SystemInfo.processor.type = CPU_X86Compatible; - Platform::SystemInfo.processor.name = StringTable->insert("Unknown Processor, assuming x86 Compatible"); - break; - } - // Now we can directly query the system about a litany of "Optional" processor capabilities - // and determine the status by using BOTH the 'err' value and the 'lraw' value. If we request - // a non-existant feature from SYSCTL(), the 'err' result will be -1; 0 denotes it exists - // >>>> BUT <<<<< - // it may not be supported, only defined. Thus we need to check 'lraw' to determine if it's - // actually supported/implemented by the processor: 0 = no, 1 = yes, others are undefined. - procflags = 0; - // Seriously this one should be an Assert() - err = _getSysCTLvalue("hw.optional.floatingpoint", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_FPU; - // List of chip-specific features - err = _getSysCTLvalue("hw.optional.mmx", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_MMX; - err = _getSysCTLvalue("hw.optional.sse", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE; - err = _getSysCTLvalue("hw.optional.sse2", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE2; - err = _getSysCTLvalue("hw.optional.sse3", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3; - err = _getSysCTLvalue("hw.optional.supplementalsse3", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3xt; - err = _getSysCTLvalue("hw.optional.sse4_1", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_1; - err = _getSysCTLvalue("hw.optional.sse4_2", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_2; - - // Finally some architecture-wide settings - err = _getSysCTLvalue("hw.ncpu", &lraw); - if ((err==0)&&(lraw>1)) procflags |= CPU_PROP_MP; - err = _getSysCTLvalue("hw.cpu64bit_capable", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_64bit; - err = _getSysCTLvalue("hw.byteorder", &lraw); - if ((err==0)&&(lraw==1234)) procflags |= CPU_PROP_LE; - - Platform::SystemInfo.processor.properties = procflags; - - Con::printf( "%s, %2.2f GHz", Platform::SystemInfo.processor.name, F32( Platform::SystemInfo.processor.mhz ) / 1000.0 ); + detectCpuFeatures(procflags); + + Platform::SystemInfo.processor.properties = procflags; + SetProcessorInfo(Platform::SystemInfo.processor, vendor, brandString); + + + Con::printf("System & Processor Information:"); + Con::printf(" MacOS Version: %s", [osVersionStr UTF8String]); + Con::printf(" Physical memory installed: %d MB", ramMB); + Con::printf(" Processor: %s", Platform::SystemInfo.processor.name); if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX) - Con::printf( " MMX detected"); + Con::printf(" MMX detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE) - Con::printf( " SSE detected"); + Con::printf(" SSE detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2) - Con::printf( " SSE2 detected"); + Con::printf(" SSE2 detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3) - Con::printf( " SSE3 detected"); + Con::printf(" SSE3 detected"); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex) + Con::printf(" SSE3ex detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1) - Con::printf( " SSE4.1 detected"); + Con::printf(" SSE4.1 detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2) - Con::printf( " SSE4.2 detected"); - + Con::printf(" SSE4.2 detected"); + if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX) + Con::printf(" AVX detected"); + if (Platform::SystemInfo.processor.properties & CPU_PROP_NEON) + Con::printf(" Neon detected"); + + if (Platform::SystemInfo.processor.properties & CPU_PROP_MP) + Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors); + Con::printf( "" ); // Trigger the signal Platform::SystemInfoReady.trigger(); } + namespace CPUInfo { - EConfig CPUCount(U32 &logical, U32 &numCores, U32 &numPhysical) { - // todo properly implement this - logical = [[NSProcessInfo processInfo] activeProcessorCount]; - numCores = [[NSProcessInfo processInfo] activeProcessorCount]; - numPhysical = [[NSProcessInfo processInfo] processorCount]; + EConfig CPUCount(U32 &logical, U32 &physical) { + U32 lraw; + int err; - // todo check for hyperthreading - if (numCores > 1) - return CONFIG_MultiCoreAndHTNotCapable; - return CONFIG_SingleCoreAndHTNotCapable; + err = _getSysCTLvalue("hw.physicalcpu", &lraw); + if (err == 0) + physical = lraw; + else + physical = 1; + + err = _getSysCTLvalue("hw.logicalcpu", &lraw); + if (err == 0) + { + logical = lraw; + } + else + { + // fallback to querying the number of cpus. If that fails, then assume same as number of cores + err = _getSysCTLvalue("hw.ncpu", &lraw); + if (err == 0) + logical = lraw; + else + logical = physical; + } + + const bool smtEnabled = logical > physical; + + if (physical == 1) + return smtEnabled ? CONFIG_SingleCoreHTEnabled : CONFIG_SingleCoreAndHTNotCapable; + + return smtEnabled ? CONFIG_MultiCoreAndHTEnabled : CONFIG_MultiCoreAndHTNotCapable; } } diff --git a/Engine/source/platformMac/macMath.mm b/Engine/source/platformMac/macMath.mm index 542fa27c8..4feefb277 100644 --- a/Engine/source/platformMac/macMath.mm +++ b/Engine/source/platformMac/macMath.mm @@ -27,8 +27,6 @@ #include "console/engineAPI.h" extern void mInstallLibrary_C(); -extern void mInstallLibrary_Vec(); -extern void mInstall_Library_SSE(); static MRandomLCG sgPlatRandom; @@ -115,7 +113,6 @@ void Math::init(U32 properties) if( properties & CPU_PROP_SSE ) { Con::printf( " Installing SSE extensions" ); - mInstall_Library_SSE(); } #endif diff --git a/Engine/source/platformPOSIX/POSIXCPUInfo.cpp b/Engine/source/platformPOSIX/POSIXCPUInfo.cpp index 279d23f1a..679096bd7 100644 --- a/Engine/source/platformPOSIX/POSIXCPUInfo.cpp +++ b/Engine/source/platformPOSIX/POSIXCPUInfo.cpp @@ -22,30 +22,233 @@ #ifndef __APPLE__ +#include +#include +#include +#include +#include + #include "platform/platform.h" #include "platformPOSIX/platformPOSIX.h" -#include "console/console.h" -#include "core/stringTable.h" -#include "core/strings/stringFunctions.h" -#include - #include "platform/platformCPUCount.h" + +#include "console/console.h" + #include Platform::SystemInfo_struct Platform::SystemInfo; -void Processor::init() {} +static inline void rtrim(std::string &s) +{ + s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); +} + +static inline void ltrim(std::string &s) +{ + s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); +} + +static void getCPUInformation() +{ + std::string vendorString; + std::string brandString; + + std::ifstream cpuInfo("/proc/cpuinfo"); + + U32 logicalCoreCount = 0; + U32 physicalCoreCount = 1; + + if (cpuInfo.is_open()) + { + // Load every line of the CPU Info + std::string line; + + while (std::getline(cpuInfo, line)) + { + std::string fieldName = line.substr(0, line.find(":")); + rtrim(fieldName); + + // Entries are newline separated + if (fieldName == "") + { + ++logicalCoreCount; + continue; + } + + std::string fieldValue = line.substr(line.find(":") + 1, line.length()); + ltrim(fieldValue); + rtrim(fieldValue); + + // Load fields + if (fieldName == "vendor_id") + { + vendorString = fieldValue.c_str(); + } + else if (fieldName == "model name") + { + brandString = fieldValue.c_str(); + } + else if (fieldName == "cpu cores") + { + physicalCoreCount = dAtoui(fieldValue.c_str()); + } + else if (fieldName == "flags") + { + std::vector flags; + std::istringstream flagStream(fieldValue); + + std::string currentFlag; + while (std::getline(flagStream, currentFlag, ' ')) + { + flags.push_back(currentFlag); + } + + // Set CPU flags + if (std::find(flags.begin(), flags.end(), "fpu") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_FPU; + } + + if (std::find(flags.begin(), flags.end(), "sse3") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_SSE3; + } + + if (std::find(flags.begin(), flags.end(), "avx") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_AVX; + } + + if (std::find(flags.begin(), flags.end(), "ssse3") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_SSE3ex; + } + + if (std::find(flags.begin(), flags.end(), "sse") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_SSE; + } + + if (std::find(flags.begin(), flags.end(), "sse2") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_SSE2; + } + + if (std::find(flags.begin(), flags.end(), "sse4_1") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_SSE4_1; + } + + if (std::find(flags.begin(), flags.end(), "sse4_2") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_SSE4_2; + } + + if (std::find(flags.begin(), flags.end(), "mmx") != flags.end()) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_MMX; + } + } + } + + cpuInfo.close(); + } + else + { + logicalCoreCount = 1; + } + + Platform::SystemInfo.processor.numLogicalProcessors = logicalCoreCount; + Platform::SystemInfo.processor.numPhysicalProcessors = physicalCoreCount; + Platform::SystemInfo.processor.isHyperThreaded = logicalCoreCount != physicalCoreCount; + Platform::SystemInfo.processor.isMultiCore = physicalCoreCount != 1; + Platform::SystemInfo.processor.numLogicalProcessors = logicalCoreCount; + Platform::SystemInfo.processor.numPhysicalProcessors = physicalCoreCount; + if (Platform::SystemInfo.processor.isMultiCore) + { + Platform::SystemInfo.processor.properties |= CPU_PROP_MP; + } + + // Load processor base frequency + std::ifstream baseFrequencyStream("/sys/devices/system/cpu/cpu0/cpufreq/base_frequency"); + if (baseFrequencyStream.is_open()) + { + U32 baseFrequencyKHz = 0; + baseFrequencyStream >> baseFrequencyKHz; + + Platform::SystemInfo.processor.mhz = baseFrequencyKHz / 1000; + baseFrequencyStream.close(); + } + + SetProcessorInfo(Platform::SystemInfo.processor, vendorString.c_str(), brandString.c_str()); +} + +void Processor::init() +{ + getCPUInformation(); + +#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_X32) + // Set sane default information + Platform::SystemInfo.processor.properties |= CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE ; + +#elif defined(TORQUE_CPU_ARM32) || defined(TORQUE_CPU_ARM64) + Platform::SystemInfo.processor.type = CPU_ArmCompatible; + Platform::SystemInfo.processor.name = StringTable->insert("Unknown ARM Processor"); + Platform::SystemInfo.processor.properties = CPU_PROP_C; +#else +#warning Unsupported CPU +#endif + + // Set 64bit flag +#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64) + Platform::SystemInfo.processor.properties |= CPU_PROP_64bit; +#endif + + // Once CPU information is resolved, produce an output like Windows does + Con::printf("Processor Init:"); + Con::printf(" Processor: %s", Platform::SystemInfo.processor.name); + if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX) + Con::printf(" MMX detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE) + Con::printf(" SSE detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2) + Con::printf(" SSE2 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3) + Con::printf(" SSE3 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1) + Con::printf(" SSE4.1 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2) + Con::printf(" SSE4.2 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX) + Con::printf(" AVX detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex) + Con::printf(" SSE3ex detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_MP) + Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors); + + Con::printf(" "); +} -// TODO LINUX CPUInfo::CPUCount better support namespace CPUInfo { - EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum) + EConfig CPUCount(U32 &logical, U32 &physical) { - PhysicalNum = TotAvailCore = 0; - TotAvailLogical = (int)sysconf(_SC_NPROCESSORS_ONLN); + // We don't set logical or physical here because it's already been determined by this point + if (Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors == 1) + { + return CONFIG_SingleCoreHTEnabled; + } + else if (!Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors > 1) + { + return CONFIG_MultiCoreAndHTNotCapable; + } + else if (!Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors == 1) + { + return CONFIG_SingleCoreAndHTNotCapable; + } - return CONFIG_SingleCoreHTDisabled; + return CONFIG_MultiCoreAndHTEnabled; } -}; // namespace CPUInfo +}; // namespace CPUInfo #endif \ No newline at end of file diff --git a/Engine/source/platformPOSIX/POSIXMath.cpp b/Engine/source/platformPOSIX/POSIXMath.cpp index ef1f089a4..8f21329a3 100644 --- a/Engine/source/platformPOSIX/POSIXMath.cpp +++ b/Engine/source/platformPOSIX/POSIXMath.cpp @@ -31,11 +31,6 @@ extern void mInstallLibrary_C(); extern void mInstallLibrary_ASM(); - -extern void mInstall_AMD_Math(); -extern void mInstall_Library_SSE(); - - //-------------------------------------- DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )" "@brief Install the math library with specified extensions.\n\n" @@ -70,10 +65,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )" properties |= CPU_PROP_MMX; continue; } - if (dStricmp(*argv, "3DNOW") == 0) { - properties |= CPU_PROP_3DNOW; - continue; - } if (dStricmp(*argv, "SSE") == 0) { properties |= CPU_PROP_SSE; continue; @@ -112,18 +103,12 @@ void Math::init(U32 properties) if (properties & CPU_PROP_MMX) { Con::printf(" Installing MMX extensions"); - if (properties & CPU_PROP_3DNOW) - { - Con::printf(" Installing 3DNow extensions"); - mInstall_AMD_Math(); - } } #if !defined(__MWERKS__) || (__MWERKS__ >= 0x2400) if (properties & CPU_PROP_SSE) { Con::printf(" Installing SSE extensions"); - mInstall_Library_SSE(); } #endif //mwerks>2.4 diff --git a/Engine/source/platformWin32/winCPUInfo.cpp b/Engine/source/platformWin32/winCPUInfo.cpp index 4c836ad9e..7b058fd8c 100644 --- a/Engine/source/platformWin32/winCPUInfo.cpp +++ b/Engine/source/platformWin32/winCPUInfo.cpp @@ -24,13 +24,89 @@ #include "platformWin32/platformWin32.h" #include "console/console.h" #include "core/stringTable.h" +#include "platform/platformCPUCount.h" #include #include Platform::SystemInfo_struct Platform::SystemInfo; extern void PlatformBlitInit(); -extern void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, - char* vendor, U32 processor, U32 properties, U32 properties2); // platform/platformCPU.cc + +static void getBrand(char* brand) +{ + S32 extendedInfo[4]; + __cpuid(extendedInfo, 0x80000000); + S32 numberExtendedIds = extendedInfo[0]; + + // Sets brand + if (numberExtendedIds >= 0x80000004) + { + int offset = 0; + for (int i = 0; i < 3; ++i) + { + S32 brandInfo[4]; + __cpuidex(brandInfo, 0x80000002 + i, 0); + + *reinterpret_cast(brand + offset + 0) = brandInfo[0]; + *reinterpret_cast(brand + offset + 4) = brandInfo[1]; + *reinterpret_cast(brand + offset + 8) = brandInfo[2]; + *reinterpret_cast(brand + offset + 12) = brandInfo[3]; + + offset += sizeof(S32) * 4; + } + } +} + +enum CpuFlags +{ + // EDX Register flags + BIT_MMX = BIT(23), + BIT_SSE = BIT(25), + BIT_SSE2 = BIT(26), + BIT_3DNOW = BIT(31), // only available for amd cpus in x86 + + // These use a different value for comparison than the above flags (ECX Register) + BIT_SSE3 = BIT(0), + BIT_SSE3ex = BIT(9), + BIT_SSE4_1 = BIT(19), + BIT_SSE4_2 = BIT(20), + + BIT_XSAVE_RESTORE = BIT(27), + BIT_AVX = BIT(28), +}; + +static void detectCpuFeatures(Platform::SystemInfo_struct::Processor &processor) +{ + S32 cpuInfo[4]; + __cpuid(cpuInfo, 1); + U32 eax = cpuInfo[0]; // eax + U32 edx = cpuInfo[3]; // edx + U32 ecx = cpuInfo[2]; // ecx + + processor.properties |= (edx & BIT_MMX) ? CPU_PROP_MMX : 0; + processor.properties |= (edx & BIT_SSE) ? CPU_PROP_SSE : 0; + processor.properties |= (edx & BIT_SSE2) ? CPU_PROP_SSE2 : 0; + processor.properties |= (ecx & BIT_SSE3) ? CPU_PROP_SSE3 : 0; + processor.properties |= (ecx & BIT_SSE3ex) ? CPU_PROP_SSE3ex : 0; + processor.properties |= (ecx & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0; + processor.properties |= (ecx & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0; + + // AVX detection requires that xsaverestore is supported + if (ecx & BIT_XSAVE_RESTORE && ecx & BIT_AVX) + { + bool supportsAVX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6; + if (supportsAVX) + { + processor.properties |= CPU_PROP_AVX; + } + } + + if (processor.isMultiCore) + processor.properties |= CPU_PROP_MP; + +#ifdef TORQUE_CPU_X64 + processor.properties |= CPU_PROP_64bit; +#endif +} void Processor::init() { @@ -40,18 +116,13 @@ void Processor::init() // www.intel.com // http://developer.intel.com/design/PentiumII/manuals/24512701.pdf - Con::printf("Processor Init:"); - Platform::SystemInfo.processor.type = CPU_X86Compatible; Platform::SystemInfo.processor.name = StringTable->insert("Unknown x86 Compatible"); Platform::SystemInfo.processor.mhz = 0; - Platform::SystemInfo.processor.properties = CPU_PROP_C | CPU_PROP_LE; + Platform::SystemInfo.processor.properties = CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE; char vendor[0x20]; dMemset(vendor, 0, sizeof(vendor)); - U32 properties = 0; - U32 processor = 0; - U32 properties2 = 0; S32 vendorInfo[4]; __cpuid(vendorInfo, 0); @@ -59,17 +130,14 @@ void Processor::init() *reinterpret_cast(vendor + 4) = vendorInfo[3]; // edx *reinterpret_cast(vendor + 8) = vendorInfo[2]; // ecx - S32 cpuInfo[4]; - __cpuid(cpuInfo, 1); - processor = cpuInfo[0]; // eax - properties = cpuInfo[3]; // edx - properties2 = cpuInfo[2]; // ecx + char brand[0x40]; + dMemset(brand, 0, sizeof(brand)); + getBrand(brand); - SetProcessorInfo(Platform::SystemInfo.processor, vendor, processor, properties, properties2); + SetProcessorInfo(Platform::SystemInfo.processor, vendor, brand); + detectCpuFeatures(Platform::SystemInfo.processor); -// now calculate speed of processor... - U32 nearmhz = 0; // nearest rounded mhz - U32 mhz = 0; // calculated value. + U32 mhz = 1000; // default if it can't be found LONG result; DWORD data = 0; @@ -83,56 +151,35 @@ void Processor::init() result = ::RegQueryValueExA (hKey, "~MHz",NULL, NULL,(LPBYTE)&data, &dataSize); if (result == ERROR_SUCCESS) - nearmhz = mhz = data; + mhz = data; ::RegCloseKey(hKey); } Platform::SystemInfo.processor.mhz = mhz; - if (mhz==0) - { - Con::printf(" %s, (Unknown) Mhz", Platform::SystemInfo.processor.name); - // stick SOMETHING in so it isn't ZERO. - Platform::SystemInfo.processor.mhz = 200; // seems a decent value. - } - else - { - if (nearmhz >= 1000) - Con::printf(" %s, ~%.2f Ghz", Platform::SystemInfo.processor.name, ((float)nearmhz)/1000.0f); - else - Con::printf(" %s, ~%d Mhz", Platform::SystemInfo.processor.name, nearmhz); - if (nearmhz != mhz) - { - if (mhz >= 1000) - Con::printf(" (timed at roughly %.2f Ghz)", ((float)mhz)/1000.0f); - else - Con::printf(" (timed at roughly %d Mhz)", mhz); - } - } - - if( Platform::SystemInfo.processor.numAvailableCores > 0 - || Platform::SystemInfo.processor.numPhysicalProcessors > 0 - || Platform::SystemInfo.processor.isHyperThreaded ) - Platform::SystemInfo.processor.properties |= CPU_PROP_MP; - - if (Platform::SystemInfo.processor.properties & CPU_PROP_FPU) - Con::printf( " FPU detected" ); + Con::printf("Processor Init:"); + Con::printf(" Processor: %s", Platform::SystemInfo.processor.name); if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX) - Con::printf( " MMX detected" ); - if (Platform::SystemInfo.processor.properties & CPU_PROP_3DNOW) - Con::printf( " 3DNow detected" ); + Con::printf(" MMX detected" ); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE) - Con::printf( " SSE detected" ); - if( Platform::SystemInfo.processor.properties & CPU_PROP_SSE2 ) - Con::printf( " SSE2 detected" ); - if( Platform::SystemInfo.processor.isHyperThreaded ) - Con::printf( " HT detected" ); - if( Platform::SystemInfo.processor.properties & CPU_PROP_MP ) - Con::printf( " MP detected [%i cores, %i logical, %i physical]", - Platform::SystemInfo.processor.numAvailableCores, - Platform::SystemInfo.processor.numLogicalProcessors, - Platform::SystemInfo.processor.numPhysicalProcessors ); + Con::printf(" SSE detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2) + Con::printf(" SSE2 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3) + Con::printf(" SSE3 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex) + Con::printf(" SSE3ex detected "); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1) + Con::printf(" SSE4.1 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2) + Con::printf(" SSE4.2 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX) + Con::printf(" AVX detected"); + + if (Platform::SystemInfo.processor.properties & CPU_PROP_MP) + Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors); + Con::printf(" "); PlatformBlitInit(); diff --git a/Engine/source/platformWin32/winMath.cpp b/Engine/source/platformWin32/winMath.cpp index b273d25f0..44b215301 100644 --- a/Engine/source/platformWin32/winMath.cpp +++ b/Engine/source/platformWin32/winMath.cpp @@ -29,8 +29,6 @@ extern void mInstallLibrary_C(); extern void mInstallLibrary_ASM(); -extern void mInstall_AMD_Math(); -extern void mInstall_Library_SSE(); //-------------------------------------- DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )" @@ -40,7 +38,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )" " - 'C' Enable the C math routines. C routines are always enabled.\n\n" " - 'FPU' Enable floating point unit routines.\n\n" " - 'MMX' Enable MMX math routines.\n\n" - " - '3DNOW' Enable 3dNow! math routines.\n\n" " - 'SSE' Enable SSE math routines.\n\n" "@ingroup Math") @@ -72,10 +69,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )" properties |= CPU_PROP_MMX; continue; } - if (dStricmp(str, "3DNOW") == 0) { - properties |= CPU_PROP_3DNOW; - continue; - } if (dStricmp(str, "SSE") == 0) { properties |= CPU_PROP_SSE; continue; @@ -116,17 +109,11 @@ void Math::init(U32 properties) if (properties & CPU_PROP_MMX) { Con::printf(" Installing MMX extensions"); - if (properties & CPU_PROP_3DNOW) - { - Con::printf(" Installing 3DNow extensions"); - mInstall_AMD_Math(); - } } if (properties & CPU_PROP_SSE) { Con::printf(" Installing SSE extensions"); - mInstall_Library_SSE(); } Con::printf(" "); diff --git a/Engine/source/platformWin32/winPlatformCPUCount.cpp b/Engine/source/platformWin32/winPlatformCPUCount.cpp index e4a5d54d6..e4b113c67 100644 --- a/Engine/source/platformWin32/winPlatformCPUCount.cpp +++ b/Engine/source/platformWin32/winPlatformCPUCount.cpp @@ -26,6 +26,7 @@ #if defined( TORQUE_OS_WIN ) #include "platform/platformCPUCount.h" +#include "console/console.h" #include #include #include @@ -52,12 +53,10 @@ namespace CPUInfo { return bitSetCount; } - EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum ) + EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore ) { - EConfig StatusFlag = CONFIG_UserConfigIssue; TotAvailLogical = 0; TotAvailCore = 0; - PhysicalNum = 0; PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; DWORD returnLength = 0; @@ -68,42 +67,37 @@ namespace CPUInfo { rc = GetLogicalProcessorInformation( buffer, &returnLength ); + // if we fail, assume single threaded if( FALSE == rc ) { free( buffer ); - return StatusFlag; + Con::errorf("Unable to determine CPU Count, assuming 1 core"); + TotAvailCore = 1; + TotAvailLogical = 1; + return CONFIG_SingleCoreAndHTNotCapable; } +#pragma push +#pragma warning (disable: 6011) PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer; DWORD byteOffset = 0; while( byteOffset + sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) <= returnLength ) { - switch( ptr->Relationship ) - { - - case RelationProcessorCore: + if (ptr->Relationship == RelationProcessorCore) + { TotAvailCore++; - - // A hyperthreaded core supplies more than one logical processor. - TotAvailLogical += CountSetBits( ptr->ProcessorMask ); - break; - - case RelationProcessorPackage: - // Logical processors share a physical package. - PhysicalNum++; - break; - - default: - break; + TotAvailLogical += CountSetBits(ptr->ProcessorMask); } + byteOffset += sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ); ptr++; - } + } free( buffer ); +#pragma pop - StatusFlag = CONFIG_SingleCoreAndHTNotCapable; + EConfig StatusFlag = CONFIG_SingleCoreAndHTNotCapable; if( TotAvailCore == 1 && TotAvailLogical > TotAvailCore ) StatusFlag = CONFIG_SingleCoreHTEnabled;