mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-03-29 17:19:38 +00:00
Merge pull request #789 from JeffProgrammer/cpuinfo
Overhaul on CPU detection for Windows, Mac (x64/arm64) & Linux
This commit is contained in:
commit
53cd3ea36a
16 changed files with 530 additions and 1801 deletions
|
|
@ -1,216 +0,0 @@
|
|||
//-----------------------------------------------------------------------------
|
||||
// Copyright (c) 2012 GarageGames, LLC
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal in the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
// IN THE SOFTWARE.
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#include "math/mMathFn.h"
|
||||
#include "math/mPlane.h"
|
||||
#include "math/mMatrix.h"
|
||||
|
||||
|
||||
// extern void (*m_matF_x_point3F)(const F32 *m, const F32 *p, F32 *presult);
|
||||
// extern void (*m_matF_x_vectorF)(const F32 *m, const F32 *v, F32 *vresult);
|
||||
|
||||
/* not currently implemented.
|
||||
void Athlon_MatrixF_x_Point3F(const F32 *m, const F32 *p, F32 *presult)
|
||||
{
|
||||
m;
|
||||
p;
|
||||
presult;
|
||||
}
|
||||
*/
|
||||
|
||||
//============================================================
|
||||
// Here's the C code for MatF_x_MatF:
|
||||
// note that the code below does it in a different order (optimal asm, after all!)
|
||||
//
|
||||
// r[0] = a[0]*b[0] + a[1]*b[4] + a[2]*b[8] + a[3]*b[12];
|
||||
// r[1] = a[0]*b[1] + a[1]*b[5] + a[2]*b[9] + a[3]*b[13];
|
||||
// r[2] = a[0]*b[2] + a[1]*b[6] + a[2]*b[10] + a[3]*b[14];
|
||||
// r[3] = a[0]*b[3] + a[1]*b[7] + a[2]*b[11] + a[3]*b[15];
|
||||
//
|
||||
// r[4] = a[4]*b[0] + a[5]*b[4] + a[6]*b[8] + a[7]*b[12];
|
||||
// r[5] = a[4]*b[1] + a[5]*b[5] + a[6]*b[9] + a[7]*b[13];
|
||||
// r[6] = a[4]*b[2] + a[5]*b[6] + a[6]*b[10] + a[7]*b[14];
|
||||
// r[7] = a[4]*b[3] + a[5]*b[7] + a[6]*b[11] + a[7]*b[15];
|
||||
//
|
||||
// r[8] = a[8]*b[0] + a[9]*b[4] + a[10]*b[8] + a[11]*b[12];
|
||||
// r[9] = a[8]*b[1] + a[9]*b[5] + a[10]*b[9] + a[11]*b[13];
|
||||
// r[10]= a[8]*b[2] + a[9]*b[6] + a[10]*b[10]+ a[11]*b[14];
|
||||
// r[11]= a[8]*b[3] + a[9]*b[7] + a[10]*b[11]+ a[11]*b[15];
|
||||
//
|
||||
// r[12]= a[12]*b[0]+ a[13]*b[4]+ a[14]*b[8] + a[15]*b[12];
|
||||
// r[13]= a[12]*b[1]+ a[13]*b[5]+ a[14]*b[9] + a[15]*b[13];
|
||||
// r[14]= a[12]*b[2]+ a[13]*b[6]+ a[14]*b[10]+ a[15]*b[14];
|
||||
// r[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15];
|
||||
//============================================================
|
||||
|
||||
#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
|
||||
#define ADD_3DNOW_FUNCS
|
||||
// inlined version here.
|
||||
void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
|
||||
{
|
||||
__asm
|
||||
{
|
||||
femms
|
||||
|
||||
mov ecx, matA
|
||||
mov edx, matB
|
||||
mov eax, result
|
||||
|
||||
prefetch [ecx+32] ;// These may help -
|
||||
prefetch [edx+32] ;// and probably don't hurt
|
||||
|
||||
movq mm0,[ecx] ;// a21 | a11
|
||||
movq mm1,[ecx+8] ;// a41 | a31
|
||||
movq mm4,[edx] ;// b21 | b11
|
||||
punpckhdq mm2,mm0 ;// a21 |
|
||||
movq mm5,[edx+16] ;// b22 | b12
|
||||
punpckhdq mm3,mm1 ;// a41 |
|
||||
movq mm6,[edx+32] ;// b23 | b13
|
||||
punpckldq mm0,mm0 ;// a11 | a11
|
||||
punpckldq mm1,mm1 ;// a31 | a31
|
||||
pfmul mm4,mm0 ;// a11*b21 | a11*b11
|
||||
punpckhdq mm2,mm2 ;// a21 | a21
|
||||
pfmul mm0,[edx+8] ;// a11*b41 | a11*b31
|
||||
movq mm7,[edx+48] ;// b24 | b14
|
||||
pfmul mm5,mm2 ;// a21*b22 | a21*b12
|
||||
punpckhdq mm3,mm3 ;// a41 | a41
|
||||
pfmul mm2,[edx+24] ;// a21*b42 | a21*b32
|
||||
pfmul mm6,mm1 ;// a31*b23 | a31*b13
|
||||
pfadd mm5,mm4 ;// a21*b22 + a11*b21 | a21*b12 + a11*b11
|
||||
pfmul mm1,[edx+40] ;// a31*b43 | a31*b33
|
||||
pfadd mm2,mm0 ;// a21*b42 + a11*b41 | a21*b32 + a11*b31
|
||||
pfmul mm7,mm3 ;// a41*b24 | a41*b14
|
||||
pfadd mm6,mm5 ;// a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13
|
||||
pfmul mm3,[edx+56] ;// a41*b44 | a41*b34
|
||||
pfadd mm2,mm1 ;// a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33
|
||||
pfadd mm7,mm6 ;// a41*b24 + a21*b22 + a11*b21 + a31*b23 | a41*b14 + a21*b12 + a11*b11 + a31*b13
|
||||
movq mm0,[ecx+16] ;// a22 | a12
|
||||
pfadd mm3,mm2 ;// a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33
|
||||
movq mm1,[ecx+24] ;// a42 | a32
|
||||
movq [eax],mm7 ;// r21 | r11
|
||||
movq mm4,[edx] ;// b21 | b11
|
||||
movq [eax+8],mm3 ;// r41 | r31
|
||||
|
||||
punpckhdq mm2,mm0 ;// a22 | XXX
|
||||
movq mm5,[edx+16] ;// b22 | b12
|
||||
punpckhdq mm3,mm1 ;// a42 | XXX
|
||||
movq mm6,[edx+32] ;// b23 | b13
|
||||
punpckldq mm0,mm0 ;// a12 | a12
|
||||
punpckldq mm1,mm1 ;// a32 | a32
|
||||
pfmul mm4,mm0 ;// a12*b21 | a12*b11
|
||||
punpckhdq mm2,mm2 ;// a22 | a22
|
||||
pfmul mm0,[edx+8] ;// a12*b41 | a12*b31
|
||||
movq mm7,[edx+48] ;// b24 | b14
|
||||
pfmul mm5,mm2 ;// a22*b22 | a22*b12
|
||||
punpckhdq mm3,mm3 ;// a42 | a42
|
||||
pfmul mm2,[edx+24] ;// a22*b42 | a22*b32
|
||||
pfmul mm6,mm1 ;// a32*b23 | a32*b13
|
||||
pfadd mm5,mm4 ;// a12*b21 + a22*b22 | a12*b11 + a22*b12
|
||||
pfmul mm1,[edx+40] ;// a32*b43 | a32*b33
|
||||
pfadd mm2,mm0 ;// a12*b41 + a22*b42 | a12*b31 + a22*b32
|
||||
pfmul mm7,mm3 ;// a42*b24 | a42*b14
|
||||
pfadd mm6,mm5 ;// a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12
|
||||
pfmul mm3,[edx+56] ;// a42*b44 | a42*b34
|
||||
pfadd mm2,mm1 ;// a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b31 + a22*b32
|
||||
pfadd mm7,mm6 ;// a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12
|
||||
movq mm0,[ecx+32] ;// a23 | a13
|
||||
pfadd mm3,mm2 ;// a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b31 + a22*b32
|
||||
movq mm1,[ecx+40] ;// a43 | a33
|
||||
movq [eax+16],mm7 ;// r22 | r12
|
||||
movq mm4,[edx] ;// b21 | b11
|
||||
movq [eax+24],mm3 ;// r42 | r32
|
||||
|
||||
punpckhdq mm2,mm0 ;// a23 | XXX
|
||||
movq mm5,[edx+16] ;// b22 | b12
|
||||
punpckhdq mm3,mm1 ;// a43 | XXX
|
||||
movq mm6,[edx+32] ;// b23 | b13
|
||||
punpckldq mm0,mm0 ;// a13 | a13
|
||||
punpckldq mm1,mm1 ;// a33 | a33
|
||||
pfmul mm4,mm0 ;// a13*b21 | a13*b11
|
||||
punpckhdq mm2,mm2 ;// a23 | a23
|
||||
pfmul mm0,[edx+8] ;// a13*b41 | a13*b31
|
||||
movq mm7,[edx+48] ;// b24 | b14
|
||||
pfmul mm5,mm2 ;// a23*b22 | a23*b12
|
||||
punpckhdq mm3,mm3 ;// a43 | a43
|
||||
pfmul mm2,[edx+24] ;// a23*b42 | a23*b32
|
||||
pfmul mm6,mm1 ;// a33*b23 | a33*b13
|
||||
pfadd mm5,mm4 ;// a23*b22 + a13*b21 | a23*b12 + a13*b11
|
||||
pfmul mm1,[edx+40] ;// a33*b43 | a33*b33
|
||||
pfadd mm2,mm0 ;// a13*b41 + a23*b42 | a13*b31 + a23*b32
|
||||
pfmul mm7,mm3 ;// a43*b24 | a43*b14
|
||||
pfadd mm6,mm5 ;// a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11
|
||||
pfmul mm3,[edx+56] ;// a43*b44 | a43*b34
|
||||
pfadd mm2,mm1 ;// a33*b43 + a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32
|
||||
pfadd mm7,mm6 ;// a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11
|
||||
movq mm0,[ecx+48] ;// a24 | a14
|
||||
pfadd mm3,mm2 ;// a43*b44 + a33*b43 + a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32
|
||||
movq mm1,[ecx+56] ;// a44 | a34
|
||||
movq [eax+32],mm7 ;// r23 | r13
|
||||
movq mm4,[edx] ;// b21 | b11
|
||||
movq [eax+40],mm3 ;// r43 | r33
|
||||
|
||||
punpckhdq mm2,mm0 ;// a24 | XXX
|
||||
movq mm5,[edx+16] ;// b22 | b12
|
||||
punpckhdq mm3,mm1 ;// a44 | XXX
|
||||
movq mm6,[edx+32] ;// b23 | b13
|
||||
punpckldq mm0,mm0 ;// a14 | a14
|
||||
punpckldq mm1,mm1 ;// a34 | a34
|
||||
pfmul mm4,mm0 ;// a14*b21 | a14*b11
|
||||
punpckhdq mm2,mm2 ;// a24 | a24
|
||||
pfmul mm0,[edx+8] ;// a14*b41 | a14*b31
|
||||
movq mm7,[edx+48] ;// b24 | b14
|
||||
pfmul mm5,mm2 ;// a24*b22 | a24*b12
|
||||
punpckhdq mm3,mm3 ;// a44 | a44
|
||||
pfmul mm2,[edx+24] ;// a24*b42 | a24*b32
|
||||
pfmul mm6,mm1 ;// a34*b23 | a34*b13
|
||||
pfadd mm5,mm4 ;// a14*b21 + a24*b22 | a14*b11 + a24*b12
|
||||
pfmul mm1,[edx+40] ;// a34*b43 | a34*b33
|
||||
pfadd mm2,mm0 ;// a14*b41 + a24*b42 | a14*b31 + a24*b32
|
||||
pfmul mm7,mm3 ;// a44*b24 | a44*b14
|
||||
pfadd mm6,mm5 ;// a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12
|
||||
pfmul mm3,[edx+56] ;// a44*b44 | a44*b34
|
||||
pfadd mm2,mm1 ;// a34*b43 + a14*b41 + a24*b42 | a34*b33 + a14*b31 + a24*b32
|
||||
pfadd mm7,mm6 ;// a44*b24 + a34*b23 + a14*b21 + a24*b22 | a44*b14 + a34*b13 + a14*b11 + a24*b12
|
||||
pfadd mm3,mm2 ;// a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32
|
||||
movq [eax+48],mm7 ;// r24 | r14
|
||||
movq [eax+56],mm3 ;// r44 | r34
|
||||
femms
|
||||
}
|
||||
}
|
||||
#elif defined(TORQUE_SUPPORTS_NASM)
|
||||
#define ADD_3DNOW_FUNCS
|
||||
extern "C"
|
||||
{
|
||||
void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void mInstall_AMD_Math()
|
||||
{
|
||||
#if defined(ADD_3DNOW_FUNCS)
|
||||
m_matF_x_matF = Athlon_MatrixF_x_MatrixF;
|
||||
#endif
|
||||
// m_matF_x_point3F = Athlon_MatrixF_x_Point3F;
|
||||
// m_matF_x_vectorF = Athlon_MatrixF_x_VectorF;
|
||||
}
|
||||
|
||||
|
|
@ -1,177 +0,0 @@
|
|||
;-----------------------------------------------------------------------------
|
||||
; Copyright (c) 2012 GarageGames, LLC
|
||||
;
|
||||
; Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
; of this software and associated documentation files (the "Software"), to
|
||||
; deal in the Software without restriction, including without limitation the
|
||||
; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
; sell copies of the Software, and to permit persons to whom the Software is
|
||||
; furnished to do so, subject to the following conditions:
|
||||
;
|
||||
; The above copyright notice and this permission notice shall be included in
|
||||
; all copies or substantial portions of the Software.
|
||||
;
|
||||
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
; IN THE SOFTWARE.
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
segment .data
|
||||
|
||||
matA dd 0
|
||||
result dd 0
|
||||
matB dd 0
|
||||
|
||||
segment .text
|
||||
|
||||
%macro export_fn 1
|
||||
%ifidn __OUTPUT_FORMAT__, elf
|
||||
; No underscore needed for ELF object files
|
||||
global %1
|
||||
%1:
|
||||
%else
|
||||
global _%1
|
||||
_%1:
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
|
||||
%define arg(x) [esp+(x*4)]
|
||||
|
||||
|
||||
|
||||
;void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
|
||||
|
||||
export_fn Athlon_MatrixF_x_MatrixF
|
||||
|
||||
mov ecx, arg(1)
|
||||
mov edx, arg(2)
|
||||
mov eax, arg(3)
|
||||
|
||||
femms
|
||||
prefetch [ecx+32] ; These may help -
|
||||
prefetch [edx+32] ; and probably don't hurt
|
||||
|
||||
movq mm0,[ecx] ; a21 | a11
|
||||
movq mm1,[ecx+8] ; a41 | a31
|
||||
movq mm4,[edx] ; b21 | b11
|
||||
punpckhdq mm2,mm0 ; a21 |
|
||||
movq mm5,[edx+16] ; b22 | b12
|
||||
punpckhdq mm3,mm1 ; a41 |
|
||||
movq mm6,[edx+32] ; b23 | b13
|
||||
punpckldq mm0,mm0 ; a11 | a11
|
||||
punpckldq mm1,mm1 ; a31 | a31
|
||||
pfmul mm4,mm0 ; a11*b21 | a11*b11
|
||||
punpckhdq mm2,mm2 ; a21 | a21
|
||||
pfmul mm0,[edx+8] ; a11*b41 | a11*b31
|
||||
movq mm7,[edx+48] ; b24 | b14
|
||||
pfmul mm5,mm2 ; a21*b22 | a21*b12
|
||||
punpckhdq mm3,mm3 ; a41 | a41
|
||||
pfmul mm2,[edx+24] ; a21*b42 | a21*b32
|
||||
pfmul mm6,mm1 ; a31*b23 | a31*b13
|
||||
pfadd mm5,mm4 ; a21*b22 + a11*b21 | a21*b12 + a11*b11
|
||||
pfmul mm1,[edx+40] ; a31*b43 | a31*b33
|
||||
pfadd mm2,mm0 ; a21*b42 + a11*b41 | a21*b32 + a11*b31
|
||||
pfmul mm7,mm3 ; a41*b24 | a41*b14
|
||||
pfadd mm6,mm5 ; a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13
|
||||
pfmul mm3,[edx+56] ; a41*b44 | a41*b34
|
||||
pfadd mm2,mm1 ; a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33
|
||||
pfadd mm7,mm6 ; a41*b24 + a21*b22 + a11*b21 + a31*b23 | a41*b14 + a21*b12 + a11*b11 + a31*b13
|
||||
movq mm0,[ecx+16] ; a22 | a12
|
||||
pfadd mm3,mm2 ; a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33
|
||||
movq mm1,[ecx+24] ; a42 | a32
|
||||
movq [eax],mm7 ; r21 | r11
|
||||
movq mm4,[edx] ; b21 | b11
|
||||
movq [eax+8],mm3 ; r41 | r31
|
||||
|
||||
punpckhdq mm2,mm0 ; a22 | XXX
|
||||
movq mm5,[edx+16] ; b22 | b12
|
||||
punpckhdq mm3,mm1 ; a42 | XXX
|
||||
movq mm6,[edx+32] ; b23 | b13
|
||||
punpckldq mm0,mm0 ; a12 | a12
|
||||
punpckldq mm1,mm1 ; a32 | a32
|
||||
pfmul mm4,mm0 ; a12*b21 | a12*b11
|
||||
punpckhdq mm2,mm2 ; a22 | a22
|
||||
pfmul mm0,[edx+8] ; a12*b41 | a12*b31
|
||||
movq mm7,[edx+48] ; b24 | b14
|
||||
pfmul mm5,mm2 ; a22*b22 | a22*b12
|
||||
punpckhdq mm3,mm3 ; a42 | a42
|
||||
pfmul mm2,[edx+24] ; a22*b42 | a22*b32
|
||||
pfmul mm6,mm1 ; a32*b23 | a32*b13
|
||||
pfadd mm5,mm4 ; a12*b21 + a22*b22 | a12*b11 + a22*b12
|
||||
pfmul mm1,[edx+40] ; a32*b43 | a32*b33
|
||||
pfadd mm2,mm0 ; a12*b41 + a22*b42 | a12*b31 + a22*b32
|
||||
pfmul mm7,mm3 ; a42*b24 | a42*b14
|
||||
pfadd mm6,mm5 ; a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12
|
||||
pfmul mm3,[edx+56] ; a42*b44 | a42*b34
|
||||
pfadd mm2,mm1 ; a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b31 + a22*b32
|
||||
pfadd mm7,mm6 ; a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12
|
||||
movq mm0,[ecx+32] ; a23 | a13
|
||||
pfadd mm3,mm2 ; a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b31 + a22*b32
|
||||
movq mm1,[ecx+40] ; a43 | a33
|
||||
movq [eax+16],mm7 ; r22 | r12
|
||||
movq mm4,[edx] ; b21 | b11
|
||||
movq [eax+24],mm3 ; r42 | r32
|
||||
|
||||
punpckhdq mm2,mm0 ; a23 | XXX
|
||||
movq mm5,[edx+16] ; b22 | b12
|
||||
punpckhdq mm3,mm1 ; a43 | XXX
|
||||
movq mm6,[edx+32] ; b23 | b13
|
||||
punpckldq mm0,mm0 ; a13 | a13
|
||||
punpckldq mm1,mm1 ; a33 | a33
|
||||
pfmul mm4,mm0 ; a13*b21 | a13*b11
|
||||
punpckhdq mm2,mm2 ; a23 | a23
|
||||
pfmul mm0,[edx+8] ; a13*b41 | a13*b31
|
||||
movq mm7,[edx+48] ; b24 | b14
|
||||
pfmul mm5,mm2 ; a23*b22 | a23*b12
|
||||
punpckhdq mm3,mm3 ; a43 | a43
|
||||
pfmul mm2,[edx+24] ; a23*b42 | a23*b32
|
||||
pfmul mm6,mm1 ; a33*b23 | a33*b13
|
||||
pfadd mm5,mm4 ; a23*b22 + a13*b21 | a23*b12 + a13*b11
|
||||
pfmul mm1,[edx+40] ; a33*b43 | a33*b33
|
||||
pfadd mm2,mm0 ; a13*b41 + a23*b42 | a13*b31 + a23*b32
|
||||
pfmul mm7,mm3 ; a43*b24 | a43*b14
|
||||
pfadd mm6,mm5 ; a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11
|
||||
pfmul mm3,[edx+56] ; a43*b44 | a43*b34
|
||||
pfadd mm2,mm1 ; a33*b43 + a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32
|
||||
pfadd mm7,mm6 ; a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11
|
||||
movq mm0,[ecx+48] ; a24 | a14
|
||||
pfadd mm3,mm2 ; a43*b44 + a33*b43 + a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32
|
||||
movq mm1,[ecx+56] ; a44 | a34
|
||||
movq [eax+32],mm7 ; r23 | r13
|
||||
movq mm4,[edx] ; b21 | b11
|
||||
movq [eax+40],mm3 ; r43 | r33
|
||||
|
||||
punpckhdq mm2,mm0 ; a24 | XXX
|
||||
movq mm5,[edx+16] ; b22 | b12
|
||||
punpckhdq mm3,mm1 ; a44 | XXX
|
||||
movq mm6,[edx+32] ; b23 | b13
|
||||
punpckldq mm0,mm0 ; a14 | a14
|
||||
punpckldq mm1,mm1 ; a34 | a34
|
||||
pfmul mm4,mm0 ; a14*b21 | a14*b11
|
||||
punpckhdq mm2,mm2 ; a24 | a24
|
||||
pfmul mm0,[edx+8] ; a14*b41 | a14*b31
|
||||
movq mm7,[edx+48] ; b24 | b14
|
||||
pfmul mm5,mm2 ; a24*b22 | a24*b12
|
||||
punpckhdq mm3,mm3 ; a44 | a44
|
||||
pfmul mm2,[edx+24] ; a24*b42 | a24*b32
|
||||
pfmul mm6,mm1 ; a34*b23 | a34*b13
|
||||
pfadd mm5,mm4 ; a14*b21 + a24*b22 | a14*b11 + a24*b12
|
||||
pfmul mm1,[edx+40] ; a34*b43 | a34*b33
|
||||
pfadd mm2,mm0 ; a14*b41 + a24*b42 | a14*b31 + a24*b32
|
||||
pfmul mm7,mm3 ; a44*b24 | a44*b14
|
||||
pfadd mm6,mm5 ; a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12
|
||||
pfmul mm3,[edx+56] ; a44*b44 | a44*b34
|
||||
pfadd mm2,mm1 ; a34*b43 + a14*b41 + a24*b42 | a34*b33 + a14*b31 + a24*b32
|
||||
pfadd mm7,mm6 ; a44*b24 + a34*b23 + a14*b21 + a24*b22 | a44*b14 + a34*b13 + a14*b11 + a24*b12
|
||||
pfadd mm3,mm2 ; a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32
|
||||
movq [eax+48],mm7 ; r24 | r14
|
||||
movq [eax+56],mm3 ; r44 | r34
|
||||
femms
|
||||
|
||||
ret
|
||||
|
|
@ -60,19 +60,6 @@ TEST(MatrixF, MultiplyImplmentations)
|
|||
U32 cpuProperties = Platform::SystemInfo.processor.properties;
|
||||
bool same;
|
||||
|
||||
// Test 3D NOW! if it is available
|
||||
F32 mrAMD[16];
|
||||
if (cpuProperties & CPU_PROP_3DNOW)
|
||||
{
|
||||
Athlon_MatrixF_x_MatrixF(m1, m2, mrAMD);
|
||||
|
||||
same = true;
|
||||
for (S32 i = 0; i < 16; i++)
|
||||
same &= mIsEqual(mrC[i], mrAMD[i]);
|
||||
|
||||
EXPECT_TRUE(same) << "Matrix multiplication verification failed. (C vs. 3D NOW!)";
|
||||
}
|
||||
|
||||
// Test SSE if it is available
|
||||
F32 mrSSE[16];
|
||||
if (cpuProperties & CPU_PROP_SSE)
|
||||
|
|
|
|||
|
|
@ -55,50 +55,11 @@
|
|||
/// @note These enums must be globally scoped so that they work with the inline assembly
|
||||
enum ProcessorType
|
||||
{
|
||||
// x86
|
||||
CPU_X86Compatible,
|
||||
CPU_Intel_Unknown,
|
||||
CPU_Intel_486,
|
||||
CPU_Intel_Pentium,
|
||||
CPU_Intel_PentiumMMX,
|
||||
CPU_Intel_PentiumPro,
|
||||
CPU_Intel_PentiumII,
|
||||
CPU_Intel_PentiumCeleron,
|
||||
CPU_Intel_PentiumIII,
|
||||
CPU_Intel_Pentium4,
|
||||
CPU_Intel_PentiumM,
|
||||
CPU_Intel_Core,
|
||||
CPU_Intel_Core2,
|
||||
CPU_Intel_Corei7Xeon, // Core i7 or Xeon
|
||||
CPU_AMD_K6,
|
||||
CPU_AMD_K6_2,
|
||||
CPU_AMD_K6_3,
|
||||
CPU_AMD_Athlon,
|
||||
CPU_AMD_Phenom,
|
||||
CPU_AMD_PhenomII,
|
||||
CPU_AMD_Bulldozer,
|
||||
CPU_AMD_Unknown,
|
||||
CPU_Cyrix_6x86,
|
||||
CPU_Cyrix_MediaGX,
|
||||
CPU_Cyrix_6x86MX,
|
||||
CPU_Cyrix_GXm, ///< Media GX w/ MMX
|
||||
CPU_Cyrix_Unknown,
|
||||
|
||||
// PowerPC
|
||||
CPU_PowerPC_Unknown,
|
||||
CPU_PowerPC_601,
|
||||
CPU_PowerPC_603,
|
||||
CPU_PowerPC_603e,
|
||||
CPU_PowerPC_603ev,
|
||||
CPU_PowerPC_604,
|
||||
CPU_PowerPC_604e,
|
||||
CPU_PowerPC_604ev,
|
||||
CPU_PowerPC_G3,
|
||||
CPU_PowerPC_G4,
|
||||
CPU_PowerPC_G4_7450,
|
||||
CPU_PowerPC_G4_7455,
|
||||
CPU_PowerPC_G4_7447,
|
||||
CPU_PowerPC_G5,
|
||||
CPU_ArmCompatible,
|
||||
CPU_Intel,
|
||||
CPU_AMD,
|
||||
CPU_Apple
|
||||
};
|
||||
|
||||
/// Properties for CPU.
|
||||
|
|
@ -107,17 +68,17 @@ enum ProcessorProperties
|
|||
CPU_PROP_C = (1<<0), ///< We should use C fallback math functions.
|
||||
CPU_PROP_FPU = (1<<1), ///< Has an FPU. (It better!)
|
||||
CPU_PROP_MMX = (1<<2), ///< Supports MMX instruction set extension.
|
||||
CPU_PROP_3DNOW = (1<<3), ///< Supports AMD 3dNow! instruction set extension.
|
||||
CPU_PROP_SSE = (1<<4), ///< Supports SSE instruction set extension.
|
||||
CPU_PROP_RDTSC = (1<<5), ///< Supports Read Time Stamp Counter op.
|
||||
CPU_PROP_SSE2 = (1<<6), ///< Supports SSE2 instruction set extension.
|
||||
CPU_PROP_SSE3 = (1<<7), ///< Supports SSE3 instruction set extension.
|
||||
CPU_PROP_SSE3xt = (1<<8), ///< Supports extended SSE3 instruction set
|
||||
CPU_PROP_SSE4_1 = (1<<9), ///< Supports SSE4_1 instruction set extension.
|
||||
CPU_PROP_SSE4_2 = (1<<10), ///< Supports SSE4_2 instruction set extension.
|
||||
CPU_PROP_MP = (1<<11), ///< This is a multi-processor system.
|
||||
CPU_PROP_LE = (1<<12), ///< This processor is LITTLE ENDIAN.
|
||||
CPU_PROP_64bit = (1<<13), ///< This processor is 64-bit capable
|
||||
CPU_PROP_SSE = (1<<3), ///< Supports SSE instruction set extension.
|
||||
CPU_PROP_SSE2 = (1<<4), ///< Supports SSE2 instruction set extension.
|
||||
CPU_PROP_SSE3 = (1<<5), ///< Supports SSE3 instruction set extension.
|
||||
CPU_PROP_SSE3ex = (1<<6), ///< Supports Supplemental SSE3 instruction set
|
||||
CPU_PROP_SSE4_1 = (1<<7), ///< Supports SSE4_1 instruction set extension.
|
||||
CPU_PROP_SSE4_2 = (1<<8), ///< Supports SSE4_2 instruction set extension.
|
||||
CPU_PROP_AVX = (1<<9), ///< Supports AVX256 instruction set extension.
|
||||
CPU_PROP_MP = (1<<10), ///< This is a multi-processor system.
|
||||
CPU_PROP_LE = (1<<11), ///< This processor is LITTLE ENDIAN.
|
||||
CPU_PROP_64bit = (1<<12), ///< This processor is 64-bit capable
|
||||
CPU_PROP_NEON = (1<<13), ///< Supports the Arm Neon instruction set extension.
|
||||
};
|
||||
|
||||
/// Processor info manager.
|
||||
|
|
@ -336,7 +297,6 @@ namespace Platform
|
|||
bool isHyperThreaded;
|
||||
U32 numLogicalProcessors;
|
||||
U32 numPhysicalProcessors;
|
||||
U32 numAvailableCores;
|
||||
U32 properties; // CPU type specific enum
|
||||
} processor;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -28,264 +28,47 @@
|
|||
|
||||
Signal<void(void)> Platform::SystemInfoReady;
|
||||
|
||||
enum CPUFlags
|
||||
{
|
||||
// EDX Register flags
|
||||
BIT_FPU = BIT(0),
|
||||
BIT_RDTSC = BIT(4),
|
||||
BIT_MMX = BIT(23),
|
||||
BIT_SSE = BIT(25),
|
||||
BIT_SSE2 = BIT(26),
|
||||
BIT_3DNOW = BIT(31),
|
||||
|
||||
// These use a different value for comparison than the above flags (ECX Register)
|
||||
BIT_SSE3 = BIT(0),
|
||||
BIT_SSE3xt = BIT(9),
|
||||
BIT_SSE4_1 = BIT(19),
|
||||
BIT_SSE4_2 = BIT(20),
|
||||
};
|
||||
|
||||
// fill the specified structure with information obtained from asm code
|
||||
void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
|
||||
char* vendor, U32 processor, U32 properties, U32 properties2)
|
||||
void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand)
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= (properties & BIT_FPU) ? CPU_PROP_FPU : 0;
|
||||
Platform::SystemInfo.processor.properties |= (properties & BIT_RDTSC) ? CPU_PROP_RDTSC : 0;
|
||||
Platform::SystemInfo.processor.properties |= (properties & BIT_MMX) ? CPU_PROP_MMX : 0;
|
||||
|
||||
if (dStricmp(vendor, "GenuineIntel") == 0)
|
||||
{
|
||||
pInfo.properties |= (properties & BIT_SSE) ? CPU_PROP_SSE : 0;
|
||||
pInfo.properties |= (properties & BIT_SSE2) ? CPU_PROP_SSE2 : 0;
|
||||
pInfo.properties |= (properties2 & BIT_SSE3) ? CPU_PROP_SSE3 : 0;
|
||||
pInfo.properties |= (properties2 & BIT_SSE3xt) ? CPU_PROP_SSE3xt : 0;
|
||||
pInfo.properties |= (properties2 & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0;
|
||||
pInfo.properties |= (properties2 & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0;
|
||||
|
||||
pInfo.type = CPU_Intel_Unknown;
|
||||
// switch on processor family code
|
||||
switch ((processor >> 8) & 0x0f)
|
||||
{
|
||||
case 4:
|
||||
pInfo.type = CPU_Intel_486;
|
||||
pInfo.name = StringTable->insert("Intel 486 class");
|
||||
break;
|
||||
|
||||
// Pentium Family
|
||||
case 5:
|
||||
// switch on processor model code
|
||||
switch ((processor >> 4) & 0xf)
|
||||
{
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
pInfo.type = CPU_Intel_Pentium;
|
||||
pInfo.name = StringTable->insert("Intel Pentium");
|
||||
break;
|
||||
case 4:
|
||||
pInfo.type = CPU_Intel_PentiumMMX;
|
||||
pInfo.name = StringTable->insert("Intel Pentium MMX");
|
||||
break;
|
||||
default:
|
||||
pInfo.type = CPU_Intel_Pentium;
|
||||
pInfo.name = StringTable->insert( "Intel (unknown)" );
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
// Pentium Pro/II/III family
|
||||
case 6:
|
||||
{
|
||||
U32 extendedModel = ( processor & 0xf0000 ) >> 16;
|
||||
// switch on processor model code
|
||||
switch ((processor >> 4) & 0xf)
|
||||
{
|
||||
case 1:
|
||||
pInfo.type = CPU_Intel_PentiumPro;
|
||||
pInfo.name = StringTable->insert("Intel Pentium Pro");
|
||||
break;
|
||||
case 3:
|
||||
case 5:
|
||||
pInfo.type = CPU_Intel_PentiumII;
|
||||
pInfo.name = StringTable->insert("Intel Pentium II");
|
||||
break;
|
||||
case 6:
|
||||
pInfo.type = CPU_Intel_PentiumCeleron;
|
||||
pInfo.name = StringTable->insert("Intel Pentium Celeron");
|
||||
break;
|
||||
case 7:
|
||||
case 8:
|
||||
case 11:
|
||||
pInfo.type = CPU_Intel_PentiumIII;
|
||||
pInfo.name = StringTable->insert("Intel Pentium III");
|
||||
break;
|
||||
case 0xA:
|
||||
if( extendedModel == 1)
|
||||
{
|
||||
pInfo.type = CPU_Intel_Corei7Xeon;
|
||||
pInfo.name = StringTable->insert( "Intel Core i7 / Xeon" );
|
||||
}
|
||||
else
|
||||
{
|
||||
pInfo.type = CPU_Intel_PentiumIII;
|
||||
pInfo.name = StringTable->insert( "Intel Pentium III Xeon" );
|
||||
}
|
||||
break;
|
||||
case 0xD:
|
||||
if( extendedModel == 1 )
|
||||
{
|
||||
pInfo.type = CPU_Intel_Corei7Xeon;
|
||||
pInfo.name = StringTable->insert( "Intel Core i7 / Xeon" );
|
||||
}
|
||||
else
|
||||
{
|
||||
pInfo.type = CPU_Intel_PentiumM;
|
||||
pInfo.name = StringTable->insert( "Intel Pentium/Celeron M" );
|
||||
}
|
||||
break;
|
||||
case 0xE:
|
||||
pInfo.type = CPU_Intel_Core;
|
||||
pInfo.name = StringTable->insert( "Intel Core" );
|
||||
break;
|
||||
case 0xF:
|
||||
pInfo.type = CPU_Intel_Core2;
|
||||
pInfo.name = StringTable->insert( "Intel Core 2" );
|
||||
break;
|
||||
default:
|
||||
pInfo.type = CPU_Intel_PentiumPro;
|
||||
pInfo.name = StringTable->insert( "Intel (unknown)" );
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Pentium4 Family
|
||||
case 0xf:
|
||||
pInfo.type = CPU_Intel_Pentium4;
|
||||
pInfo.name = StringTable->insert( "Intel Pentium 4" );
|
||||
break;
|
||||
|
||||
default:
|
||||
pInfo.type = CPU_Intel_Unknown;
|
||||
pInfo.name = StringTable->insert( "Intel (unknown)" );
|
||||
break;
|
||||
}
|
||||
pInfo.type = CPU_Intel;
|
||||
pInfo.name = StringTable->insert(brand ? brand : "Intel (Unknown)");
|
||||
}
|
||||
//--------------------------------------
|
||||
else if (dStricmp(vendor, "AuthenticAMD") == 0)
|
||||
{
|
||||
pInfo.name = StringTable->insert(brand ? brand : "AMD (unknown)");
|
||||
pInfo.type = CPU_AMD;
|
||||
}
|
||||
else if (dStricmp(vendor, "Apple") == 0)
|
||||
{
|
||||
pInfo.name = StringTable->insert(brand ? brand : "Apple (unknown)");
|
||||
pInfo.type = CPU_Apple;
|
||||
}
|
||||
else
|
||||
if (dStricmp(vendor, "AuthenticAMD") == 0)
|
||||
{
|
||||
// AthlonXP processors support SSE
|
||||
pInfo.properties |= (properties & BIT_SSE) ? CPU_PROP_SSE : 0;
|
||||
pInfo.properties |= ( properties & BIT_SSE2 ) ? CPU_PROP_SSE2 : 0;
|
||||
pInfo.properties |= (properties & BIT_3DNOW) ? CPU_PROP_3DNOW : 0;
|
||||
// Phenom and PhenomII support SSE3, SSE4a
|
||||
pInfo.properties |= ( properties2 & BIT_SSE3 ) ? CPU_PROP_SSE3 : 0;
|
||||
pInfo.properties |= ( properties2 & BIT_SSE4_1 ) ? CPU_PROP_SSE4_1 : 0;
|
||||
// switch on processor family code
|
||||
switch ((processor >> 8) & 0xf)
|
||||
{
|
||||
// K6 Family
|
||||
case 5:
|
||||
// switch on processor model code
|
||||
switch ((processor >> 4) & 0xf)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
pInfo.type = CPU_AMD_K6_3;
|
||||
pInfo.name = StringTable->insert("AMD K5");
|
||||
break;
|
||||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
pInfo.type = CPU_AMD_K6;
|
||||
pInfo.name = StringTable->insert("AMD K6");
|
||||
break;
|
||||
case 8:
|
||||
pInfo.type = CPU_AMD_K6_2;
|
||||
pInfo.name = StringTable->insert("AMD K6-2");
|
||||
break;
|
||||
case 9:
|
||||
case 10:
|
||||
case 11:
|
||||
case 12:
|
||||
case 13:
|
||||
case 14:
|
||||
case 15:
|
||||
pInfo.type = CPU_AMD_K6_3;
|
||||
pInfo.name = StringTable->insert("AMD K6-3");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
// Athlon Family
|
||||
case 6:
|
||||
pInfo.type = CPU_AMD_Athlon;
|
||||
pInfo.name = StringTable->insert("AMD Athlon");
|
||||
break;
|
||||
|
||||
// Phenom Family
|
||||
case 15:
|
||||
pInfo.type = CPU_AMD_Phenom;
|
||||
pInfo.name = StringTable->insert("AMD Phenom");
|
||||
break;
|
||||
|
||||
// Phenom II Family
|
||||
case 16:
|
||||
pInfo.type = CPU_AMD_PhenomII;
|
||||
pInfo.name = StringTable->insert("AMD Phenom II");
|
||||
break;
|
||||
|
||||
// Bulldozer Family
|
||||
case 17:
|
||||
pInfo.type = CPU_AMD_Bulldozer;
|
||||
pInfo.name = StringTable->insert("AMD Bulldozer");
|
||||
break;
|
||||
|
||||
default:
|
||||
pInfo.type = CPU_AMD_Unknown;
|
||||
pInfo.name = StringTable->insert("AMD (unknown)");
|
||||
break;
|
||||
}
|
||||
}
|
||||
//--------------------------------------
|
||||
else
|
||||
if (dStricmp(vendor, "CyrixInstead") == 0)
|
||||
{
|
||||
switch (processor)
|
||||
{
|
||||
case 0x520:
|
||||
pInfo.type = CPU_Cyrix_6x86;
|
||||
pInfo.name = StringTable->insert("Cyrix 6x86");
|
||||
break;
|
||||
case 0x440:
|
||||
pInfo.type = CPU_Cyrix_MediaGX;
|
||||
pInfo.name = StringTable->insert("Cyrix Media GX");
|
||||
break;
|
||||
case 0x600:
|
||||
pInfo.type = CPU_Cyrix_6x86MX;
|
||||
pInfo.name = StringTable->insert("Cyrix 6x86mx/MII");
|
||||
break;
|
||||
case 0x540:
|
||||
pInfo.type = CPU_Cyrix_GXm;
|
||||
pInfo.name = StringTable->insert("Cyrix GXm");
|
||||
break;
|
||||
default:
|
||||
pInfo.type = CPU_Cyrix_Unknown;
|
||||
pInfo.name = StringTable->insert("Cyrix (unknown)");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)
|
||||
|
||||
pInfo.name = StringTable->insert(brand ? brand : "x86 Compatible (unknown)");
|
||||
pInfo.type = CPU_X86Compatible;
|
||||
|
||||
#elif defined(TORQUE_CPU_ARM64)
|
||||
pInfo.name = StringTable->insert(brand ? brand : "Arm Compatible (unknown)");
|
||||
pInfo.type = CPU_ArmCompatible;
|
||||
|
||||
#else
|
||||
#error "Unknown CPU Architecture"
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// Get multithreading caps.
|
||||
|
||||
CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numAvailableCores, pInfo.numPhysicalProcessors );
|
||||
CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numPhysicalProcessors );
|
||||
pInfo.isHyperThreaded = CPUInfo::isHyperThreaded( config );
|
||||
pInfo.isMultiCore = CPUInfo::isMultiCore( config );
|
||||
|
||||
// Trigger the signal
|
||||
Platform::SystemInfoReady.trigger();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,657 +0,0 @@
|
|||
// Original code is:
|
||||
// Copyright (c) 2005 Intel Corporation
|
||||
// All Rights Reserved
|
||||
//
|
||||
// CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform
|
||||
// The three forms of HW multithreading are: Multi-processor, Multi-core, and
|
||||
// HyperThreading Technology.
|
||||
// This application enumerates all the logical processors enabled by OS and BIOS,
|
||||
// determines the HW topology of these enabled logical processors in the system
|
||||
// using information provided by CPUID instruction.
|
||||
// A multi-processing system can support any combination of the three forms of HW
|
||||
// multi-threading support. The relevant topology can be identified using a
|
||||
// three level decomposition of the "initial APIC ID" into
|
||||
// Package_id, core_id, and SMT_id. Such decomposition provides a three-level map of
|
||||
// the topology of hardware resources and
|
||||
// allow multi-threaded software to manage shared hardware resources in
|
||||
// the platform to reduce resource contention
|
||||
|
||||
// Multicore detection algorithm for processor and cache topology requires
|
||||
// all leaf functions of CPUID instructions be available. System administrator
|
||||
// must ensure BIOS settings are not configured to restrict CPUID functionality.
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
|
||||
#if defined(TORQUE_OS_LINUX) || defined(LINUX)
|
||||
|
||||
// TODO: the GCC code doesn't compile in Release builds with optimizations; move this code to the platform layer
|
||||
|
||||
#else
|
||||
|
||||
#include "platform/platform.h"
|
||||
#include "platform/platformCPUCount.h"
|
||||
|
||||
#if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX)
|
||||
|
||||
#ifdef TORQUE_OS_LINUX
|
||||
// The Linux source code listing can be compiled using Linux kernel version 2.6
|
||||
// or higher (e.g. RH 4AS-2.8 using GCC 3.4.4).
|
||||
// Due to syntax variances of Linux affinity APIs with earlier kernel versions
|
||||
// and dependence on glibc library versions, compilation on Linux environment
|
||||
// with older kernels and compilers may require kernel patches or compiler upgrades.
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <sched.h>
|
||||
#define DWORD unsigned long
|
||||
#elif defined( TORQUE_OS_WIN )
|
||||
#include <windows.h>
|
||||
#elif defined( TORQUE_OS_MAC )
|
||||
# include <sys/types.h>
|
||||
# include <sys/sysctl.h>
|
||||
#else
|
||||
#error Not implemented on platform.
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
namespace CPUInfo {
|
||||
|
||||
#define HWD_MT_BIT 0x10000000 // EDX[28] Bit 28 is set if HT or multi-core is supported
|
||||
#define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical
|
||||
// processors per physical processor when execute cpuid with
|
||||
// eax set to 1
|
||||
#define NUM_CORE_BITS 0xFC000000 // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one
|
||||
// per physical processor when execute cpuid with
|
||||
// eax set to 4.
|
||||
|
||||
|
||||
#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
|
||||
// initial APIC ID for the processor this code is running on.
|
||||
|
||||
|
||||
#ifndef TORQUE_OS_MAC
|
||||
static U32 CpuIDSupported(void);
|
||||
static U32 find_maskwidth(unsigned int);
|
||||
static U32 HWD_MTSupported(void);
|
||||
static U32 MaxLogicalProcPerPhysicalProc(void);
|
||||
static U32 MaxCorePerPhysicalProc(void);
|
||||
static U8 GetAPIC_ID(void);
|
||||
static U8 GetNzbSubID(U8, U8, U8);
|
||||
#endif
|
||||
|
||||
static char g_s3Levels[2048];
|
||||
|
||||
#ifndef TORQUE_OS_MAC
|
||||
|
||||
//
|
||||
// CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return
|
||||
// the maximum supported standard function.
|
||||
//
|
||||
// Executes CPUID with EAX = 0 and returns the value the instruction leaves in
// EAX (the maximum supported standard function). Returns 0 if the query is
// aborted by an exception.
static U32 CpuIDSupported(void)
{
   U32 maxInputValue = 0;
   // If CPUID instruction is supported
#ifdef TORQUE_COMPILER_GCC
   try
   {
      // call cpuid with eax = 0
      //
      // All four registers CPUID writes are declared as outputs so the
      // compiler does the saving/restoring itself; the statement assembles
      // for both 32-bit and 64-bit targets (the previous pushl/popl %ebx
      // pair was 32-bit only).
      U32 regB = 0, regC = 0, regD = 0;
      asm volatile
      (
         "cpuid"
         : "=a" (maxInputValue), "=b" (regB), "=c" (regC), "=d" (regD)
         : "a" (0)
      );
      (void) regB; (void) regC; (void) regD;
   }
   catch (...)
   {
      return(0);                   // cpuid instruction is unavailable
   }
#elif defined( TORQUE_COMPILER_VISUALC )
   try
   {
      // call cpuid with eax = 0
      __asm
      {
         xor eax, eax
         cpuid
         mov maxInputValue, eax
      }
   }
   catch (...)
   {
      // cpuid instruction is unavailable
   }
#else
#  error Not implemented.
#endif

   return maxInputValue;
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Function returns the maximum cores per physical package. Note that the number of
|
||||
// AVAILABLE cores per physical to be used by an application might be less than this
|
||||
// maximum value.
|
||||
//
|
||||
|
||||
// Returns 1 when HWD_MTSupported() reports no hardware multi-threading.
// Otherwise returns (EAX[31:26] of CPUID leaf 4, sub-leaf 0) + 1, or 1 when
// CPUID leaf 0 reports a maximum standard leaf below 4.
static U32 MaxCorePerPhysicalProc(void)
{

   U32 Regeax = 0;

   if (!HWD_MTSupported()) return (U32) 1;  // Single core
#ifdef TORQUE_COMPILER_GCC
   {
      // Each CPUID query is a single self-contained asm statement with its
      // inputs and outputs declared. The previous version spread the logic
      // over three asm statements, which relied on register contents
      // surviving between statements, jumped from one statement into
      // another, and skipped the matching "popl %ebx" on the single-core path.
      U32 maxLeaf = 0, regB = 0, regC = 0, regD = 0;
      asm volatile
      (
         "cpuid"
         : "=a" (maxLeaf), "=b" (regB), "=c" (regC), "=d" (regD)
         : "a" (0), "c" (0)
      );

      if (maxLeaf >= 4)             // check if cpuid supports leaf 4
      {
         asm volatile
         (
            "cpuid"                 // start with index = 0; Leaf 4 reports
            : "=a" (Regeax), "=b" (regB), "=c" (regC), "=d" (regD)
            : "a" (4), "c" (0)      // at least one valid cache level
         );
      }
      // else: Regeax stays 0, which yields a single core below.
      (void) regB; (void) regC; (void) regD;
   }
#elif defined( TORQUE_COMPILER_VISUALC )
   __asm
   {
      xor eax, eax
      cpuid
      cmp eax, 4                    // check if cpuid supports leaf 4
      jl single_core                // Single core
      mov eax, 4
      mov ecx, 0                    // start with index = 0; Leaf 4 reports
      cpuid                         // at least one valid cache level
      mov Regeax, eax
      jmp multi_core

single_core:
      xor eax, eax

multi_core:

   }
#else
#  error Not implemented.
#endif
   return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1;

}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// The function returns 0 when the hardware multi-threaded bit is not set.
|
||||
//
|
||||
// Returns EDX of CPUID leaf 1 masked with HWD_MT_BIT: non-zero when the bit is
// set, 0 when it is clear or when CpuIDSupported() reports that leaf 1 is not
// available.
static U32 HWD_MTSupported(void)
{
   U32 Regedx = 0;

   if ((CpuIDSupported() >= 1))
   {
#ifdef TORQUE_COMPILER_GCC
      // Single constrained statement; replaces the 32-bit-only
      // pushl/popl %ebx sequence so it also assembles for x86-64.
      U32 regA = 0, regB = 0, regC = 0;
      asm volatile
      (
         "cpuid"
         : "=a" (regA), "=b" (regB), "=c" (regC), "=d" (Regedx)
         : "a" (1)
      );
      (void) regA; (void) regB; (void) regC;
#elif defined( TORQUE_COMPILER_VISUALC )
      __asm
      {
         mov eax, 1
         cpuid
         mov Regedx, edx
      }
#else
#  error Not implemented.
#endif
   }

   return (Regedx & HWD_MT_BIT);
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Function returns the maximum logical processors per physical package. Note that the number of
|
||||
// AVAILABLE logical processors per physical to be used by an application might be less than this
|
||||
// maximum value.
|
||||
//
|
||||
// Returns 1 when HWD_MTSupported() reports no hardware multi-threading,
// otherwise the NUM_LOGICAL_BITS field (EBX[23:16]) of CPUID leaf 1.
static U32 MaxLogicalProcPerPhysicalProc(void)
{

   U32 Regebx = 0;

   if (!HWD_MTSupported()) return (U32) 1;
#ifdef TORQUE_COMPILER_GCC
   // The leaf number is passed as an input operand and the statement is
   // volatile, so the compiler can neither drop nor merge the query.
   U32 regA = 0, regC = 0, regD = 0;
   asm volatile
   (
      "cpuid"
      : "=a" (regA), "=b" (Regebx), "=c" (regC), "=d" (regD)
      : "a" (1)
   );
   (void) regA; (void) regC; (void) regD;
#elif defined( TORQUE_COMPILER_VISUALC )
   __asm
   {
      mov eax, 1
      cpuid
      mov Regebx, ebx
   }
#else
#  error Not implemented.
#endif
   return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16);

}
|
||||
|
||||
|
||||
// Returns the INITIAL_APIC_ID_BITS field (EBX[31:24]) of CPUID leaf 1 as
// executed on whichever processor the calling thread is currently running on.
static U8 GetAPIC_ID(void)
{

   U32 Regebx = 0;
#ifdef TORQUE_COMPILER_GCC
   // This must be "asm volatile": the result differs per logical processor,
   // and CPUCount() calls this repeatedly after switching thread affinity.
   // A non-volatile asm with outputs and no inputs may be hoisted out of the
   // caller's loop or merged, returning the same ID for every processor.
   U32 regA = 0, regC = 0, regD = 0;
   asm volatile
   (
      "cpuid"
      : "=a" (regA), "=b" (Regebx), "=c" (regC), "=d" (regD)
      : "a" (1)
   );
   (void) regA; (void) regC; (void) regD;

#elif defined( TORQUE_COMPILER_VISUALC )
   __asm
   {
      mov eax, 1
      cpuid
      mov Regebx, ebx
   }
#else
#  error Not implemented.
#endif

   return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24);

}
|
||||
|
||||
//
|
||||
// Determine the width of the bit field that can represent the value count_item.
|
||||
//
|
||||
// Returns the number of bits needed to hold the values 0 .. CountItem-1:
// 0 for CountItem == 1, otherwise the index of the highest set bit of
// (CountItem - 1) plus one.
//
// Only the low 16 bits of (CountItem - 1) are examined, exactly as the
// previous 16-bit "bsr cx, ax" implementation did (so CountItem == 0 yields 16).
//
// Written in portable C++ for every compiler. The former GCC inline assembly
// was spread over three asm statements with unbalanced push/pop pairs and a
// global "next" label, which depended on registers and the stack being left
// untouched between statements.
U32 find_maskwidth(U32 CountItem)
{
   U32 remaining = (CountItem - 1) & 0xFFFF;
   U32 MaskWidth = 0;

   // Count how many times the value can be shifted right before it is zero;
   // that is the position of its highest set bit plus one.
   while (remaining != 0)
   {
      ++MaskWidth;
      remaining >>= 1;
   }

   return MaskWidth;
}
|
||||
|
||||
|
||||
//
|
||||
// Extract the subset of bit field from the 8-bit value FullID. It returns the 8-bit sub ID value
|
||||
//
|
||||
// Masks FullID down to one bit field and returns it in place (the field is
// not shifted down to bit 0). The field starts at bit ShiftCount and is
// find_maskwidth(MaxSubIDValue) bits wide.
static U8 GetNzbSubID(U8 FullID,
                      U8 MaxSubIDValue,
                      U8 ShiftCount)
{
   const U32 fieldWidth = find_maskwidth((U32) MaxSubIDValue);

   // Bits at and above the top of the field...
   const U8 aboveField = (U8) (0xff << (ShiftCount + fieldWidth));
   // ...removed from the bits at and above the bottom of the field.
   const U8 fieldMask = (0xff << ShiftCount) ^ aboveField;

   return (FullID & fieldMask);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
//
|
||||
//
|
||||
EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
|
||||
{
|
||||
EConfig StatusFlag = CONFIG_UserConfigIssue;
|
||||
|
||||
g_s3Levels[0] = 0;
|
||||
TotAvailCore = 1;
|
||||
PhysicalNum = 1;
|
||||
|
||||
U32 numLPEnabled = 0;
|
||||
S32 MaxLPPerCore = 1;
|
||||
|
||||
#ifdef TORQUE_OS_MAC
|
||||
|
||||
//FIXME: This isn't a proper port but more or less just some sneaky cheating
|
||||
// to get around having to mess with yet another crap UNIX-style API. Seems
|
||||
// like there isn't a way to do this that's working across all OSX incarnations
|
||||
// and machine configurations anyway.
|
||||
|
||||
S32 numCPUs;
|
||||
S32 numPackages;
|
||||
|
||||
// Get the number of CPUs.
|
||||
|
||||
size_t len = sizeof( numCPUs );
|
||||
if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 )
|
||||
return CONFIG_UserConfigIssue;
|
||||
|
||||
// Get the number of packages.
|
||||
len = sizeof( numPackages );
|
||||
if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 )
|
||||
return CONFIG_UserConfigIssue;
|
||||
|
||||
TotAvailCore = numCPUs;
|
||||
TotAvailLogical = numCPUs;
|
||||
PhysicalNum = numPackages;
|
||||
#else
|
||||
|
||||
U32 dwAffinityMask;
|
||||
S32 j = 0;
|
||||
U8 apicID, PackageIDMask;
|
||||
U8 tblPkgID[256], tblCoreID[256], tblSMTID[256];
|
||||
char tmp[256];
|
||||
|
||||
#ifdef TORQUE_OS_LINUX
|
||||
//we need to make sure that this process is allowed to run on
|
||||
//all of the logical processors that the OS itself can run on.
|
||||
//A process could acquire/inherit affinity settings that restricts the
|
||||
// current process to run on a subset of all logical processor visible to OS.
|
||||
|
||||
// Linux doesn't easily allow us to look at the Affinity Bitmask directly,
|
||||
// but it does provide an API to test affinity maskbits of the current process
|
||||
// against each logical processor visible under OS.
|
||||
S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many
|
||||
//CPUs are currently enabled.
|
||||
|
||||
//this will tell us which processors this process can run on.
|
||||
cpu_set_t allowedCPUs;
|
||||
sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs);
|
||||
|
||||
for (S32 i = 0; i < sysNumProcs; i++ )
|
||||
{
|
||||
if ( CPU_ISSET(i, &allowedCPUs) == 0 )
|
||||
return CONFIG_UserConfigIssue;
|
||||
}
|
||||
#elif defined( TORQUE_OS_WIN )
|
||||
DWORD dwProcessAffinity, dwSystemAffinity;
|
||||
GetProcessAffinityMask(GetCurrentProcess(),
|
||||
&dwProcessAffinity,
|
||||
&dwSystemAffinity);
|
||||
if (dwProcessAffinity != dwSystemAffinity) // not all CPUs are enabled
|
||||
return CONFIG_UserConfigIssue;
|
||||
#else
|
||||
# error Not implemented.
|
||||
#endif
|
||||
|
||||
// Assume that cores within a package have the SAME number of
|
||||
// logical processors. Also, values returned by
|
||||
// MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have
|
||||
// to be power of 2.
|
||||
|
||||
MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc();
|
||||
dwAffinityMask = 1;
|
||||
|
||||
#ifdef TORQUE_OS_LINUX
|
||||
cpu_set_t currentCPU;
|
||||
while ( j < sysNumProcs )
|
||||
{
|
||||
CPU_ZERO(¤tCPU);
|
||||
CPU_SET(j, ¤tCPU);
|
||||
if ( sched_setaffinity (0, sizeof(currentCPU), ¤tCPU) == 0 )
|
||||
{
|
||||
sleep(0); // Ensure system to switch to the right CPU
|
||||
#elif defined( TORQUE_OS_WIN )
|
||||
while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity)
|
||||
{
|
||||
if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask))
|
||||
{
|
||||
Sleep(0); // Ensure system to switch to the right CPU
|
||||
#else
|
||||
# error Not implemented.
|
||||
#endif
|
||||
apicID = GetAPIC_ID();
|
||||
|
||||
|
||||
// Store SMT ID and core ID of each logical processor
|
||||
// Shift vlaue for SMT ID is 0
|
||||
// Shift value for core ID is the mask width for maximum logical
|
||||
// processors per core
|
||||
|
||||
tblSMTID[j] = GetNzbSubID(apicID, MaxLPPerCore, 0);
|
||||
U8 maxCorePPP = MaxCorePerPhysicalProc();
|
||||
U8 maskWidth = find_maskwidth(MaxLPPerCore);
|
||||
tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth);
|
||||
|
||||
// Extract package ID, assume single cluster.
|
||||
// Shift value is the mask width for max Logical per package
|
||||
|
||||
PackageIDMask = (unsigned char) (0xff <<
|
||||
find_maskwidth(MaxLogicalProcPerPhysicalProc()));
|
||||
|
||||
tblPkgID[j] = apicID & PackageIDMask;
|
||||
sprintf(tmp," AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d, SMT ID = %d\n",
|
||||
dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]);
|
||||
dStrcat(g_s3Levels, tmp, 2048);
|
||||
|
||||
numLPEnabled ++; // Number of available logical processors in the system.
|
||||
|
||||
} // if
|
||||
|
||||
j++;
|
||||
dwAffinityMask = 1 << j;
|
||||
} // while
|
||||
|
||||
// restore the affinity setting to its original state
|
||||
#ifdef TORQUE_OS_LINUX
|
||||
sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs);
|
||||
sleep(0);
|
||||
#elif defined( TORQUE_OS_WIN )
|
||||
SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity);
|
||||
Sleep(0);
|
||||
#else
|
||||
# error Not implemented.
|
||||
#endif
|
||||
TotAvailLogical = numLPEnabled;
|
||||
|
||||
//
|
||||
// Count available cores (TotAvailCore) in the system
|
||||
//
|
||||
U8 CoreIDBucket[256];
|
||||
DWORD ProcessorMask, pCoreMask[256];
|
||||
U32 i, ProcessorNum;
|
||||
|
||||
CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0];
|
||||
ProcessorMask = 1;
|
||||
pCoreMask[0] = ProcessorMask;
|
||||
|
||||
for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
|
||||
{
|
||||
ProcessorMask <<= 1;
|
||||
for (i = 0; i < TotAvailCore; i++)
|
||||
{
|
||||
// Comparing bit-fields of logical processors residing in different packages
|
||||
// Assuming the bit-masks are the same on all processors in the system.
|
||||
if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i])
|
||||
{
|
||||
pCoreMask[i] |= ProcessorMask;
|
||||
break;
|
||||
}
|
||||
|
||||
} // for i
|
||||
|
||||
if (i == TotAvailCore) // did not match any bucket. Start a new one.
|
||||
{
|
||||
CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum];
|
||||
pCoreMask[i] = ProcessorMask;
|
||||
|
||||
TotAvailCore++; // Number of available cores in the system
|
||||
|
||||
}
|
||||
|
||||
} // for ProcessorNum
|
||||
|
||||
|
||||
//
|
||||
// Count physical processor (PhysicalNum) in the system
|
||||
//
|
||||
U8 PackageIDBucket[256];
|
||||
DWORD pPackageMask[256];
|
||||
|
||||
PackageIDBucket[0] = tblPkgID[0];
|
||||
ProcessorMask = 1;
|
||||
pPackageMask[0] = ProcessorMask;
|
||||
|
||||
for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
|
||||
{
|
||||
ProcessorMask <<= 1;
|
||||
for (i = 0; i < PhysicalNum; i++)
|
||||
{
|
||||
// Comparing bit-fields of logical processors residing in different packages
|
||||
// Assuming the bit-masks are the same on all processors in the system.
|
||||
if (tblPkgID[ProcessorNum]== PackageIDBucket[i])
|
||||
{
|
||||
pPackageMask[i] |= ProcessorMask;
|
||||
break;
|
||||
}
|
||||
|
||||
} // for i
|
||||
|
||||
if (i == PhysicalNum) // did not match any bucket. Start a new one.
|
||||
{
|
||||
PackageIDBucket[i] = tblPkgID[ProcessorNum];
|
||||
pPackageMask[i] = ProcessorMask;
|
||||
|
||||
PhysicalNum++; // Total number of physical processors in the system
|
||||
|
||||
}
|
||||
|
||||
} // for ProcessorNum
|
||||
#endif
|
||||
|
||||
//
|
||||
// Check to see if the system is multi-core
|
||||
// Check if the system is hyper-threading
|
||||
//
|
||||
if (TotAvailCore > PhysicalNum)
|
||||
{
|
||||
// Multi-core
|
||||
if (MaxLPPerCore == 1)
|
||||
StatusFlag = CONFIG_MultiCoreAndHTNotCapable;
|
||||
else if (numLPEnabled > TotAvailCore)
|
||||
StatusFlag = CONFIG_MultiCoreAndHTEnabled;
|
||||
else StatusFlag = CONFIG_MultiCoreAndHTDisabled;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// Single-core
|
||||
if (MaxLPPerCore == 1)
|
||||
StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
|
||||
else if (numLPEnabled > TotAvailCore)
|
||||
StatusFlag = CONFIG_SingleCoreHTEnabled;
|
||||
else StatusFlag = CONFIG_SingleCoreHTDisabled;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
return StatusFlag;
|
||||
}
|
||||
|
||||
} // namespace CPUInfo
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -29,13 +29,10 @@ namespace CPUInfo
|
|||
{
|
||||
enum EConfig
|
||||
{
|
||||
CONFIG_UserConfigIssue,
|
||||
CONFIG_SingleCoreHTEnabled,
|
||||
CONFIG_SingleCoreHTDisabled,
|
||||
CONFIG_SingleCoreAndHTNotCapable,
|
||||
CONFIG_MultiCoreAndHTNotCapable,
|
||||
CONFIG_MultiCoreAndHTEnabled,
|
||||
CONFIG_MultiCoreAndHTDisabled,
|
||||
};
|
||||
|
||||
inline bool isMultiCore( EConfig config )
|
||||
|
|
@ -44,7 +41,6 @@ namespace CPUInfo
|
|||
{
|
||||
case CONFIG_MultiCoreAndHTNotCapable:
|
||||
case CONFIG_MultiCoreAndHTEnabled:
|
||||
case CONFIG_MultiCoreAndHTDisabled:
|
||||
return true;
|
||||
|
||||
default:
|
||||
|
|
@ -65,11 +61,10 @@ namespace CPUInfo
|
|||
}
|
||||
}
|
||||
|
||||
EConfig CPUCount( U32& totalAvailableLogical,
|
||||
U32& totalAvailableCores,
|
||||
U32& numPhysical );
|
||||
|
||||
EConfig CPUCount( U32& totalAvailableLogical, U32& totalAvailableCores );
|
||||
} // namespace CPUInfo
|
||||
|
||||
void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand);
|
||||
|
||||
#endif // _TORQUE_PLATFORM_PLATFORMCOUNT_H_
|
||||
|
||||
|
|
|
|||
|
|
@ -1,128 +0,0 @@
|
|||
;-----------------------------------------------------------------------------
|
||||
; Copyright (c) 2012 GarageGames, LLC
|
||||
;
|
||||
; Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
; of this software and associated documentation files (the "Software"), to
|
||||
; deal in the Software without restriction, including without limitation the
|
||||
; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
; sell copies of the Software, and to permit persons to whom the Software is
|
||||
; furnished to do so, subject to the following conditions:
|
||||
;
|
||||
; The above copyright notice and this permission notice shall be included in
|
||||
; all copies or substantial portions of the Software.
|
||||
;
|
||||
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
; IN THE SOFTWARE.
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
segment .text
|
||||
|
||||
; syntax: export_fn <function name>
|
||||
; export_fn <function name>
; Declares <function name> as a global symbol and places its label here.
; The symbol gets a leading underscore unless the output format is "elf".
%macro export_fn 1
   %ifidn __OUTPUT_FORMAT__, elf
   ; No underscore needed for ELF object files
   global %1
   %1:
   %else
   global _%1
   _%1:
   %endif
%endmacro
|
||||
|
||||
; push registers
|
||||
; pushreg: pushes ebx, ebp, esi and edi (in that order); undone by popreg.
%macro pushreg 0
;    pushad
     push ebx
     push ebp
     push esi
     push edi
%endmacro
|
||||
|
||||
; pop registers
|
||||
; popreg: pops edi, esi, ebp and ebx, the reverse of the order pushreg pushes them.
%macro popreg 0
     pop edi
     pop esi
     pop ebp
     pop ebx
;    popad
%endmacro
|
||||
|
||||
; void detectX86CPUInfo(char *vendor, U32 *processor, U32 *properties);
;
; Arguments are read from the stack at [ebp+8] (vendor), [ebp+12] (processor)
; and [ebp+16] (properties).
;   vendor     receives the 12 vendor bytes from CPUID leaf 0 (ebx, edx, ecx);
;              this routine does not write a terminating NUL.
;   processor  receives eax of CPUID leaf 1 masked with 0x0fff3fff.
;   properties receives edx of CPUID leaf 1; bit 31 is additionally OR-ed in
;              from edx of leaf 0x80000001 when that leaf exists.
; If EFLAGS bit 21 cannot be toggled (no CPUID), none of the outputs are written.
export_fn detectX86CPUInfo
     push        ebp
     mov         ebp, esp

     pushreg

     push        edx
     push        ecx
     pushfd                              ; keep the caller's EFLAGS; popped again at EXIT
     pushfd                              ; save EFLAGS to stack
     pop         eax                     ; move EFLAGS into EAX
     mov         ebx, eax
     xor         eax, 0x200000           ; flip bit 21
     push        eax
     popfd                               ; load EFLAGS with bit 21 flipped
     pushfd
     pop         eax                     ; read EFLAGS back
     cmp         eax, ebx
     jz          EXIT                    ; doesn't support CPUID instruction

     ;
     ; get vendor information using CPUID eax == 0
     xor         eax, eax
     cpuid

     ; store the vendor tag (12 bytes in ebx, edx, ecx) in the first parameter,
     ; which should be a char[13]
     push        eax                     ; save eax
     mov         eax, [ebp+8]            ; store the char* address in eax
     mov         [eax], ebx              ; move ebx into the first 4 bytes
     add         eax, 4                  ; advance the char* 4 bytes
     mov         [eax], edx              ; move edx into the next 4 bytes
     add         eax, 4                  ; advance the char* 4 bytes
     mov         [eax], ecx              ; move ecx into the last 4 bytes
     pop         eax                     ; restore eax

     ; get processor signature and feature flags (CPUID leaf 1)
     mov         eax, 1
     cpuid                               ; eax=1, so cpuid queries feature information

     and         eax, 0x0fff3fff
     push        ecx
     mov         ecx, [ebp+12]
     mov         [ecx], eax              ; just store the model bits in processor param
     mov         ecx, [ebp+16]
     mov         [ecx], edx              ; set properties param
     pop         ecx

     ; want to check for 3DNow(tm).
     ; need to see if extended cpuid functions present.
     mov         eax, 0x80000000
     cpuid
     cmp         eax, 0x80000000
     jbe         MAYBE_3DLATER           ; no extended leaf beyond 0x80000000
     mov         eax, 0x80000001
     cpuid
     ; 3DNow if bit 31 set -> put bit in our properties
     and         edx, 0x80000000
     push        eax
     mov         eax, [ebp+16]
     or          [eax], edx
     pop         eax
MAYBE_3DLATER:
EXIT:
     popfd                               ; restore the caller's EFLAGS
     pop         ecx
     pop         edx

     popreg

     pop         ebp
     ret
|
||||
|
|
@ -322,10 +322,9 @@ ThreadPool::ThreadPool( const char* name, U32 numThreads )
|
|||
// Platform::SystemInfo will not yet have been initialized.
|
||||
|
||||
U32 numLogical = 0;
|
||||
U32 numPhysical = 0;
|
||||
U32 numCores = 0;
|
||||
|
||||
CPUInfo::CPUCount( numLogical, numCores, numPhysical );
|
||||
CPUInfo::CPUCount( numLogical, numCores );
|
||||
|
||||
const U32 baseCount = getMax( numLogical, numCores );
|
||||
mNumThreads = (baseCount > 0) ? baseCount : 2;
|
||||
|
|
|
|||
|
|
@ -35,15 +35,6 @@
|
|||
// we now have to use NSProcessInfo
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
//recently removed in Xcode 8 - most likely don't need these anymore
|
||||
#ifndef CPUFAMILY_INTEL_YONAH
|
||||
#define CPUFAMILY_INTEL_YONAH 0x73d67300
|
||||
#endif
|
||||
|
||||
#ifndef CPUFAMILY_INTEL_MEROM
|
||||
#define CPUFAMILY_INTEL_MEROM 0x426f69ef
|
||||
#endif
|
||||
|
||||
// Original code by Sean O'Brien (http://www.garagegames.com/community/forums/viewthread/81815).
|
||||
|
||||
|
||||
|
|
@ -89,204 +80,183 @@ int _getSysCTLvalue(const char key[], T * dest) {
|
|||
|
||||
Platform::SystemInfo_struct Platform::SystemInfo;
|
||||
|
||||
#define BASE_MHZ_SPEED 0
|
||||
//TODO update cpu list
|
||||
#define BASE_MHZ_SPEED 1000
|
||||
#define BASE_APPLE_SILICON_MHZ_SPEED 3200
|
||||
|
||||
// Returns true when the sysctl entry `key` can be read as a U32 and its value
// equals `expected`. A failed query (non-zero result from _getSysCTLvalue)
// yields false.
static bool sysctlValueIs(const char *key, U32 expected)
{
   U32 value;
   return (_getSysCTLvalue<U32>(key, &value) == 0) && (value == expected);
}

// Fills `procflags` with CPU_PROP_* bits describing the host processor.
//
// An "hw.optional.*" entry that does not exist makes the query fail, and an
// entry that exists may still be unsupported by the processor, so a feature is
// only reported when the query succeeds AND the value read back is exactly 1.
static void detectCpuFeatures(U32 &procflags)
{
   // All Cpus have fpu
   procflags = CPU_PROP_C | CPU_PROP_FPU;

#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)

   // List of chip-specific features
   if (sysctlValueIs("hw.optional.mmx", 1))
      procflags |= CPU_PROP_MMX;
   if (sysctlValueIs("hw.optional.sse", 1))
      procflags |= CPU_PROP_SSE;
   if (sysctlValueIs("hw.optional.sse2", 1))
      procflags |= CPU_PROP_SSE2;
   if (sysctlValueIs("hw.optional.sse3", 1))
      procflags |= CPU_PROP_SSE3;
   if (sysctlValueIs("hw.optional.supplementalsse3", 1))
      procflags |= CPU_PROP_SSE3ex;
   if (sysctlValueIs("hw.optional.sse4_1", 1))
      procflags |= CPU_PROP_SSE4_1;
   if (sysctlValueIs("hw.optional.sse4_2", 1))
      procflags |= CPU_PROP_SSE4_2;
   if (sysctlValueIs("hw.optional.avx1_0", 1))
      procflags |= CPU_PROP_AVX;

#elif defined(TORQUE_CPU_ARM64)

   if (sysctlValueIs("hw.optional.neon", 1))
      procflags |= CPU_PROP_NEON;

#endif

   // More than one CPU reported -> multi-processor.
   U32 cpuCount;
   if (_getSysCTLvalue<U32>("hw.ncpu", &cpuCount) == 0 && cpuCount > 1)
      procflags |= CPU_PROP_MP;

   if (sysctlValueIs("hw.cpu64bit_capable", 1))
      procflags |= CPU_PROP_64bit;

   // hw.byteorder reads 1234 on a little-endian machine.
   if (sysctlValueIs("hw.byteorder", 1234))
      procflags |= CPU_PROP_LE;
}
|
||||
|
||||
void Processor::init()
|
||||
{
|
||||
U32 procflags;
|
||||
U32 procflags = 0;
|
||||
int err, cpufam, cputype, cpusub;
|
||||
char buf[255];
|
||||
U32 lraw;
|
||||
U64 llraw;
|
||||
|
||||
Con::printf( "System & Processor Information:" );
|
||||
|
||||
// Gestalt has been deprecated since Mac OSX Mountain Lion and has stopped working on
|
||||
// Mac OSX Yosemite. we have to use NSProcessInfo now.
|
||||
// Availability: Mac OS 10.2 or greater.
|
||||
NSString *osVersionStr = [[NSProcessInfo processInfo] operatingSystemVersionString];
|
||||
Con::printf( " OSX Version: %s", [osVersionStr UTF8String]);
|
||||
|
||||
err = _getSysCTLstring("kern.ostype", buf, sizeof(buf));
|
||||
if (err)
|
||||
Con::printf( " Unable to determine OS type\n" );
|
||||
else
|
||||
Con::printf( " Mac OS Kernel name: %s", buf);
|
||||
|
||||
err = _getSysCTLstring("kern.osrelease", buf, sizeof(buf));
|
||||
if (err)
|
||||
Con::printf( " Unable to determine OS release number\n" );
|
||||
else
|
||||
Con::printf( " Mac OS Kernel version: %s", buf );
|
||||
|
||||
|
||||
S32 ramMB;
|
||||
err = _getSysCTLvalue<U64>("hw.memsize", &llraw);
|
||||
if (err)
|
||||
Con::printf( " Unable to determine amount of physical RAM\n" );
|
||||
ramMB = 512;
|
||||
else
|
||||
Con::printf( " Physical memory installed: %d MB", (llraw >> 20));
|
||||
ramMB = llraw >> 20;
|
||||
|
||||
err = _getSysCTLvalue<U32>("hw.usermem", &lraw);
|
||||
if (err)
|
||||
Con::printf( " Unable to determine available user address space\n");
|
||||
else
|
||||
Con::printf( " Addressable user memory: %d MB", (lraw >> 20));
|
||||
|
||||
////////////////////////////////
|
||||
// Values for the Family Type, CPU Type and CPU Subtype are defined in the
|
||||
// SDK files for the Mach Kernel ==> mach/machine.h
|
||||
////////////////////////////////
|
||||
|
||||
// CPU Family, Type, and Subtype
|
||||
cpufam = 0;
|
||||
cputype = 0;
|
||||
cpusub = 0;
|
||||
err = _getSysCTLvalue<U32>("hw.cpufamily", &lraw);
|
||||
if (err)
|
||||
Con::printf( " Unable to determine 'family' of CPU\n");
|
||||
else {
|
||||
cpufam = (int) lraw;
|
||||
err = _getSysCTLvalue<U32>("hw.cputype", &lraw);
|
||||
if (err)
|
||||
Con::printf( " Unable to determine CPU type\n");
|
||||
else {
|
||||
cputype = (int) lraw;
|
||||
err = _getSysCTLvalue<U32>("hw.cpusubtype", &lraw);
|
||||
if (err)
|
||||
Con::printf( " Unable to determine CPU subtype\n");
|
||||
else
|
||||
cpusub = (int) lraw;
|
||||
// If we've made it this far,
|
||||
Con::printf( " Installed processor ID: Family 0x%08x Type %d Subtype %d",cpufam, cputype,cpusub);
|
||||
}
|
||||
}
|
||||
|
||||
// The Gestalt version was known to have issues with some Processor Upgrade cards
|
||||
// but it is uncertain whether this version has similar issues.
|
||||
char brandString[256];
|
||||
err = _getSysCTLstring("machdep.cpu.brand_string", brandString, sizeof(brandString));
|
||||
if (err)
|
||||
brandString[0] = '\0';
|
||||
|
||||
char vendor[256];
|
||||
err = _getSysCTLstring("machdep.cpu.vendor", vendor, sizeof(vendor));
|
||||
if (err)
|
||||
vendor[0] = '\0';
|
||||
|
||||
// Note: hw.cpufrequency seems to be missing on the M1. For Apple Silicon,
|
||||
// we will assume the base frequency of the M1 which is 3.2ghz
|
||||
err = _getSysCTLvalue<U64>("hw.cpufrequency", &llraw);
|
||||
if (err) {
|
||||
#if defined(TORQUE_CPU_ARM64)
|
||||
llraw = BASE_APPLE_SILICON_MHZ_SPEED;
|
||||
#else
|
||||
llraw = BASE_MHZ_SPEED;
|
||||
Con::printf( " Unable to determine CPU Frequency. Defaulting to %d MHz\n", llraw);
|
||||
#endif
|
||||
} else {
|
||||
llraw /= 1000000;
|
||||
Con::printf( " Installed processor clock frequency: %d MHz", llraw);
|
||||
}
|
||||
Platform::SystemInfo.processor.mhz = (unsigned int)llraw;
|
||||
|
||||
// Here's one that the original version of this routine couldn't do -- number
|
||||
// of processors (cores)
|
||||
U32 ncpu = 1;
|
||||
err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
|
||||
if (err)
|
||||
Con::printf( " Unable to determine number of processor cores\n");
|
||||
else
|
||||
{
|
||||
ncpu = lraw;
|
||||
Con::printf( " Installed/available processor cores: %d", lraw);
|
||||
}
|
||||
|
||||
// Now use CPUFAM to determine and then store the processor type
|
||||
// and 'friendly name' in GG-accessible structure. Note that since
|
||||
// we have access to the Family code, the Type and Subtypes are useless.
|
||||
//
|
||||
// NOTE: Even this level of detail is almost assuredly not needed anymore
|
||||
// and the Optional Capability flags (further down) should be more than enough.
|
||||
switch(cpufam)
|
||||
{
|
||||
case CPUFAMILY_INTEL_YONAH:
|
||||
Platform::SystemInfo.processor.type = CPU_Intel_Core;
|
||||
if( ncpu == 2 )
|
||||
Platform::SystemInfo.processor.name = StringTable->insert("Intel Core Duo");
|
||||
else
|
||||
Platform::SystemInfo.processor.name = StringTable->insert("Intel Core");
|
||||
break;
|
||||
case CPUFAMILY_INTEL_PENRYN:
|
||||
case CPUFAMILY_INTEL_MEROM:
|
||||
Platform::SystemInfo.processor.type = CPU_Intel_Core2;
|
||||
if( ncpu == 4 )
|
||||
Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Quad");
|
||||
else
|
||||
Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Duo");
|
||||
break;
|
||||
|
||||
case CPUFAMILY_INTEL_NEHALEM:
|
||||
Platform::SystemInfo.processor.type = CPU_Intel_Core2;
|
||||
Platform::SystemInfo.processor.name = StringTable->insert( "Intel 'Nehalem' Core Processor" );
|
||||
break;
|
||||
|
||||
default:
|
||||
// explain why we can't get the processor type.
|
||||
Con::warnf( " Unknown Processor (family, type, subtype): 0x%x\t%d %d", cpufam, cputype, cpusub);
|
||||
// for now, identify it as an x86 processor, because Apple is moving to Intel chips...
|
||||
Platform::SystemInfo.processor.type = CPU_X86Compatible;
|
||||
Platform::SystemInfo.processor.name = StringTable->insert("Unknown Processor, assuming x86 Compatible");
|
||||
break;
|
||||
}
|
||||
// Now we can directly query the system about a litany of "Optional" processor capabilities
|
||||
// and determine the status by using BOTH the 'err' value and the 'lraw' value. If we request
|
||||
// a non-existent feature from SYSCTL(), the 'err' result will be -1; 0 denotes it exists
|
||||
// >>>> BUT <<<<<
|
||||
// it may not be supported, only defined. Thus we need to check 'lraw' to determine if it's
|
||||
// actually supported/implemented by the processor: 0 = no, 1 = yes, others are undefined.
|
||||
procflags = 0;
|
||||
// Seriously this one should be an Assert()
|
||||
err = _getSysCTLvalue<U32>("hw.optional.floatingpoint", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_FPU;
|
||||
// List of chip-specific features
|
||||
err = _getSysCTLvalue<U32>("hw.optional.mmx", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_MMX;
|
||||
err = _getSysCTLvalue<U32>("hw.optional.sse", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE;
|
||||
err = _getSysCTLvalue<U32>("hw.optional.sse2", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE2;
|
||||
err = _getSysCTLvalue<U32>("hw.optional.sse3", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3;
|
||||
err = _getSysCTLvalue<U32>("hw.optional.supplementalsse3", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3xt;
|
||||
err = _getSysCTLvalue<U32>("hw.optional.sse4_1", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_1;
|
||||
err = _getSysCTLvalue<U32>("hw.optional.sse4_2", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_2;
|
||||
|
||||
// Finally some architecture-wide settings
|
||||
err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
|
||||
if ((err==0)&&(lraw>1)) procflags |= CPU_PROP_MP;
|
||||
err = _getSysCTLvalue<U32>("hw.cpu64bit_capable", &lraw);
|
||||
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_64bit;
|
||||
err = _getSysCTLvalue<U32>("hw.byteorder", &lraw);
|
||||
if ((err==0)&&(lraw==1234)) procflags |= CPU_PROP_LE;
|
||||
|
||||
Platform::SystemInfo.processor.properties = procflags;
|
||||
|
||||
Con::printf( "%s, %2.2f GHz", Platform::SystemInfo.processor.name, F32( Platform::SystemInfo.processor.mhz ) / 1000.0 );
|
||||
detectCpuFeatures(procflags);
|
||||
|
||||
Platform::SystemInfo.processor.properties = procflags;
|
||||
SetProcessorInfo(Platform::SystemInfo.processor, vendor, brandString);
|
||||
|
||||
|
||||
Con::printf("System & Processor Information:");
|
||||
Con::printf(" MacOS Version: %s", [osVersionStr UTF8String]);
|
||||
Con::printf(" Physical memory installed: %d MB", ramMB);
|
||||
Con::printf(" Processor: %s", Platform::SystemInfo.processor.name);
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
|
||||
Con::printf( " MMX detected");
|
||||
Con::printf(" MMX detected");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
|
||||
Con::printf( " SSE detected");
|
||||
Con::printf(" SSE detected");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
|
||||
Con::printf( " SSE2 detected");
|
||||
Con::printf(" SSE2 detected");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
|
||||
Con::printf( " SSE3 detected");
|
||||
Con::printf(" SSE3 detected");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
|
||||
Con::printf(" SSE3ex detected");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
|
||||
Con::printf( " SSE4.1 detected");
|
||||
Con::printf(" SSE4.1 detected");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
|
||||
Con::printf( " SSE4.2 detected");
|
||||
|
||||
Con::printf(" SSE4.2 detected");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
|
||||
Con::printf(" AVX detected");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_NEON)
|
||||
Con::printf(" Neon detected");
|
||||
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
|
||||
Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
|
||||
|
||||
Con::printf( "" );
|
||||
|
||||
// Trigger the signal
|
||||
Platform::SystemInfoReady.trigger();
|
||||
}
|
||||
|
||||
|
||||
namespace CPUInfo {
|
||||
EConfig CPUCount(U32 &logical, U32 &numCores, U32 &numPhysical) {
|
||||
// todo properly implement this
|
||||
logical = [[NSProcessInfo processInfo] activeProcessorCount];
|
||||
numCores = [[NSProcessInfo processInfo] activeProcessorCount];
|
||||
numPhysical = [[NSProcessInfo processInfo] processorCount];
|
||||
EConfig CPUCount(U32 &logical, U32 &physical) {
|
||||
U32 lraw;
|
||||
int err;
|
||||
|
||||
// todo check for hyperthreading
|
||||
if (numCores > 1)
|
||||
return CONFIG_MultiCoreAndHTNotCapable;
|
||||
return CONFIG_SingleCoreAndHTNotCapable;
|
||||
err = _getSysCTLvalue<U32>("hw.physicalcpu", &lraw);
|
||||
if (err == 0)
|
||||
physical = lraw;
|
||||
else
|
||||
physical = 1;
|
||||
|
||||
err = _getSysCTLvalue<U32>("hw.logicalcpu", &lraw);
|
||||
if (err == 0)
|
||||
{
|
||||
logical = lraw;
|
||||
}
|
||||
else
|
||||
{
|
||||
// fallback to querying the number of cpus. If that fails, then assume same as number of cores
|
||||
err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
|
||||
if (err == 0)
|
||||
logical = lraw;
|
||||
else
|
||||
logical = physical;
|
||||
}
|
||||
|
||||
const bool smtEnabled = logical > physical;
|
||||
|
||||
if (physical == 1)
|
||||
return smtEnabled ? CONFIG_SingleCoreHTEnabled : CONFIG_SingleCoreAndHTNotCapable;
|
||||
|
||||
return smtEnabled ? CONFIG_MultiCoreAndHTEnabled : CONFIG_MultiCoreAndHTNotCapable;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,8 +27,6 @@
|
|||
#include "console/engineAPI.h"
|
||||
|
||||
extern void mInstallLibrary_C();
|
||||
extern void mInstallLibrary_Vec();
|
||||
extern void mInstall_Library_SSE();
|
||||
|
||||
static MRandomLCG sgPlatRandom;
|
||||
|
||||
|
|
@ -115,7 +113,6 @@ void Math::init(U32 properties)
|
|||
if( properties & CPU_PROP_SSE )
|
||||
{
|
||||
Con::printf( " Installing SSE extensions" );
|
||||
mInstall_Library_SSE();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -22,30 +22,233 @@
|
|||
|
||||
#ifndef __APPLE__
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "platform/platform.h"
|
||||
#include "platformPOSIX/platformPOSIX.h"
|
||||
#include "console/console.h"
|
||||
#include "core/stringTable.h"
|
||||
#include "core/strings/stringFunctions.h"
|
||||
#include <math.h>
|
||||
|
||||
#include "platform/platformCPUCount.h"
|
||||
|
||||
#include "console/console.h"
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
Platform::SystemInfo_struct Platform::SystemInfo;
|
||||
|
||||
void Processor::init() {}
|
||||
/// Removes trailing whitespace from @a s in place.
///
/// Each character is converted to unsigned char before it is handed to
/// std::isspace, because passing a negative char value to that function is
/// undefined behaviour. A plain loop is used instead of
/// std::ptr_fun / std::not1, which no longer exist as of C++17.
///
/// @param s String to trim. It becomes empty when it holds only whitespace.
static inline void rtrim(std::string &s)
{
   std::string::size_type keep = s.size();
   while (keep > 0 && std::isspace(static_cast<unsigned char>(s[keep - 1])))
      --keep;

   // erase(pos) with pos == size() is valid and removes nothing
   s.erase(keep);
}
|
||||
|
||||
/// Removes leading whitespace from @a s in place.
///
/// Each character is converted to unsigned char before it is handed to
/// std::isspace, because passing a negative char value to that function is
/// undefined behaviour. A plain loop is used instead of
/// std::ptr_fun / std::not1, which no longer exist as of C++17.
///
/// @param s String to trim. It becomes empty when it holds only whitespace.
static inline void ltrim(std::string &s)
{
   std::string::size_type skip = 0;
   while (skip < s.size() && std::isspace(static_cast<unsigned char>(s[skip])))
      ++skip;

   s.erase(0, skip);
}
|
||||
|
||||
static void getCPUInformation()
|
||||
{
|
||||
std::string vendorString;
|
||||
std::string brandString;
|
||||
|
||||
std::ifstream cpuInfo("/proc/cpuinfo");
|
||||
|
||||
U32 logicalCoreCount = 0;
|
||||
U32 physicalCoreCount = 1;
|
||||
|
||||
if (cpuInfo.is_open())
|
||||
{
|
||||
// Load every line of the CPU Info
|
||||
std::string line;
|
||||
|
||||
while (std::getline(cpuInfo, line))
|
||||
{
|
||||
std::string fieldName = line.substr(0, line.find(":"));
|
||||
rtrim(fieldName);
|
||||
|
||||
// Entries are newline separated
|
||||
if (fieldName == "")
|
||||
{
|
||||
++logicalCoreCount;
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string fieldValue = line.substr(line.find(":") + 1, line.length());
|
||||
ltrim(fieldValue);
|
||||
rtrim(fieldValue);
|
||||
|
||||
// Load fields
|
||||
if (fieldName == "vendor_id")
|
||||
{
|
||||
vendorString = fieldValue.c_str();
|
||||
}
|
||||
else if (fieldName == "model name")
|
||||
{
|
||||
brandString = fieldValue.c_str();
|
||||
}
|
||||
else if (fieldName == "cpu cores")
|
||||
{
|
||||
physicalCoreCount = dAtoui(fieldValue.c_str());
|
||||
}
|
||||
else if (fieldName == "flags")
|
||||
{
|
||||
std::vector<std::string> flags;
|
||||
std::istringstream flagStream(fieldValue);
|
||||
|
||||
std::string currentFlag;
|
||||
while (std::getline(flagStream, currentFlag, ' '))
|
||||
{
|
||||
flags.push_back(currentFlag);
|
||||
}
|
||||
|
||||
// Set CPU flags
|
||||
if (std::find(flags.begin(), flags.end(), "fpu") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_FPU;
|
||||
}
|
||||
|
||||
if (std::find(flags.begin(), flags.end(), "sse3") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE3;
|
||||
}
|
||||
|
||||
if (std::find(flags.begin(), flags.end(), "avx") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_AVX;
|
||||
}
|
||||
|
||||
if (std::find(flags.begin(), flags.end(), "ssse3") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE3ex;
|
||||
}
|
||||
|
||||
if (std::find(flags.begin(), flags.end(), "sse") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE;
|
||||
}
|
||||
|
||||
if (std::find(flags.begin(), flags.end(), "sse2") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE2;
|
||||
}
|
||||
|
||||
if (std::find(flags.begin(), flags.end(), "sse4_1") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE4_1;
|
||||
}
|
||||
|
||||
if (std::find(flags.begin(), flags.end(), "sse4_2") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE4_2;
|
||||
}
|
||||
|
||||
if (std::find(flags.begin(), flags.end(), "mmx") != flags.end())
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_MMX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cpuInfo.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
logicalCoreCount = 1;
|
||||
}
|
||||
|
||||
Platform::SystemInfo.processor.numLogicalProcessors = logicalCoreCount;
|
||||
Platform::SystemInfo.processor.numPhysicalProcessors = physicalCoreCount;
|
||||
Platform::SystemInfo.processor.isHyperThreaded = logicalCoreCount != physicalCoreCount;
|
||||
Platform::SystemInfo.processor.isMultiCore = physicalCoreCount != 1;
|
||||
Platform::SystemInfo.processor.numLogicalProcessors = logicalCoreCount;
|
||||
Platform::SystemInfo.processor.numPhysicalProcessors = physicalCoreCount;
|
||||
if (Platform::SystemInfo.processor.isMultiCore)
|
||||
{
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_MP;
|
||||
}
|
||||
|
||||
// Load processor base frequency
|
||||
std::ifstream baseFrequencyStream("/sys/devices/system/cpu/cpu0/cpufreq/base_frequency");
|
||||
if (baseFrequencyStream.is_open())
|
||||
{
|
||||
U32 baseFrequencyKHz = 0;
|
||||
baseFrequencyStream >> baseFrequencyKHz;
|
||||
|
||||
Platform::SystemInfo.processor.mhz = baseFrequencyKHz / 1000;
|
||||
baseFrequencyStream.close();
|
||||
}
|
||||
|
||||
SetProcessorInfo(Platform::SystemInfo.processor, vendorString.c_str(), brandString.c_str());
|
||||
}
|
||||
|
||||
void Processor::init()
|
||||
{
|
||||
getCPUInformation();
|
||||
|
||||
#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_X32)
|
||||
// Set sane default information
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE ;
|
||||
|
||||
#elif defined(TORQUE_CPU_ARM32) || defined(TORQUE_CPU_ARM64)
|
||||
Platform::SystemInfo.processor.type = CPU_ArmCompatible;
|
||||
Platform::SystemInfo.processor.name = StringTable->insert("Unknown ARM Processor");
|
||||
Platform::SystemInfo.processor.properties = CPU_PROP_C;
|
||||
#else
|
||||
#warning Unsupported CPU
|
||||
#endif
|
||||
|
||||
// Set 64bit flag
|
||||
#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64)
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_64bit;
|
||||
#endif
|
||||
|
||||
// Once CPU information is resolved, produce an output like Windows does
|
||||
Con::printf("Processor Init:");
|
||||
Con::printf(" Processor: %s", Platform::SystemInfo.processor.name);
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
|
||||
Con::printf(" MMX detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
|
||||
Con::printf(" SSE detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
|
||||
Con::printf(" SSE2 detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
|
||||
Con::printf(" SSE3 detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
|
||||
Con::printf(" SSE4.1 detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
|
||||
Con::printf(" SSE4.2 detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
|
||||
Con::printf(" AVX detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
|
||||
Con::printf(" SSE3ex detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
|
||||
Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
|
||||
|
||||
Con::printf(" ");
|
||||
}
|
||||
|
||||
// TODO LINUX CPUInfo::CPUCount better support
|
||||
namespace CPUInfo
|
||||
{
|
||||
EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
|
||||
EConfig CPUCount(U32 &logical, U32 &physical)
|
||||
{
|
||||
PhysicalNum = TotAvailCore = 0;
|
||||
TotAvailLogical = (int)sysconf(_SC_NPROCESSORS_ONLN);
|
||||
// We don't set logical or physical here because it's already been determined by this point
|
||||
if (Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors == 1)
|
||||
{
|
||||
return CONFIG_SingleCoreHTEnabled;
|
||||
}
|
||||
else if (!Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors > 1)
|
||||
{
|
||||
return CONFIG_MultiCoreAndHTNotCapable;
|
||||
}
|
||||
else if (!Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors == 1)
|
||||
{
|
||||
return CONFIG_SingleCoreAndHTNotCapable;
|
||||
}
|
||||
|
||||
return CONFIG_SingleCoreHTDisabled;
|
||||
return CONFIG_MultiCoreAndHTEnabled;
|
||||
}
|
||||
}; // namespace CPUInfo
|
||||
}; // namespace CPUInfo
|
||||
|
||||
#endif
|
||||
|
|
@ -31,11 +31,6 @@
|
|||
extern void mInstallLibrary_C();
|
||||
extern void mInstallLibrary_ASM();
|
||||
|
||||
|
||||
extern void mInstall_AMD_Math();
|
||||
extern void mInstall_Library_SSE();
|
||||
|
||||
|
||||
//--------------------------------------
|
||||
DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
|
||||
"@brief Install the math library with specified extensions.\n\n"
|
||||
|
|
@ -70,10 +65,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
|
|||
properties |= CPU_PROP_MMX;
|
||||
continue;
|
||||
}
|
||||
if (dStricmp(*argv, "3DNOW") == 0) {
|
||||
properties |= CPU_PROP_3DNOW;
|
||||
continue;
|
||||
}
|
||||
if (dStricmp(*argv, "SSE") == 0) {
|
||||
properties |= CPU_PROP_SSE;
|
||||
continue;
|
||||
|
|
@ -112,18 +103,12 @@ void Math::init(U32 properties)
|
|||
if (properties & CPU_PROP_MMX)
|
||||
{
|
||||
Con::printf(" Installing MMX extensions");
|
||||
if (properties & CPU_PROP_3DNOW)
|
||||
{
|
||||
Con::printf(" Installing 3DNow extensions");
|
||||
mInstall_AMD_Math();
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__MWERKS__) || (__MWERKS__ >= 0x2400)
|
||||
if (properties & CPU_PROP_SSE)
|
||||
{
|
||||
Con::printf(" Installing SSE extensions");
|
||||
mInstall_Library_SSE();
|
||||
}
|
||||
#endif //mwerks>2.4
|
||||
|
||||
|
|
|
|||
|
|
@ -24,13 +24,89 @@
|
|||
#include "platformWin32/platformWin32.h"
|
||||
#include "console/console.h"
|
||||
#include "core/stringTable.h"
|
||||
#include "platform/platformCPUCount.h"
|
||||
#include <math.h>
|
||||
#include <intrin.h>
|
||||
|
||||
Platform::SystemInfo_struct Platform::SystemInfo;
|
||||
extern void PlatformBlitInit();
|
||||
extern void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
|
||||
char* vendor, U32 processor, U32 properties, U32 properties2); // platform/platformCPU.cc
|
||||
|
||||
static void getBrand(char* brand)
|
||||
{
|
||||
S32 extendedInfo[4];
|
||||
__cpuid(extendedInfo, 0x80000000);
|
||||
S32 numberExtendedIds = extendedInfo[0];
|
||||
|
||||
// Sets brand
|
||||
if (numberExtendedIds >= 0x80000004)
|
||||
{
|
||||
int offset = 0;
|
||||
for (int i = 0; i < 3; ++i)
|
||||
{
|
||||
S32 brandInfo[4];
|
||||
__cpuidex(brandInfo, 0x80000002 + i, 0);
|
||||
|
||||
*reinterpret_cast<int*>(brand + offset + 0) = brandInfo[0];
|
||||
*reinterpret_cast<int*>(brand + offset + 4) = brandInfo[1];
|
||||
*reinterpret_cast<int*>(brand + offset + 8) = brandInfo[2];
|
||||
*reinterpret_cast<int*>(brand + offset + 12) = brandInfo[3];
|
||||
|
||||
offset += sizeof(S32) * 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum CpuFlags
|
||||
{
|
||||
// EDX Register flags
|
||||
BIT_MMX = BIT(23),
|
||||
BIT_SSE = BIT(25),
|
||||
BIT_SSE2 = BIT(26),
|
||||
BIT_3DNOW = BIT(31), // only available for amd cpus in x86
|
||||
|
||||
// These use a different value for comparison than the above flags (ECX Register)
|
||||
BIT_SSE3 = BIT(0),
|
||||
BIT_SSE3ex = BIT(9),
|
||||
BIT_SSE4_1 = BIT(19),
|
||||
BIT_SSE4_2 = BIT(20),
|
||||
|
||||
BIT_XSAVE_RESTORE = BIT(27),
|
||||
BIT_AVX = BIT(28),
|
||||
};
|
||||
|
||||
/// Sets the instruction-set property bits of @a processor from CPUID leaf 1.
///
/// Bits are only ever added to processor.properties; existing bits are kept.
/// CPU_PROP_MP is derived from processor.isMultiCore, so that field must be
/// filled in before this function is called.
///
/// @param processor Processor record whose properties field is updated.
static void detectCpuFeatures(Platform::SystemInfo_struct::Processor &processor)
{
   S32 cpuInfo[4];
   __cpuid(cpuInfo, 1);
   const U32 ecx = cpuInfo[2];
   const U32 edx = cpuInfo[3];

   if (edx & BIT_MMX)
      processor.properties |= CPU_PROP_MMX;
   if (edx & BIT_SSE)
      processor.properties |= CPU_PROP_SSE;
   if (edx & BIT_SSE2)
      processor.properties |= CPU_PROP_SSE2;
   if (ecx & BIT_SSE3)
      processor.properties |= CPU_PROP_SSE3;
   if (ecx & BIT_SSE3ex)
      processor.properties |= CPU_PROP_SSE3ex;
   if (ecx & BIT_SSE4_1)
      processor.properties |= CPU_PROP_SSE4_1;
   if (ecx & BIT_SSE4_2)
      processor.properties |= CPU_PROP_SSE4_2;

   // AVX detection requires that xsaverestore is supported: _xgetbv may only
   // be executed when that bit is set.
   if ((ecx & BIT_XSAVE_RESTORE) && (ecx & BIT_AVX))
   {
      // Bit 1 (SSE state) and bit 2 (AVX state) must BOTH be enabled by the
      // operating system; testing for "any bit set" would accept a system
      // that saves only one of the two register sets.
      const U64 avxStateMask = 0x6;
      const U64 enabledStates = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
      if ((enabledStates & avxStateMask) == avxStateMask)
      {
         processor.properties |= CPU_PROP_AVX;
      }
   }

   if (processor.isMultiCore)
      processor.properties |= CPU_PROP_MP;

#ifdef TORQUE_CPU_X64
   processor.properties |= CPU_PROP_64bit;
#endif
}
|
||||
|
||||
void Processor::init()
|
||||
{
|
||||
|
|
@ -40,18 +116,13 @@ void Processor::init()
|
|||
// www.intel.com
|
||||
// http://developer.intel.com/design/PentiumII/manuals/24512701.pdf
|
||||
|
||||
Con::printf("Processor Init:");
|
||||
|
||||
Platform::SystemInfo.processor.type = CPU_X86Compatible;
|
||||
Platform::SystemInfo.processor.name = StringTable->insert("Unknown x86 Compatible");
|
||||
Platform::SystemInfo.processor.mhz = 0;
|
||||
Platform::SystemInfo.processor.properties = CPU_PROP_C | CPU_PROP_LE;
|
||||
Platform::SystemInfo.processor.properties = CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE;
|
||||
|
||||
char vendor[0x20];
|
||||
dMemset(vendor, 0, sizeof(vendor));
|
||||
U32 properties = 0;
|
||||
U32 processor = 0;
|
||||
U32 properties2 = 0;
|
||||
|
||||
S32 vendorInfo[4];
|
||||
__cpuid(vendorInfo, 0);
|
||||
|
|
@ -59,17 +130,14 @@ void Processor::init()
|
|||
*reinterpret_cast<int*>(vendor + 4) = vendorInfo[3]; // edx
|
||||
*reinterpret_cast<int*>(vendor + 8) = vendorInfo[2]; // ecx
|
||||
|
||||
S32 cpuInfo[4];
|
||||
__cpuid(cpuInfo, 1);
|
||||
processor = cpuInfo[0]; // eax
|
||||
properties = cpuInfo[3]; // edx
|
||||
properties2 = cpuInfo[2]; // ecx
|
||||
char brand[0x40];
|
||||
dMemset(brand, 0, sizeof(brand));
|
||||
getBrand(brand);
|
||||
|
||||
SetProcessorInfo(Platform::SystemInfo.processor, vendor, processor, properties, properties2);
|
||||
SetProcessorInfo(Platform::SystemInfo.processor, vendor, brand);
|
||||
detectCpuFeatures(Platform::SystemInfo.processor);
|
||||
|
||||
// now calculate speed of processor...
|
||||
U32 nearmhz = 0; // nearest rounded mhz
|
||||
U32 mhz = 0; // calculated value.
|
||||
U32 mhz = 1000; // default if it can't be found
|
||||
|
||||
LONG result;
|
||||
DWORD data = 0;
|
||||
|
|
@ -83,56 +151,35 @@ void Processor::init()
|
|||
result = ::RegQueryValueExA (hKey, "~MHz",NULL, NULL,(LPBYTE)&data, &dataSize);
|
||||
|
||||
if (result == ERROR_SUCCESS)
|
||||
nearmhz = mhz = data;
|
||||
mhz = data;
|
||||
|
||||
::RegCloseKey(hKey);
|
||||
}
|
||||
|
||||
Platform::SystemInfo.processor.mhz = mhz;
|
||||
|
||||
if (mhz==0)
|
||||
{
|
||||
Con::printf(" %s, (Unknown) Mhz", Platform::SystemInfo.processor.name);
|
||||
// stick SOMETHING in so it isn't ZERO.
|
||||
Platform::SystemInfo.processor.mhz = 200; // seems a decent value.
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nearmhz >= 1000)
|
||||
Con::printf(" %s, ~%.2f Ghz", Platform::SystemInfo.processor.name, ((float)nearmhz)/1000.0f);
|
||||
else
|
||||
Con::printf(" %s, ~%d Mhz", Platform::SystemInfo.processor.name, nearmhz);
|
||||
if (nearmhz != mhz)
|
||||
{
|
||||
if (mhz >= 1000)
|
||||
Con::printf(" (timed at roughly %.2f Ghz)", ((float)mhz)/1000.0f);
|
||||
else
|
||||
Con::printf(" (timed at roughly %d Mhz)", mhz);
|
||||
}
|
||||
}
|
||||
|
||||
if( Platform::SystemInfo.processor.numAvailableCores > 0
|
||||
|| Platform::SystemInfo.processor.numPhysicalProcessors > 0
|
||||
|| Platform::SystemInfo.processor.isHyperThreaded )
|
||||
Platform::SystemInfo.processor.properties |= CPU_PROP_MP;
|
||||
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_FPU)
|
||||
Con::printf( " FPU detected" );
|
||||
Con::printf("Processor Init:");
|
||||
Con::printf(" Processor: %s", Platform::SystemInfo.processor.name);
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
|
||||
Con::printf( " MMX detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_3DNOW)
|
||||
Con::printf( " 3DNow detected" );
|
||||
Con::printf(" MMX detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
|
||||
Con::printf( " SSE detected" );
|
||||
if( Platform::SystemInfo.processor.properties & CPU_PROP_SSE2 )
|
||||
Con::printf( " SSE2 detected" );
|
||||
if( Platform::SystemInfo.processor.isHyperThreaded )
|
||||
Con::printf( " HT detected" );
|
||||
if( Platform::SystemInfo.processor.properties & CPU_PROP_MP )
|
||||
Con::printf( " MP detected [%i cores, %i logical, %i physical]",
|
||||
Platform::SystemInfo.processor.numAvailableCores,
|
||||
Platform::SystemInfo.processor.numLogicalProcessors,
|
||||
Platform::SystemInfo.processor.numPhysicalProcessors );
|
||||
Con::printf(" SSE detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
|
||||
Con::printf(" SSE2 detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
|
||||
Con::printf(" SSE3 detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
|
||||
Con::printf(" SSE3ex detected ");
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
|
||||
Con::printf(" SSE4.1 detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
|
||||
Con::printf(" SSE4.2 detected" );
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
|
||||
Con::printf(" AVX detected");
|
||||
|
||||
if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
|
||||
Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
|
||||
|
||||
Con::printf(" ");
|
||||
|
||||
PlatformBlitInit();
|
||||
|
|
|
|||
|
|
@ -29,8 +29,6 @@
|
|||
|
||||
extern void mInstallLibrary_C();
|
||||
extern void mInstallLibrary_ASM();
|
||||
extern void mInstall_AMD_Math();
|
||||
extern void mInstall_Library_SSE();
|
||||
|
||||
//--------------------------------------
|
||||
DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
|
||||
|
|
@ -40,7 +38,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
|
|||
" - 'C' Enable the C math routines. C routines are always enabled.\n\n"
|
||||
" - 'FPU' Enable floating point unit routines.\n\n"
|
||||
" - 'MMX' Enable MMX math routines.\n\n"
|
||||
" - '3DNOW' Enable 3dNow! math routines.\n\n"
|
||||
" - 'SSE' Enable SSE math routines.\n\n"
|
||||
"@ingroup Math")
|
||||
|
||||
|
|
@ -72,10 +69,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
|
|||
properties |= CPU_PROP_MMX;
|
||||
continue;
|
||||
}
|
||||
if (dStricmp(str, "3DNOW") == 0) {
|
||||
properties |= CPU_PROP_3DNOW;
|
||||
continue;
|
||||
}
|
||||
if (dStricmp(str, "SSE") == 0) {
|
||||
properties |= CPU_PROP_SSE;
|
||||
continue;
|
||||
|
|
@ -116,17 +109,11 @@ void Math::init(U32 properties)
|
|||
if (properties & CPU_PROP_MMX)
|
||||
{
|
||||
Con::printf(" Installing MMX extensions");
|
||||
if (properties & CPU_PROP_3DNOW)
|
||||
{
|
||||
Con::printf(" Installing 3DNow extensions");
|
||||
mInstall_AMD_Math();
|
||||
}
|
||||
}
|
||||
|
||||
if (properties & CPU_PROP_SSE)
|
||||
{
|
||||
Con::printf(" Installing SSE extensions");
|
||||
mInstall_Library_SSE();
|
||||
}
|
||||
|
||||
Con::printf(" ");
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#if defined( TORQUE_OS_WIN )
|
||||
|
||||
#include "platform/platformCPUCount.h"
|
||||
#include "console/console.h"
|
||||
#include <windows.h>
|
||||
#include <intrin.h>
|
||||
#include <stdio.h>
|
||||
|
|
@ -52,12 +53,10 @@ namespace CPUInfo {
|
|||
return bitSetCount;
|
||||
}
|
||||
|
||||
EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum )
|
||||
EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore )
|
||||
{
|
||||
EConfig StatusFlag = CONFIG_UserConfigIssue;
|
||||
TotAvailLogical = 0;
|
||||
TotAvailCore = 0;
|
||||
PhysicalNum = 0;
|
||||
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
|
||||
DWORD returnLength = 0;
|
||||
|
|
@ -68,42 +67,37 @@ namespace CPUInfo {
|
|||
|
||||
rc = GetLogicalProcessorInformation( buffer, &returnLength );
|
||||
|
||||
// if we fail, assume single threaded
|
||||
if( FALSE == rc )
|
||||
{
|
||||
free( buffer );
|
||||
return StatusFlag;
|
||||
Con::errorf("Unable to determine CPU Count, assuming 1 core");
|
||||
TotAvailCore = 1;
|
||||
TotAvailLogical = 1;
|
||||
return CONFIG_SingleCoreAndHTNotCapable;
|
||||
}
|
||||
|
||||
#pragma push
|
||||
#pragma warning (disable: 6011)
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
|
||||
|
||||
DWORD byteOffset = 0;
|
||||
while( byteOffset + sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) <= returnLength )
|
||||
{
|
||||
switch( ptr->Relationship )
|
||||
{
|
||||
|
||||
case RelationProcessorCore:
|
||||
if (ptr->Relationship == RelationProcessorCore)
|
||||
{
|
||||
TotAvailCore++;
|
||||
|
||||
// A hyperthreaded core supplies more than one logical processor.
|
||||
TotAvailLogical += CountSetBits( ptr->ProcessorMask );
|
||||
break;
|
||||
|
||||
case RelationProcessorPackage:
|
||||
// Logical processors share a physical package.
|
||||
PhysicalNum++;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
TotAvailLogical += CountSetBits(ptr->ProcessorMask);
|
||||
}
|
||||
|
||||
byteOffset += sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION );
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
free( buffer );
|
||||
#pragma pop
|
||||
|
||||
StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
|
||||
EConfig StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
|
||||
|
||||
if( TotAvailCore == 1 && TotAvailLogical > TotAvailCore )
|
||||
StatusFlag = CONFIG_SingleCoreHTEnabled;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue