Merge pull request #7 from Ragora/feature-linux-cpu-info-fixed

Feature: Detect POSIX CPU Information
This commit is contained in:
Jeff Hutchinson 2022-05-24 22:41:59 -04:00 committed by GitHub
commit 04a94669eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 232 additions and 471 deletions

View file

@ -1,216 +0,0 @@
//-----------------------------------------------------------------------------
// Copyright (c) 2012 GarageGames, LLC
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//-----------------------------------------------------------------------------
#include "math/mMathFn.h"
#include "math/mPlane.h"
#include "math/mMatrix.h"
// extern void (*m_matF_x_point3F)(const F32 *m, const F32 *p, F32 *presult);
// extern void (*m_matF_x_vectorF)(const F32 *m, const F32 *v, F32 *vresult);
/* not currently implemented.
void Athlon_MatrixF_x_Point3F(const F32 *m, const F32 *p, F32 *presult)
{
m;
p;
presult;
}
*/
//============================================================
// Here's the C code for MatF_x_MatF:
// note that the code below does it in a different order (optimal asm, after all!)
//
// r[0] = a[0]*b[0] + a[1]*b[4] + a[2]*b[8] + a[3]*b[12];
// r[1] = a[0]*b[1] + a[1]*b[5] + a[2]*b[9] + a[3]*b[13];
// r[2] = a[0]*b[2] + a[1]*b[6] + a[2]*b[10] + a[3]*b[14];
// r[3] = a[0]*b[3] + a[1]*b[7] + a[2]*b[11] + a[3]*b[15];
//
// r[4] = a[4]*b[0] + a[5]*b[4] + a[6]*b[8] + a[7]*b[12];
// r[5] = a[4]*b[1] + a[5]*b[5] + a[6]*b[9] + a[7]*b[13];
// r[6] = a[4]*b[2] + a[5]*b[6] + a[6]*b[10] + a[7]*b[14];
// r[7] = a[4]*b[3] + a[5]*b[7] + a[6]*b[11] + a[7]*b[15];
//
// r[8] = a[8]*b[0] + a[9]*b[4] + a[10]*b[8] + a[11]*b[12];
// r[9] = a[8]*b[1] + a[9]*b[5] + a[10]*b[9] + a[11]*b[13];
// r[10]= a[8]*b[2] + a[9]*b[6] + a[10]*b[10]+ a[11]*b[14];
// r[11]= a[8]*b[3] + a[9]*b[7] + a[10]*b[11]+ a[11]*b[15];
//
// r[12]= a[12]*b[0]+ a[13]*b[4]+ a[14]*b[8] + a[15]*b[12];
// r[13]= a[12]*b[1]+ a[13]*b[5]+ a[14]*b[9] + a[15]*b[13];
// r[14]= a[12]*b[2]+ a[13]*b[6]+ a[14]*b[10]+ a[15]*b[14];
// r[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15];
//============================================================
#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
#define ADD_3DNOW_FUNCS
// inlined version here.
// Multiplies two 4x4 F32 matrices with AMD 3DNow! instructions and stores the
// product in 'result' (same arithmetic as the C reference listed above).
//
// matA   - left-hand matrix, 16 floats, read through ecx.
// matB   - right-hand matrix, 16 floats, read through edx.
// result - receives the 16 float product, written through eax.
//
// The routine is unrolled into four identical passes. Each pass loads one
// 16-byte row of matA, broadcasts each of its four floats across an MMX
// register, multiplies them against the four rows of matB two floats at a
// time (pfmul) and accumulates the partial sums (pfadd) before storing one
// 16-byte row of result. Loads for the next pass are interleaved with the
// tail of the current one, so the instruction order matters.
//
// femms is issued on entry and on exit because the mm registers are used.
// NOTE(review): matB is re-read after earlier rows of result have already
// been stored, so result presumably must not alias matB - confirm at callers.
void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
{
__asm
{
femms
mov ecx, matA
mov edx, matB
mov eax, result
prefetch [ecx+32] ;// These may help -
prefetch [edx+32] ;// and probably don't hurt
movq mm0,[ecx] ;// a21 | a11
movq mm1,[ecx+8] ;// a41 | a31
movq mm4,[edx] ;// b21 | b11
punpckhdq mm2,mm0 ;// a21 |
movq mm5,[edx+16] ;// b22 | b12
punpckhdq mm3,mm1 ;// a41 |
movq mm6,[edx+32] ;// b23 | b13
punpckldq mm0,mm0 ;// a11 | a11
punpckldq mm1,mm1 ;// a31 | a31
pfmul mm4,mm0 ;// a11*b21 | a11*b11
punpckhdq mm2,mm2 ;// a21 | a21
pfmul mm0,[edx+8] ;// a11*b41 | a11*b31
movq mm7,[edx+48] ;// b24 | b14
pfmul mm5,mm2 ;// a21*b22 | a21*b12
punpckhdq mm3,mm3 ;// a41 | a41
pfmul mm2,[edx+24] ;// a21*b42 | a21*b32
pfmul mm6,mm1 ;// a31*b23 | a31*b13
pfadd mm5,mm4 ;// a21*b22 + a11*b21 | a21*b12 + a11*b11
pfmul mm1,[edx+40] ;// a31*b43 | a31*b33
pfadd mm2,mm0 ;// a21*b42 + a11*b41 | a21*b32 + a11*b31
pfmul mm7,mm3 ;// a41*b24 | a41*b14
pfadd mm6,mm5 ;// a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13
pfmul mm3,[edx+56] ;// a41*b44 | a41*b34
pfadd mm2,mm1 ;// a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33
pfadd mm7,mm6 ;// a41*b24 + a21*b22 + a11*b21 + a31*b23 | a41*b14 + a21*b12 + a11*b11 + a31*b13
movq mm0,[ecx+16] ;// a22 | a12
pfadd mm3,mm2 ;// a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33
movq mm1,[ecx+24] ;// a42 | a32
movq [eax],mm7 ;// r21 | r11
movq mm4,[edx] ;// b21 | b11
movq [eax+8],mm3 ;// r41 | r31
punpckhdq mm2,mm0 ;// a22 | XXX
movq mm5,[edx+16] ;// b22 | b12
punpckhdq mm3,mm1 ;// a42 | XXX
movq mm6,[edx+32] ;// b23 | b13
punpckldq mm0,mm0 ;// a12 | a12
punpckldq mm1,mm1 ;// a32 | a32
pfmul mm4,mm0 ;// a12*b21 | a12*b11
punpckhdq mm2,mm2 ;// a22 | a22
pfmul mm0,[edx+8] ;// a12*b41 | a12*b31
movq mm7,[edx+48] ;// b24 | b14
pfmul mm5,mm2 ;// a22*b22 | a22*b12
punpckhdq mm3,mm3 ;// a42 | a42
pfmul mm2,[edx+24] ;// a22*b42 | a22*b32
pfmul mm6,mm1 ;// a32*b23 | a32*b13
pfadd mm5,mm4 ;// a12*b21 + a22*b22 | a12*b11 + a22*b12
pfmul mm1,[edx+40] ;// a32*b43 | a32*b33
pfadd mm2,mm0 ;// a12*b41 + a22*b42 | a12*b31 + a22*b32
pfmul mm7,mm3 ;// a42*b24 | a42*b14
pfadd mm6,mm5 ;// a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12
pfmul mm3,[edx+56] ;// a42*b44 | a42*b34
pfadd mm2,mm1 ;// a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b31 + a22*b32
pfadd mm7,mm6 ;// a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12
movq mm0,[ecx+32] ;// a23 | a13
pfadd mm3,mm2 ;// a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b31 + a22*b32
movq mm1,[ecx+40] ;// a43 | a33
movq [eax+16],mm7 ;// r22 | r12
movq mm4,[edx] ;// b21 | b11
movq [eax+24],mm3 ;// r42 | r32
punpckhdq mm2,mm0 ;// a23 | XXX
movq mm5,[edx+16] ;// b22 | b12
punpckhdq mm3,mm1 ;// a43 | XXX
movq mm6,[edx+32] ;// b23 | b13
punpckldq mm0,mm0 ;// a13 | a13
punpckldq mm1,mm1 ;// a33 | a33
pfmul mm4,mm0 ;// a13*b21 | a13*b11
punpckhdq mm2,mm2 ;// a23 | a23
pfmul mm0,[edx+8] ;// a13*b41 | a13*b31
movq mm7,[edx+48] ;// b24 | b14
pfmul mm5,mm2 ;// a23*b22 | a23*b12
punpckhdq mm3,mm3 ;// a43 | a43
pfmul mm2,[edx+24] ;// a23*b42 | a23*b32
pfmul mm6,mm1 ;// a33*b23 | a33*b13
pfadd mm5,mm4 ;// a23*b22 + a13*b21 | a23*b12 + a13*b11
pfmul mm1,[edx+40] ;// a33*b43 | a33*b33
pfadd mm2,mm0 ;// a13*b41 + a23*b42 | a13*b31 + a23*b32
pfmul mm7,mm3 ;// a43*b24 | a43*b14
pfadd mm6,mm5 ;// a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11
pfmul mm3,[edx+56] ;// a43*b44 | a43*b34
pfadd mm2,mm1 ;// a33*b43 + a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32
pfadd mm7,mm6 ;// a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11
movq mm0,[ecx+48] ;// a24 | a14
pfadd mm3,mm2 ;// a43*b44 + a33*b43 + a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32
movq mm1,[ecx+56] ;// a44 | a34
movq [eax+32],mm7 ;// r23 | r13
movq mm4,[edx] ;// b21 | b11
movq [eax+40],mm3 ;// r43 | r33
punpckhdq mm2,mm0 ;// a24 | XXX
movq mm5,[edx+16] ;// b22 | b12
punpckhdq mm3,mm1 ;// a44 | XXX
movq mm6,[edx+32] ;// b23 | b13
punpckldq mm0,mm0 ;// a14 | a14
punpckldq mm1,mm1 ;// a34 | a34
pfmul mm4,mm0 ;// a14*b21 | a14*b11
punpckhdq mm2,mm2 ;// a24 | a24
pfmul mm0,[edx+8] ;// a14*b41 | a14*b31
movq mm7,[edx+48] ;// b24 | b14
pfmul mm5,mm2 ;// a24*b22 | a24*b12
punpckhdq mm3,mm3 ;// a44 | a44
pfmul mm2,[edx+24] ;// a24*b42 | a24*b32
pfmul mm6,mm1 ;// a34*b23 | a34*b13
pfadd mm5,mm4 ;// a14*b21 + a24*b22 | a14*b11 + a24*b12
pfmul mm1,[edx+40] ;// a34*b43 | a34*b33
pfadd mm2,mm0 ;// a14*b41 + a24*b42 | a14*b31 + a24*b32
pfmul mm7,mm3 ;// a44*b24 | a44*b14
pfadd mm6,mm5 ;// a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12
pfmul mm3,[edx+56] ;// a44*b44 | a44*b34
pfadd mm2,mm1 ;// a34*b43 + a14*b41 + a24*b42 | a34*b33 + a14*b31 + a24*b32
pfadd mm7,mm6 ;// a44*b24 + a34*b23 + a14*b21 + a24*b22 | a44*b14 + a34*b13 + a14*b11 + a24*b12
pfadd mm3,mm2 ;// a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32
movq [eax+48],mm7 ;// r24 | r14
movq [eax+56],mm3 ;// r44 | r34
femms
}
}
#elif defined(TORQUE_SUPPORTS_NASM)
#define ADD_3DNOW_FUNCS
extern "C"
{
void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result);
}
#endif
/// Hooks the 3DNow! matrix multiply into the math library's function table.
///
/// Only m_matF_x_matF is redirected, and only when one of the assembler back
/// ends above defined ADD_3DNOW_FUNCS; otherwise the call leaves the table
/// untouched.
void mInstall_AMD_Math()
{
#ifdef ADD_3DNOW_FUNCS
   m_matF_x_matF = &Athlon_MatrixF_x_MatrixF;
#endif

   // The point/vector variants have no 3DNow! implementation yet:
   //    m_matF_x_point3F = Athlon_MatrixF_x_Point3F;
   //    m_matF_x_vectorF = Athlon_MatrixF_x_VectorF;
}

View file

@ -1,177 +0,0 @@
;-----------------------------------------------------------------------------
; Copyright (c) 2012 GarageGames, LLC
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this software and associated documentation files (the "Software"), to
; deal in the Software without restriction, including without limitation the
; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
; sell copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
; IN THE SOFTWARE.
;-----------------------------------------------------------------------------
segment .data
matA dd 0
result dd 0
matB dd 0
segment .text
; export_fn <name>
; Declares <name> as a global symbol and emits its label at the current
; position. ELF output uses the bare name; every other output format gets a
; leading underscore (presumably to match the C symbol decoration used by
; those toolchains - confirm per target).
%macro export_fn 1
%ifidn __OUTPUT_FORMAT__, elf
; No underscore needed for ELF object files
global %1
%1:
%else
global _%1
_%1:
%endif
%endmacro
%define arg(x) [esp+(x*4)]
;void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
;
; 4x4 single-precision matrix multiply using 3DNow! (pfmul/pfadd on MMX regs).
; The three pointer arguments are read from the stack via arg(1..3), i.e.
; [esp+4], [esp+8] and [esp+12] (presumably a 32-bit cdecl caller - confirm).
;   ecx = matA, edx = matB, eax = result
; The body is four unrolled passes; each pass consumes one 16-byte row of matA
; and stores one 16-byte row of result. Loads for the next pass are interleaved
; with the tail of the current one, so instruction order matters.
; femms brackets the routine because the mm registers are used.
; NOTE(review): matB is re-read after earlier result rows have been stored, so
; result presumably must not alias matB - confirm at callers.
export_fn Athlon_MatrixF_x_MatrixF
mov ecx, arg(1)
mov edx, arg(2)
mov eax, arg(3)
femms
prefetch [ecx+32] ; These may help -
prefetch [edx+32] ; and probably don't hurt
movq mm0,[ecx] ; a21 | a11
movq mm1,[ecx+8] ; a41 | a31
movq mm4,[edx] ; b21 | b11
punpckhdq mm2,mm0 ; a21 |
movq mm5,[edx+16] ; b22 | b12
punpckhdq mm3,mm1 ; a41 |
movq mm6,[edx+32] ; b23 | b13
punpckldq mm0,mm0 ; a11 | a11
punpckldq mm1,mm1 ; a31 | a31
pfmul mm4,mm0 ; a11*b21 | a11*b11
punpckhdq mm2,mm2 ; a21 | a21
pfmul mm0,[edx+8] ; a11*b41 | a11*b31
movq mm7,[edx+48] ; b24 | b14
pfmul mm5,mm2 ; a21*b22 | a21*b12
punpckhdq mm3,mm3 ; a41 | a41
pfmul mm2,[edx+24] ; a21*b42 | a21*b32
pfmul mm6,mm1 ; a31*b23 | a31*b13
pfadd mm5,mm4 ; a21*b22 + a11*b21 | a21*b12 + a11*b11
pfmul mm1,[edx+40] ; a31*b43 | a31*b33
pfadd mm2,mm0 ; a21*b42 + a11*b41 | a21*b32 + a11*b31
pfmul mm7,mm3 ; a41*b24 | a41*b14
pfadd mm6,mm5 ; a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13
pfmul mm3,[edx+56] ; a41*b44 | a41*b34
pfadd mm2,mm1 ; a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33
pfadd mm7,mm6 ; a41*b24 + a21*b22 + a11*b21 + a31*b23 | a41*b14 + a21*b12 + a11*b11 + a31*b13
movq mm0,[ecx+16] ; a22 | a12
pfadd mm3,mm2 ; a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33
movq mm1,[ecx+24] ; a42 | a32
movq [eax],mm7 ; r21 | r11
movq mm4,[edx] ; b21 | b11
movq [eax+8],mm3 ; r41 | r31
punpckhdq mm2,mm0 ; a22 | XXX
movq mm5,[edx+16] ; b22 | b12
punpckhdq mm3,mm1 ; a42 | XXX
movq mm6,[edx+32] ; b23 | b13
punpckldq mm0,mm0 ; a12 | a12
punpckldq mm1,mm1 ; a32 | a32
pfmul mm4,mm0 ; a12*b21 | a12*b11
punpckhdq mm2,mm2 ; a22 | a22
pfmul mm0,[edx+8] ; a12*b41 | a12*b31
movq mm7,[edx+48] ; b24 | b14
pfmul mm5,mm2 ; a22*b22 | a22*b12
punpckhdq mm3,mm3 ; a42 | a42
pfmul mm2,[edx+24] ; a22*b42 | a22*b32
pfmul mm6,mm1 ; a32*b23 | a32*b13
pfadd mm5,mm4 ; a12*b21 + a22*b22 | a12*b11 + a22*b12
pfmul mm1,[edx+40] ; a32*b43 | a32*b33
pfadd mm2,mm0 ; a12*b41 + a22*b42 | a12*b31 + a22*b32
pfmul mm7,mm3 ; a42*b24 | a42*b14
pfadd mm6,mm5 ; a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12
pfmul mm3,[edx+56] ; a42*b44 | a42*b34
pfadd mm2,mm1 ; a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b31 + a22*b32
pfadd mm7,mm6 ; a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12
movq mm0,[ecx+32] ; a23 | a13
pfadd mm3,mm2 ; a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b31 + a22*b32
movq mm1,[ecx+40] ; a43 | a33
movq [eax+16],mm7 ; r22 | r12
movq mm4,[edx] ; b21 | b11
movq [eax+24],mm3 ; r42 | r32
punpckhdq mm2,mm0 ; a23 | XXX
movq mm5,[edx+16] ; b22 | b12
punpckhdq mm3,mm1 ; a43 | XXX
movq mm6,[edx+32] ; b23 | b13
punpckldq mm0,mm0 ; a13 | a13
punpckldq mm1,mm1 ; a33 | a33
pfmul mm4,mm0 ; a13*b21 | a13*b11
punpckhdq mm2,mm2 ; a23 | a23
pfmul mm0,[edx+8] ; a13*b41 | a13*b31
movq mm7,[edx+48] ; b24 | b14
pfmul mm5,mm2 ; a23*b22 | a23*b12
punpckhdq mm3,mm3 ; a43 | a43
pfmul mm2,[edx+24] ; a23*b42 | a23*b32
pfmul mm6,mm1 ; a33*b23 | a33*b13
pfadd mm5,mm4 ; a23*b22 + a13*b21 | a23*b12 + a13*b11
pfmul mm1,[edx+40] ; a33*b43 | a33*b33
pfadd mm2,mm0 ; a13*b41 + a23*b42 | a13*b31 + a23*b32
pfmul mm7,mm3 ; a43*b24 | a43*b14
pfadd mm6,mm5 ; a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11
pfmul mm3,[edx+56] ; a43*b44 | a43*b34
pfadd mm2,mm1 ; a33*b43 + a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32
pfadd mm7,mm6 ; a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11
movq mm0,[ecx+48] ; a24 | a14
pfadd mm3,mm2 ; a43*b44 + a33*b43 + a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32
movq mm1,[ecx+56] ; a44 | a34
movq [eax+32],mm7 ; r23 | r13
movq mm4,[edx] ; b21 | b11
movq [eax+40],mm3 ; r43 | r33
punpckhdq mm2,mm0 ; a24 | XXX
movq mm5,[edx+16] ; b22 | b12
punpckhdq mm3,mm1 ; a44 | XXX
movq mm6,[edx+32] ; b23 | b13
punpckldq mm0,mm0 ; a14 | a14
punpckldq mm1,mm1 ; a34 | a34
pfmul mm4,mm0 ; a14*b21 | a14*b11
punpckhdq mm2,mm2 ; a24 | a24
pfmul mm0,[edx+8] ; a14*b41 | a14*b31
movq mm7,[edx+48] ; b24 | b14
pfmul mm5,mm2 ; a24*b22 | a24*b12
punpckhdq mm3,mm3 ; a44 | a44
pfmul mm2,[edx+24] ; a24*b42 | a24*b32
pfmul mm6,mm1 ; a34*b23 | a34*b13
pfadd mm5,mm4 ; a14*b21 + a24*b22 | a14*b11 + a24*b12
pfmul mm1,[edx+40] ; a34*b43 | a34*b33
pfadd mm2,mm0 ; a14*b41 + a24*b42 | a14*b31 + a24*b32
pfmul mm7,mm3 ; a44*b24 | a44*b14
pfadd mm6,mm5 ; a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12
pfmul mm3,[edx+56] ; a44*b44 | a44*b34
pfadd mm2,mm1 ; a34*b43 + a14*b41 + a24*b42 | a34*b33 + a14*b31 + a24*b32
pfadd mm7,mm6 ; a44*b24 + a34*b23 + a14*b21 + a24*b22 | a44*b14 + a34*b13 + a14*b11 + a24*b12
pfadd mm3,mm2 ; a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32
movq [eax+48],mm7 ; r24 | r14
movq [eax+56],mm3 ; r44 | r34
femms
ret

View file

@ -60,19 +60,6 @@ TEST(MatrixF, MultiplyImplmentations)
U32 cpuProperties = Platform::SystemInfo.processor.properties;
bool same;
// Test 3D NOW! if it is available
F32 mrAMD[16];
if (cpuProperties & CPU_PROP_3DNOW)
{
Athlon_MatrixF_x_MatrixF(m1, m2, mrAMD);
same = true;
for (S32 i = 0; i < 16; i++)
same &= mIsEqual(mrC[i], mrAMD[i]);
EXPECT_TRUE(same) << "Matrix multiplication verification failed. (C vs. 3D NOW!)";
}
// Test SSE if it is available
F32 mrSSE[16];
if (cpuProperties & CPU_PROP_SSE)

View file

@ -68,18 +68,17 @@ enum ProcessorProperties
CPU_PROP_C = (1<<0), ///< We should use C fallback math functions.
CPU_PROP_FPU = (1<<1), ///< Has an FPU. (It better!)
CPU_PROP_MMX = (1<<2), ///< Supports MMX instruction set extension.
CPU_PROP_3DNOW = (1<<3), ///< Supports AMD 3dNow! instruction set extension.
CPU_PROP_SSE = (1<<4), ///< Supports SSE instruction set extension.
CPU_PROP_SSE2 = (1<<5), ///< Supports SSE2 instruction set extension.
CPU_PROP_SSE3 = (1<<6), ///< Supports SSE3 instruction set extension.
CPU_PROP_SSE3ex = (1<<7), ///< Supports Supplemental SSE3 instruction set
CPU_PROP_SSE4_1 = (1<<8), ///< Supports SSE4_1 instruction set extension.
CPU_PROP_SSE4_2 = (1<<9), ///< Supports SSE4_2 instruction set extension.
CPU_PROP_AVX = (1<<10), ///< Supports AVX256 instruction set extension.
CPU_PROP_MP = (1<<11), ///< This is a multi-processor system.
CPU_PROP_LE = (1<<12), ///< This processor is LITTLE ENDIAN.
CPU_PROP_64bit = (1<<13), ///< This processor is 64-bit capable
CPU_PROP_NEON = (1<<14), ///< Supports the Arm Neon instruction set extension.
CPU_PROP_SSE = (1<<3), ///< Supports SSE instruction set extension.
CPU_PROP_SSE2 = (1<<4), ///< Supports SSE2 instruction set extension.
CPU_PROP_SSE3 = (1<<5), ///< Supports SSE3 instruction set extension.
CPU_PROP_SSE3ex = (1<<6), ///< Supports Supplemental SSE3 instruction set
CPU_PROP_SSE4_1 = (1<<7), ///< Supports SSE4_1 instruction set extension.
CPU_PROP_SSE4_2 = (1<<8), ///< Supports SSE4_2 instruction set extension.
CPU_PROP_AVX = (1<<9), ///< Supports AVX256 instruction set extension.
CPU_PROP_MP = (1<<10), ///< This is a multi-processor system.
CPU_PROP_LE = (1<<11), ///< This processor is LITTLE ENDIAN.
CPU_PROP_64bit = (1<<12), ///< This processor is 64-bit capable
CPU_PROP_NEON = (1<<13), ///< Supports the Arm Neon instruction set extension.
};
/// Processor info manager.

View file

@ -28,7 +28,8 @@
Signal<void(void)> Platform::SystemInfoReady;
void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand)
// fill the specified structure with information obtained from asm code
void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand)
{
if (dStricmp(vendor, "GenuineIntel") == 0)
{
@ -70,4 +71,4 @@ void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor
// Trigger the signal
Platform::SystemInfoReady.trigger();
}
}

View file

@ -64,7 +64,7 @@ namespace CPUInfo
EConfig CPUCount( U32& totalAvailableLogical, U32& totalAvailableCores );
} // namespace CPUInfo
void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand);
void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand);
#endif // _TORQUE_PLATFORM_PLATFORMCOUNT_H_

View file

@ -190,7 +190,7 @@ void Processor::init()
detectCpuFeatures(procflags);
Platform::SystemInfo.processor.properties = procflags;
SetProcessoInfo(Platform::SystemInfo.processor, vendor, brandString);
SetProcessorInfo(Platform::SystemInfo.processor, vendor, brandString);
Con::printf("System & Processor Information:");

View file

@ -27,8 +27,6 @@
#include "console/engineAPI.h"
extern void mInstallLibrary_C();
extern void mInstallLibrary_Vec();
extern void mInstall_Library_SSE();
static MRandomLCG sgPlatRandom;
@ -115,7 +113,6 @@ void Math::init(U32 properties)
if( properties & CPU_PROP_SSE )
{
Con::printf( " Installing SSE extensions" );
mInstall_Library_SSE();
}
#endif

View file

@ -22,30 +22,233 @@
#ifndef __APPLE__
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
#include "platform/platform.h"
#include "platformPOSIX/platformPOSIX.h"
#include "console/console.h"
#include "core/stringTable.h"
#include "core/strings/stringFunctions.h"
#include <math.h>
#include "platform/platformCPUCount.h"
#include "console/console.h"
#include <unistd.h>
Platform::SystemInfo_struct Platform::SystemInfo;
void Processor::init() {}
/// Removes trailing whitespace from @a s in place.
///
/// Whitespace is whatever std::isspace reports for the current C locale.
/// @param s String to trim; becomes empty if it holds only whitespace.
static inline void rtrim(std::string &s)
{
   // A lambda replaces std::ptr_fun (removed in C++17) and std::not1 (removed
   // in C++20). Taking the character as unsigned char keeps std::isspace away
   // from negative values, for which its behaviour is undefined.
   const auto lastNonSpace = std::find_if(s.rbegin(), s.rend(),
      [](unsigned char ch) { return std::isspace(ch) == 0; });

   s.erase(lastNonSpace.base(), s.end());
}
/// Removes leading whitespace from @a s in place.
///
/// Whitespace is whatever std::isspace reports for the current C locale.
/// @param s String to trim; becomes empty if it holds only whitespace.
static inline void ltrim(std::string &s)
{
   // A lambda replaces std::ptr_fun (removed in C++17) and std::not1 (removed
   // in C++20). Taking the character as unsigned char keeps std::isspace away
   // from negative values, for which its behaviour is undefined.
   const auto firstNonSpace = std::find_if(s.begin(), s.end(),
      [](unsigned char ch) { return std::isspace(ch) == 0; });

   s.erase(s.begin(), firstNonSpace);
}
static void getCPUInformation()
{
std::string vendorString;
std::string brandString;
std::ifstream cpuInfo("/proc/cpuinfo");
U32 logicalCoreCount = 0;
U32 physicalCoreCount = 1;
if (cpuInfo.is_open())
{
// Load every line of the CPU Info
std::string line;
while (std::getline(cpuInfo, line))
{
std::string fieldName = line.substr(0, line.find(":"));
rtrim(fieldName);
// Entries are newline separated
if (fieldName == "")
{
++logicalCoreCount;
continue;
}
std::string fieldValue = line.substr(line.find(":") + 1, line.length());
ltrim(fieldValue);
rtrim(fieldValue);
// Load fields
if (fieldName == "vendor_id")
{
vendorString = fieldValue.c_str();
}
else if (fieldName == "model name")
{
brandString = fieldValue.c_str();
}
else if (fieldName == "cpu cores")
{
physicalCoreCount = dAtoui(fieldValue.c_str());
}
else if (fieldName == "flags")
{
std::vector<std::string> flags;
std::istringstream flagStream(fieldValue);
std::string currentFlag;
while (std::getline(flagStream, currentFlag, ' '))
{
flags.push_back(currentFlag);
}
// Set CPU flags
if (std::find(flags.begin(), flags.end(), "fpu") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_FPU;
}
if (std::find(flags.begin(), flags.end(), "sse3") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE3;
}
if (std::find(flags.begin(), flags.end(), "avx") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_AVX;
}
if (std::find(flags.begin(), flags.end(), "ssse3") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE3ex;
}
if (std::find(flags.begin(), flags.end(), "sse") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE;
}
if (std::find(flags.begin(), flags.end(), "sse2") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE2;
}
if (std::find(flags.begin(), flags.end(), "sse4_1") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE4_1;
}
if (std::find(flags.begin(), flags.end(), "sse4_2") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_SSE4_2;
}
if (std::find(flags.begin(), flags.end(), "mmx") != flags.end())
{
Platform::SystemInfo.processor.properties |= CPU_PROP_MMX;
}
}
}
cpuInfo.close();
}
else
{
logicalCoreCount = 1;
}
Platform::SystemInfo.processor.numLogicalProcessors = logicalCoreCount;
Platform::SystemInfo.processor.numPhysicalProcessors = physicalCoreCount;
Platform::SystemInfo.processor.isHyperThreaded = logicalCoreCount != physicalCoreCount;
Platform::SystemInfo.processor.isMultiCore = physicalCoreCount != 1;
Platform::SystemInfo.processor.numLogicalProcessors = logicalCoreCount;
Platform::SystemInfo.processor.numPhysicalProcessors = physicalCoreCount;
if (Platform::SystemInfo.processor.isMultiCore)
{
Platform::SystemInfo.processor.properties |= CPU_PROP_MP;
}
// Load processor base frequency
std::ifstream baseFrequencyStream("/sys/devices/system/cpu/cpu0/cpufreq/base_frequency");
if (baseFrequencyStream.is_open())
{
U32 baseFrequencyKHz = 0;
baseFrequencyStream >> baseFrequencyKHz;
Platform::SystemInfo.processor.mhz = baseFrequencyKHz / 1000;
baseFrequencyStream.close();
}
SetProcessorInfo(Platform::SystemInfo.processor, vendorString.c_str(), brandString.c_str());
}
void Processor::init()
{
getCPUInformation();
#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_X32)
// Set sane default information
Platform::SystemInfo.processor.properties |= CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE ;
#elif defined(TORQUE_CPU_ARM32) || defined(TORQUE_CPU_ARM64)
Platform::SystemInfo.processor.type = CPU_ArmCompatible;
Platform::SystemInfo.processor.name = StringTable->insert("Unknown ARM Processor");
Platform::SystemInfo.processor.properties = CPU_PROP_C;
#else
#warning Unsupported CPU
#endif
// Set 64bit flag
#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64)
Platform::SystemInfo.processor.properties |= CPU_PROP_64bit;
#endif
// Once CPU information is resolved, produce an output like Windows does
Con::printf("Processor Init:");
Con::printf(" Processor: %s", Platform::SystemInfo.processor.name);
if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
Con::printf(" MMX detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
Con::printf(" SSE detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
Con::printf(" SSE2 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
Con::printf(" SSE3 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
Con::printf(" SSE4.1 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
Con::printf(" SSE4.2 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
Con::printf(" AVX detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
Con::printf(" SSE3ex detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
Con::printf(" ");
}
// TODO LINUX CPUInfo::CPUCount better support
namespace CPUInfo
{
EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
EConfig CPUCount(U32 &logical, U32 &physical)
{
PhysicalNum = TotAvailCore = 0;
TotAvailLogical = (int)sysconf(_SC_NPROCESSORS_ONLN);
// We don't set logical or physical here because it's already been determined by this point
if (Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors == 1)
{
return CONFIG_SingleCoreHTEnabled;
}
else if (!Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors > 1)
{
return CONFIG_MultiCoreAndHTNotCapable;
}
else if (!Platform::SystemInfo.processor.isHyperThreaded && Platform::SystemInfo.processor.numPhysicalProcessors == 1)
{
return CONFIG_SingleCoreAndHTNotCapable;
}
return CONFIG_SingleCoreHTDisabled;
return CONFIG_MultiCoreAndHTEnabled;
}
}; // namespace CPUInfo
}; // namespace CPUInfo
#endif

View file

@ -31,11 +31,6 @@
extern void mInstallLibrary_C();
extern void mInstallLibrary_ASM();
extern void mInstall_AMD_Math();
extern void mInstall_Library_SSE();
//--------------------------------------
DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
"@brief Install the math library with specified extensions.\n\n"
@ -70,10 +65,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
properties |= CPU_PROP_MMX;
continue;
}
if (dStricmp(*argv, "3DNOW") == 0) {
properties |= CPU_PROP_3DNOW;
continue;
}
if (dStricmp(*argv, "SSE") == 0) {
properties |= CPU_PROP_SSE;
continue;
@ -112,18 +103,12 @@ void Math::init(U32 properties)
if (properties & CPU_PROP_MMX)
{
Con::printf(" Installing MMX extensions");
if (properties & CPU_PROP_3DNOW)
{
Con::printf(" Installing 3DNow extensions");
mInstall_AMD_Math();
}
}
#if !defined(__MWERKS__) || (__MWERKS__ >= 0x2400)
if (properties & CPU_PROP_SSE)
{
Con::printf(" Installing SSE extensions");
mInstall_Library_SSE();
}
#endif //mwerks>2.4

View file

@ -82,9 +82,6 @@ static void detectCpuFeatures(Platform::SystemInfo_struct::Processor &processor)
U32 edx = cpuInfo[3]; // edx
U32 ecx = cpuInfo[2]; // ecx
if (processor.type == ProcessorType::CPU_AMD)
processor.properties |= (edx & BIT_3DNOW) ? CPU_PROP_3DNOW : 0;
processor.properties |= (edx & BIT_MMX) ? CPU_PROP_MMX : 0;
processor.properties |= (edx & BIT_SSE) ? CPU_PROP_SSE : 0;
processor.properties |= (edx & BIT_SSE2) ? CPU_PROP_SSE2 : 0;
@ -137,7 +134,7 @@ void Processor::init()
dMemset(brand, 0, sizeof(brand));
getBrand(brand);
SetProcessoInfo(Platform::SystemInfo.processor, vendor, brand);
SetProcessorInfo(Platform::SystemInfo.processor, vendor, brand);
detectCpuFeatures(Platform::SystemInfo.processor);
U32 mhz = 1000; // default if it can't be found
@ -165,8 +162,6 @@ void Processor::init()
Con::printf(" Processor: %s", Platform::SystemInfo.processor.name);
if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
Con::printf(" MMX detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_3DNOW)
Con::printf(" 3DNow detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
Con::printf(" SSE detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)

View file

@ -29,8 +29,6 @@
extern void mInstallLibrary_C();
extern void mInstallLibrary_ASM();
extern void mInstall_AMD_Math();
extern void mInstall_Library_SSE();
//--------------------------------------
DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
@ -40,7 +38,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
" - 'C' Enable the C math routines. C routines are always enabled.\n\n"
" - 'FPU' Enable floating point unit routines.\n\n"
" - 'MMX' Enable MMX math routines.\n\n"
" - '3DNOW' Enable 3dNow! math routines.\n\n"
" - 'SSE' Enable SSE math routines.\n\n"
"@ingroup Math")
@ -72,10 +69,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
properties |= CPU_PROP_MMX;
continue;
}
if (dStricmp(str, "3DNOW") == 0) {
properties |= CPU_PROP_3DNOW;
continue;
}
if (dStricmp(str, "SSE") == 0) {
properties |= CPU_PROP_SSE;
continue;
@ -116,17 +109,11 @@ void Math::init(U32 properties)
if (properties & CPU_PROP_MMX)
{
Con::printf(" Installing MMX extensions");
if (properties & CPU_PROP_3DNOW)
{
Con::printf(" Installing 3DNow extensions");
mInstall_AMD_Math();
}
}
if (properties & CPU_PROP_SSE)
{
Con::printf(" Installing SSE extensions");
mInstall_Library_SSE();
}
Con::printf(" ");