Flesh out Windows CPU detection.

This commit is contained in:
Jeff Hutchinson 2021-09-26 23:43:06 -04:00
parent ca0a5d41a7
commit 24e5db942b
5 changed files with 136 additions and 248 deletions

View file

@ -70,17 +70,16 @@ enum ProcessorProperties
CPU_PROP_MMX = (1<<2), ///< Supports MMX instruction set extension.
CPU_PROP_3DNOW = (1<<3), ///< Supports AMD 3dNow! instruction set extension.
CPU_PROP_SSE = (1<<4), ///< Supports SSE instruction set extension.
CPU_PROP_RDTSC = (1<<5), ///< Supports Read Time Stamp Counter op.
CPU_PROP_SSE2 = (1<<6), ///< Supports SSE2 instruction set extension.
CPU_PROP_SSE3 = (1<<7), ///< Supports SSE3 instruction set extension.
CPU_PROP_SSE3ex = (1<<8), ///< Supports extended SSE3 instruction set
CPU_PROP_SSE4_1 = (1<<9), ///< Supports SSE4_1 instruction set extension.
CPU_PROP_SSE4_2 = (1<<10), ///< Supports SSE4_2 instruction set extension.
CPU_PROP_AVX = (1<<11), ///< Supports AVX256 instruction set extension.
CPU_PROP_MP = (1<<12), ///< This is a multi-processor system.
CPU_PROP_LE = (1<<13), ///< This processor is LITTLE ENDIAN.
CPU_PROP_64bit = (1<<14), ///< This processor is 64-bit capable
CPU_PROP_NEON = (1<<15), ///< Supports the Arm Neon instruction set extension.
CPU_PROP_SSE2 = (1<<5), ///< Supports SSE2 instruction set extension.
CPU_PROP_SSE3 = (1<<6), ///< Supports SSE3 instruction set extension.
CPU_PROP_SSE3ex = (1<<7), ///< Supports Supplemental SSE3 instruction set
CPU_PROP_SSE4_1 = (1<<8), ///< Supports SSE4_1 instruction set extension.
CPU_PROP_SSE4_2 = (1<<9), ///< Supports SSE4_2 instruction set extension.
CPU_PROP_AVX = (1<<10), ///< Supports AVX256 instruction set extension.
CPU_PROP_MP = (1<<11), ///< This is a multi-processor system.
CPU_PROP_LE = (1<<12), ///< This processor is LITTLE ENDIAN.
CPU_PROP_64bit = (1<<13), ///< This processor is 64-bit capable
CPU_PROP_NEON = (1<<14), ///< Supports the Arm Neon instruction set extension.
};
/// Processor info manager.

View file

@ -1,128 +0,0 @@
;-----------------------------------------------------------------------------
; Copyright (c) 2012 GarageGames, LLC
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this software and associated documentation files (the "Software"), to
; deal in the Software without restriction, including without limitation the
; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
; sell copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
; IN THE SOFTWARE.
;-----------------------------------------------------------------------------
segment .text
; syntax: export_fn <function name>
; Declares <function name> as a global symbol and emits its label at the point
; where the macro is used. When the output format is ELF the name is used as
; given; for every other output format both the global declaration and the
; label get a leading underscore.
%macro export_fn 1
%ifidn __OUTPUT_FORMAT__, elf
; No underscore needed for ELF object files
global %1
%1:
%else
; Any other object format: export the underscore-prefixed name
global _%1
_%1:
%endif
%endmacro
; push registers
; Saves ebx, ebp, esi and edi on the stack, in that order. popreg pops the
; same four registers in the reverse order, so the two must be used as a pair.
%macro pushreg 0
; pushad
push ebx
push ebp
push esi
push edi
%endmacro
; pop registers
; Restores edi, esi, ebp and ebx from the stack, i.e. the registers saved by
; pushreg in the reverse of the order in which pushreg pushed them.
%macro popreg 0
pop edi
pop esi
pop ebp
pop ebx
; popad
%endmacro
; void detectX86CPUInfo(char *vendor, U32 *processor, U32 *properties);
;
; Queries CPUID and writes the results through the three pointer arguments,
; which are read from the stack at [ebp+8], [ebp+12] and [ebp+16]:
;   vendor     - receives the 12-byte vendor tag (ebx, edx, ecx of CPUID with
;                eax == 0). Should be a char[13]; only the 12 tag bytes are
;                written here, no terminator.
;   processor  - receives eax of CPUID with eax == 1, masked with 0x0fff3fff.
;   properties - receives edx of CPUID with eax == 1. When extended function
;                0x80000001 is available, bit 31 of its edx (3DNow!) is OR'ed
;                into the stored value.
; If the CPUID instruction is not supported, none of the outputs are written.
export_fn detectX86CPUInfo
push ebp
mov ebp, esp
pushreg
push edx
push ecx
pushfd ; keep a copy of the incoming EFLAGS; the popfd at EXIT restores it
pushfd ; save EFLAGS to stack
pop eax ; move EFLAGS into EAX
mov ebx, eax ; remember the unmodified value for the comparison below
xor eax, 0x200000 ; flip bit 21
push eax
popfd ; load the modified value into EFLAGS
pushfd
pop eax ; read EFLAGS back to see whether the flipped bit was kept
cmp eax, ebx
jz EXIT ; doesn't support CPUID instruction
;
; get vendor information using CPUID eax == 0
xor eax, eax
cpuid
; store the vendor tag (12 bytes in ebx, edx, ecx) in the first parameter,
; which should be a char[13]
push eax ; save eax
mov eax, [ebp+8] ; store the char* address in eax
mov [eax], ebx ; move ebx into the first 4 bytes
add eax, 4 ; advance the char* 4 bytes
mov [eax], edx ; move edx into the next 4 bytes
add eax, 4 ; advance the char* 4 bytes
mov [eax], ecx ; move ecx into the last 4 bytes
pop eax ; restore eax
; get generic extended CPUID info
mov eax, 1
cpuid ; eax=1, so cpuid queries feature information
and eax, 0x0fff3fff ; keep only the bits selected by this mask
push ecx ; ecx is used as a scratch pointer below, so preserve it
mov ecx, [ebp+12] ; second parameter: U32 *processor
mov [ecx], eax ; just store the model bits in processor param
mov ecx, [ebp+16] ; third parameter: U32 *properties
mov [ecx], edx ; set properties param
pop ecx
; want to check for 3DNow(tm).
; need to see if extended cpuid functions present.
mov eax, 0x80000000
cpuid
cmp eax, 0x80000000
jbe MAYBE_3DLATER ; nothing above 0x80000000 reported, skip the 3DNow query
mov eax, 0x80000001
cpuid
; 3DNow if bit 31 set -> put bit in our properties
and edx, 0x80000000
push eax
mov eax, [ebp+16] ; third parameter again: U32 *properties
or [eax], edx
pop eax
; Both labels mark the common epilogue: undo the pushes made on entry, in
; reverse order, and return.
MAYBE_3DLATER:
EXIT:
popfd
pop ecx
pop edx
popreg
pop ebp
ret

View file

@ -96,7 +96,7 @@ static void detectCpuFeatures(U32 &procflags)
U32 lraw;
// All Cpus have fpu
procflags = CPU_PROP_FPU;
procflags = CPU_PROP_C | CPU_PROP_FPU;
#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)
@ -215,6 +215,9 @@ void Processor::init()
Con::printf(" AVX detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_NEON)
Con::printf(" Neon detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
Con::printf( "" );

View file

@ -24,49 +24,15 @@
#include "platformWin32/platformWin32.h"
#include "console/console.h"
#include "core/stringTable.h"
#include "platform/platformCPUCount.h"
#include <math.h>
#include <intrin.h>
Platform::SystemInfo_struct Platform::SystemInfo;
extern void PlatformBlitInit();
extern void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
char* vendor, char* brand, U32 processor, U32 properties, U32 properties2); // platform/platformCPU.cc
void Processor::init()
static void getBrand(char* brand)
{
// Reference:
// www.cyrix.com
// www.amd.com
// www.intel.com
// http://developer.intel.com/design/PentiumII/manuals/24512701.pdf
Con::printf("Processor Init:");
Platform::SystemInfo.processor.type = CPU_X86Compatible;
Platform::SystemInfo.processor.name = StringTable->insert("Unknown x86 Compatible");
Platform::SystemInfo.processor.mhz = 0;
Platform::SystemInfo.processor.properties = CPU_PROP_C;
char vendor[0x20];
dMemset(vendor, 0, sizeof(vendor));
U32 properties = 0;
U32 processor = 0;
U32 properties2 = 0;
S32 vendorInfo[4];
__cpuid(vendorInfo, 0);
*reinterpret_cast<int*>(vendor) = vendorInfo[1]; // ebx
*reinterpret_cast<int*>(vendor + 4) = vendorInfo[3]; // edx
*reinterpret_cast<int*>(vendor + 8) = vendorInfo[2]; // ecx
S32 cpuInfo[4];
__cpuid(cpuInfo, 1);
processor = cpuInfo[0]; // eax
properties = cpuInfo[3]; // edx
properties2 = cpuInfo[2]; // ecx
char brand[0x40];
dMemset(brand, 0, sizeof(brand));
S32 extendedInfo[4];
__cpuid(extendedInfo, 0x80000000);
S32 numberExtendedIds = extendedInfo[0];
@ -88,12 +54,93 @@ void Processor::init()
offset += sizeof(S32) * 4;
}
}
}
SetProcessorInfo(Platform::SystemInfo.processor, vendor, brand, processor, properties, properties2);
enum CpuFlags
{
// EDX Register flags
BIT_MMX = BIT(23),
BIT_SSE = BIT(25),
BIT_SSE2 = BIT(26),
BIT_3DNOW = BIT(31), // only available for amd cpus in x86
// now calculate speed of processor...
U32 nearmhz = 0; // nearest rounded mhz
U32 mhz = 0; // calculated value.
// These use a different value for comparison than the above flags (ECX Register)
BIT_SSE3 = BIT(0),
BIT_SSE3ex = BIT(9),
BIT_SSE4_1 = BIT(19),
BIT_SSE4_2 = BIT(20),
BIT_XSAVE_RESTORE = BIT(27),
BIT_AVX = BIT(28),
};
/// Adds the instruction-set and platform capability flags of the current x86
/// CPU to @a processor.properties.
///
/// Reads CPUID leaf 1 for MMX/SSE*/AVX, the extended leaf 0x80000001 for
/// 3DNow! (AMD only), and XCR0 to confirm that the OS has enabled AVX state.
/// Existing bits in @a processor.properties are preserved; flags are only
/// OR'ed in.
///
/// @param processor Processor record to update. Its @c type and
///                  @c isMultiCore fields are read, so they must already be
///                  filled in before this is called.
static void detectCpuFeatures(Platform::SystemInfo_struct::Processor &processor)
{
   // Standard feature flags: CPUID leaf 1 returns them in EDX and ECX.
   S32 cpuInfo[4];
   __cpuid(cpuInfo, 1);
   const U32 edx = cpuInfo[3]; // edx
   const U32 ecx = cpuInfo[2]; // ecx

   // 3DNow! is reported in EDX bit 31 of the *extended* leaf 0x80000001.
   // Bit 31 of leaf 1's EDX is an unrelated flag (PBE), so it must not be
   // used here. The extended leaf is only valid when leaf 0x80000000 reports
   // a maximum extended function above 0x80000000.
   if (processor.type == ProcessorType::CPU_AMD)
   {
      S32 extendedInfo[4];
      __cpuid(extendedInfo, 0x80000000);
      if (static_cast<U32>(extendedInfo[0]) > 0x80000000)
      {
         __cpuid(extendedInfo, 0x80000001);
         const U32 extendedEdx = extendedInfo[3];
         processor.properties |= (extendedEdx & BIT_3DNOW) ? CPU_PROP_3DNOW : 0;
      }
   }

   processor.properties |= (edx & BIT_MMX) ? CPU_PROP_MMX : 0;
   processor.properties |= (edx & BIT_SSE) ? CPU_PROP_SSE : 0;
   processor.properties |= (edx & BIT_SSE2) ? CPU_PROP_SSE2 : 0;
   processor.properties |= (ecx & BIT_SSE3) ? CPU_PROP_SSE3 : 0;
   processor.properties |= (ecx & BIT_SSE3ex) ? CPU_PROP_SSE3ex : 0;
   processor.properties |= (ecx & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0;
   processor.properties |= (ecx & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0;

   // AVX detection requires that xsaverestore is supported: XGETBV may only
   // be executed when that bit is set, and the CPU must advertise AVX itself.
   if ((ecx & BIT_XSAVE_RESTORE) && (ecx & BIT_AVX))
   {
      // XCR0 bit 1 (XMM state) and bit 2 (YMM state) must BOTH be enabled by
      // the OS; a plain non-zero test would accept only one of them.
      const U32 avxStateMask = 0x6;
      const U32 enabledState = static_cast<U32>(_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & avxStateMask);
      if (enabledState == avxStateMask)
      {
         processor.properties |= CPU_PROP_AVX;
      }
   }

   if (processor.isMultiCore)
      processor.properties |= CPU_PROP_MP;

#ifdef TORQUE_CPU_X64
   processor.properties |= CPU_PROP_64bit;
#endif
}
void Processor::init()
{
// Reference:
// www.cyrix.com
// www.amd.com
// www.intel.com
// http://developer.intel.com/design/PentiumII/manuals/24512701.pdf
Platform::SystemInfo.processor.type = CPU_X86Compatible;
Platform::SystemInfo.processor.name = StringTable->insert("Unknown x86 Compatible");
Platform::SystemInfo.processor.mhz = 0;
Platform::SystemInfo.processor.properties = CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE;
char vendor[0x20];
dMemset(vendor, 0, sizeof(vendor));
S32 vendorInfo[4];
__cpuid(vendorInfo, 0);
*reinterpret_cast<int*>(vendor) = vendorInfo[1]; // ebx
*reinterpret_cast<int*>(vendor + 4) = vendorInfo[3]; // edx
*reinterpret_cast<int*>(vendor + 8) = vendorInfo[2]; // ecx
char brand[0x40];
dMemset(brand, 0, sizeof(brand));
getBrand(brand);
SetProcessoInfo(Platform::SystemInfo.processor, vendor, brand);
detectCpuFeatures(Platform::SystemInfo.processor);
U32 mhz = 1000; // default if it can't be found
LONG result;
DWORD data = 0;
@ -107,64 +154,37 @@ void Processor::init()
result = ::RegQueryValueExA (hKey, "~MHz",NULL, NULL,(LPBYTE)&data, &dataSize);
if (result == ERROR_SUCCESS)
nearmhz = mhz = data;
mhz = data;
::RegCloseKey(hKey);
}
Platform::SystemInfo.processor.mhz = mhz;
if (mhz==0)
{
Con::printf(" %s, (Unknown) Mhz", Platform::SystemInfo.processor.name);
// stick SOMETHING in so it isn't ZERO.
Platform::SystemInfo.processor.mhz = 200; // seems a decent value.
}
else
{
if (nearmhz >= 1000)
Con::printf(" %s, ~%.2f Ghz", Platform::SystemInfo.processor.name, ((float)nearmhz)/1000.0f);
else
Con::printf(" %s, ~%d Mhz", Platform::SystemInfo.processor.name, nearmhz);
if (nearmhz != mhz)
{
if (mhz >= 1000)
Con::printf(" (timed at roughly %.2f Ghz)", ((float)mhz)/1000.0f);
else
Con::printf(" (timed at roughly %d Mhz)", mhz);
}
}
if( Platform::SystemInfo.processor.numAvailableCores > 0
|| Platform::SystemInfo.processor.numPhysicalProcessors > 0
|| Platform::SystemInfo.processor.isHyperThreaded )
Platform::SystemInfo.processor.properties |= CPU_PROP_MP;
if (Platform::SystemInfo.processor.properties & CPU_PROP_FPU)
Con::printf( " FPU detected" );
Con::printf("Processor Init:");
Con::printf(" Processor: %s", Platform::SystemInfo.processor.name);
if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
Con::printf( " MMX detected" );
Con::printf(" MMX detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_3DNOW)
Con::printf( " 3DNow detected" );
Con::printf(" 3DNow detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
Con::printf( " SSE detected" );
if( Platform::SystemInfo.processor.properties & CPU_PROP_SSE2 )
Con::printf( " SSE2 detected" );
Con::printf(" SSE detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
Con::printf(" SSE2 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
Con::printf( " SSE3 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3xt)
Con::printf( " SSE3ex detected ");
Con::printf(" SSE3 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
Con::printf(" SSE3ex detected ");
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
Con::printf( " SSE4.1 detected" );
Con::printf(" SSE4.1 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
Con::printf( " SSE4.2 detected" );
if( Platform::SystemInfo.processor.isHyperThreaded )
Con::printf( " HT detected" );
if( Platform::SystemInfo.processor.properties & CPU_PROP_MP )
Con::printf( " MP detected [%i cores, %i logical, %i physical]",
Platform::SystemInfo.processor.numAvailableCores,
Platform::SystemInfo.processor.numLogicalProcessors,
Platform::SystemInfo.processor.numPhysicalProcessors );
Con::printf(" SSE4.2 detected" );
if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
Con::printf(" AVX detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
Con::printf(" ");
PlatformBlitInit();

View file

@ -26,6 +26,7 @@
#if defined( TORQUE_OS_WIN )
#include "platform/platformCPUCount.h"
#include "console/console.h"
#include <windows.h>
#include <intrin.h>
#include <stdio.h>
@ -52,12 +53,10 @@ namespace CPUInfo {
return bitSetCount;
}
EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum )
EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore )
{
EConfig StatusFlag = CONFIG_UserConfigIssue;
TotAvailLogical = 0;
TotAvailCore = 0;
PhysicalNum = 0;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
DWORD returnLength = 0;
@ -68,42 +67,37 @@ namespace CPUInfo {
rc = GetLogicalProcessorInformation( buffer, &returnLength );
// if we fail, assume single threaded
if( FALSE == rc )
{
free( buffer );
return StatusFlag;
Con::errorf("Unable to determine CPU Count, assuming 1 core");
TotAvailCore = 1;
TotAvailLogical = 1;
return CONFIG_SingleCoreAndHTNotCapable;
}
#pragma push
#pragma warning (disable: 6011)
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
DWORD byteOffset = 0;
while( byteOffset + sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) <= returnLength )
{
switch( ptr->Relationship )
{
case RelationProcessorCore:
if (ptr->Relationship == RelationProcessorCore)
{
TotAvailCore++;
// A hyperthreaded core supplies more than one logical processor.
TotAvailLogical += CountSetBits( ptr->ProcessorMask );
break;
case RelationProcessorPackage:
// Logical processors share a physical package.
PhysicalNum++;
break;
default:
break;
TotAvailLogical += CountSetBits(ptr->ProcessorMask);
}
byteOffset += sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION );
ptr++;
}
}
free( buffer );
#pragma pop
StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
EConfig StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
if( TotAvailCore == 1 && TotAvailLogical > TotAvailCore )
StatusFlag = CONFIG_SingleCoreHTEnabled;