From 24e5db942be6a33c2946ba20d846249efb6e4540 Mon Sep 17 00:00:00 2001 From: Jeff Hutchinson Date: Sun, 26 Sep 2021 23:43:06 -0400 Subject: [PATCH] flush out windows cpu detection. --- Engine/source/platform/platform.h | 21 +- Engine/source/platform/platformCPUInfo.asm | 128 ------------ Engine/source/platformMac/macCPU.mm | 5 +- Engine/source/platformWin32/winCPUInfo.cpp | 192 ++++++++++-------- .../platformWin32/winPlatformCPUCount.cpp | 38 ++-- 5 files changed, 136 insertions(+), 248 deletions(-) delete mode 100644 Engine/source/platform/platformCPUInfo.asm diff --git a/Engine/source/platform/platform.h b/Engine/source/platform/platform.h index f8239f817..6073786b7 100644 --- a/Engine/source/platform/platform.h +++ b/Engine/source/platform/platform.h @@ -70,17 +70,16 @@ enum ProcessorProperties CPU_PROP_MMX = (1<<2), ///< Supports MMX instruction set extension. CPU_PROP_3DNOW = (1<<3), ///< Supports AMD 3dNow! instruction set extension. CPU_PROP_SSE = (1<<4), ///< Supports SSE instruction set extension. - CPU_PROP_RDTSC = (1<<5), ///< Supports Read Time Stamp Counter op. - CPU_PROP_SSE2 = (1<<6), ///< Supports SSE2 instruction set extension. - CPU_PROP_SSE3 = (1<<7), ///< Supports SSE3 instruction set extension. - CPU_PROP_SSE3ex = (1<<8), ///< Supports extended SSE3 instruction set - CPU_PROP_SSE4_1 = (1<<9), ///< Supports SSE4_1 instruction set extension. - CPU_PROP_SSE4_2 = (1<<10), ///< Supports SSE4_2 instruction set extension. - CPU_PROP_AVX = (1<<11), ///< Supports AVX256 instruction set extension. - CPU_PROP_MP = (1<<12), ///< This is a multi-processor system. - CPU_PROP_LE = (1<<13), ///< This processor is LITTLE ENDIAN. - CPU_PROP_64bit = (1<<14), ///< This processor is 64-bit capable - CPU_PROP_NEON = (1<<15), ///< Supports the Arm Neon instruction set extension. + CPU_PROP_SSE2 = (1<<5), ///< Supports SSE2 instruction set extension. + CPU_PROP_SSE3 = (1<<6), ///< Supports SSE3 instruction set extension. 
+ CPU_PROP_SSE3ex = (1<<7), ///< Supports Supplemental SSE3 instruction set + CPU_PROP_SSE4_1 = (1<<8), ///< Supports SSE4_1 instruction set extension. + CPU_PROP_SSE4_2 = (1<<9), ///< Supports SSE4_2 instruction set extension. + CPU_PROP_AVX = (1<<10), ///< Supports AVX256 instruction set extension. + CPU_PROP_MP = (1<<11), ///< This is a multi-processor system. + CPU_PROP_LE = (1<<12), ///< This processor is LITTLE ENDIAN. + CPU_PROP_64bit = (1<<13), ///< This processor is 64-bit capable + CPU_PROP_NEON = (1<<14), ///< Supports the Arm Neon instruction set extension. }; /// Processor info manager. diff --git a/Engine/source/platform/platformCPUInfo.asm b/Engine/source/platform/platformCPUInfo.asm deleted file mode 100644 index bce39d220..000000000 --- a/Engine/source/platform/platformCPUInfo.asm +++ /dev/null @@ -1,128 +0,0 @@ -;----------------------------------------------------------------------------- -; Copyright (c) 2012 GarageGames, LLC -; -; Permission is hereby granted, free of charge, to any person obtaining a copy -; of this software and associated documentation files (the "Software"), to -; deal in the Software without restriction, including without limitation the -; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -; sell copies of the Software, and to permit persons to whom the Software is -; furnished to do so, subject to the following conditions: -; -; The above copyright notice and this permission notice shall be included in -; all copies or substantial portions of the Software. -; -; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -; IN THE SOFTWARE. -;----------------------------------------------------------------------------- - - -segment .text - -; syntax: export_fn -%macro export_fn 1 - %ifidn __OUTPUT_FORMAT__, elf - ; No underscore needed for ELF object files - global %1 - %1: - %else - global _%1 - _%1: - %endif -%endmacro - -; push registers -%macro pushreg 0 -; pushad - push ebx - push ebp - push esi - push edi -%endmacro - -; pop registers -%macro popreg 0 - pop edi - pop esi - pop ebp - pop ebx -; popad -%endmacro - -; void detectX86CPUInfo(char *vendor, U32 *processor, U32 *properties); -export_fn detectX86CPUInfo - push ebp - mov ebp, esp - - pushreg - - push edx - push ecx - pushfd - pushfd ; save EFLAGS to stack - pop eax ; move EFLAGS into EAX - mov ebx, eax - xor eax, 0x200000 ; flip bit 21 - push eax - popfd ; restore EFLAGS - pushfd - pop eax - cmp eax, ebx - jz EXIT ; doesn't support CPUID instruction - - ; - ; get vendor information using CPUID eax == 0 - xor eax, eax - cpuid - - ; store the vendor tag (12 bytes in ebx, edx, ecx) in the first parameter, - ; which should be a char[13] - push eax ; save eax - mov eax, [ebp+8] ; store the char* address in eax - mov [eax], ebx ; move ebx into the first 4 bytes - add eax, 4 ; advance the char* 4 bytes - mov [eax], edx ; move edx into the next 4 bytes - add eax, 4 ; advance the char* 4 bytes - mov [eax], ecx ; move ecx into the last 4 bytes - pop eax ; restore eax - - ; get generic extended CPUID info - mov eax, 1 - cpuid ; eax=1, so cpuid queries feature information - - and eax, 0x0fff3fff - push ecx - mov ecx, [ebp+12] - mov [ecx], eax ; just store the model bits in processor param - mov ecx, [ebp+16] - mov [ecx], edx ; set properties param - pop ecx - - ; want to check for 
3DNow(tm). - ; need to see if extended cpuid functions present. - mov eax, 0x80000000 - cpuid - cmp eax, 0x80000000 - jbe MAYBE_3DLATER - mov eax, 0x80000001 - cpuid - ; 3DNow if bit 31 set -> put bit in our properties - and edx, 0x80000000 - push eax - mov eax, [ebp+16] - or [eax], edx - pop eax -MAYBE_3DLATER: -EXIT: - popfd - pop ecx - pop edx - - popreg - - pop ebp - ret diff --git a/Engine/source/platformMac/macCPU.mm b/Engine/source/platformMac/macCPU.mm index 96cf068a9..d93bcf25f 100644 --- a/Engine/source/platformMac/macCPU.mm +++ b/Engine/source/platformMac/macCPU.mm @@ -96,7 +96,7 @@ static void detectCpuFeatures(U32 &procflags) U32 lraw; // All Cpus have fpu - procflags = CPU_PROP_FPU; + procflags = CPU_PROP_C | CPU_PROP_FPU; #if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64) @@ -215,6 +215,9 @@ void Processor::init() Con::printf(" AVX detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_NEON) Con::printf(" Neon detected"); + + if (Platform::SystemInfo.processor.properties & CPU_PROP_MP) + Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors); Con::printf( "" ); diff --git a/Engine/source/platformWin32/winCPUInfo.cpp b/Engine/source/platformWin32/winCPUInfo.cpp index 3765f5c51..f8ea9ab2d 100644 --- a/Engine/source/platformWin32/winCPUInfo.cpp +++ b/Engine/source/platformWin32/winCPUInfo.cpp @@ -24,49 +24,15 @@ #include "platformWin32/platformWin32.h" #include "console/console.h" #include "core/stringTable.h" +#include "platform/platformCPUCount.h" #include #include Platform::SystemInfo_struct Platform::SystemInfo; extern void PlatformBlitInit(); -extern void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, - char* vendor, char* brand, U32 processor, U32 properties, U32 properties2); // platform/platformCPU.cc -void Processor::init() +static void getBrand(char* brand) { - // Reference: - // www.cyrix.com - 
// www.amd.com - // www.intel.com - // http://developer.intel.com/design/PentiumII/manuals/24512701.pdf - - Con::printf("Processor Init:"); - - Platform::SystemInfo.processor.type = CPU_X86Compatible; - Platform::SystemInfo.processor.name = StringTable->insert("Unknown x86 Compatible"); - Platform::SystemInfo.processor.mhz = 0; - Platform::SystemInfo.processor.properties = CPU_PROP_C; - - char vendor[0x20]; - dMemset(vendor, 0, sizeof(vendor)); - U32 properties = 0; - U32 processor = 0; - U32 properties2 = 0; - - S32 vendorInfo[4]; - __cpuid(vendorInfo, 0); - *reinterpret_cast(vendor) = vendorInfo[1]; // ebx - *reinterpret_cast(vendor + 4) = vendorInfo[3]; // edx - *reinterpret_cast(vendor + 8) = vendorInfo[2]; // ecx - - S32 cpuInfo[4]; - __cpuid(cpuInfo, 1); - processor = cpuInfo[0]; // eax - properties = cpuInfo[3]; // edx - properties2 = cpuInfo[2]; // ecx - - char brand[0x40]; - dMemset(brand, 0, sizeof(brand)); S32 extendedInfo[4]; __cpuid(extendedInfo, 0x80000000); S32 numberExtendedIds = extendedInfo[0]; @@ -88,12 +54,93 @@ void Processor::init() offset += sizeof(S32) * 4; } } +} - SetProcessorInfo(Platform::SystemInfo.processor, vendor, brand, processor, properties, properties2); +enum CpuFlags +{ + // EDX Register flags + BIT_MMX = BIT(23), + BIT_SSE = BIT(25), + BIT_SSE2 = BIT(26), + BIT_3DNOW = BIT(31), // only available for amd cpus in x86 -// now calculate speed of processor... - U32 nearmhz = 0; // nearest rounded mhz - U32 mhz = 0; // calculated value. 
+ // These use a different value for comparison than the above flags (ECX Register) + BIT_SSE3 = BIT(0), + BIT_SSE3ex = BIT(9), + BIT_SSE4_1 = BIT(19), + BIT_SSE4_2 = BIT(20), + + BIT_XSAVE_RESTORE = BIT(27), + BIT_AVX = BIT(28), +}; + +static void detectCpuFeatures(Platform::SystemInfo_struct::Processor &processor) +{ + S32 cpuInfo[4]; + __cpuid(cpuInfo, 1); + U32 eax = cpuInfo[0]; // eax + U32 edx = cpuInfo[3]; // edx + U32 ecx = cpuInfo[2]; // ecx + + if (processor.type == ProcessorType::CPU_AMD) + processor.properties |= (edx & BIT_3DNOW) ? CPU_PROP_3DNOW : 0; + + processor.properties |= (edx & BIT_MMX) ? CPU_PROP_MMX : 0; + processor.properties |= (edx & BIT_SSE) ? CPU_PROP_SSE : 0; + processor.properties |= (edx & BIT_SSE2) ? CPU_PROP_SSE2 : 0; + processor.properties |= (ecx & BIT_SSE3) ? CPU_PROP_SSE3 : 0; + processor.properties |= (ecx & BIT_SSE3ex) ? CPU_PROP_SSE3ex : 0; + processor.properties |= (ecx & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0; + processor.properties |= (ecx & BIT_SSE4_2) ? 
CPU_PROP_SSE4_2 : 0; + + // AVX needs OSXSAVE plus both XMM and YMM state enabled by the OS in XCR0 (mask 0x6) + if (ecx & BIT_XSAVE_RESTORE && ecx & BIT_AVX) + { + bool supportsAVX = (_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6; + if (supportsAVX) + { + processor.properties |= CPU_PROP_AVX; + } + } + + if (processor.isMultiCore) + processor.properties |= CPU_PROP_MP; + +#ifdef TORQUE_CPU_X64 + processor.properties |= CPU_PROP_64bit; +#endif +} + +void Processor::init() +{ + // Reference: + // www.cyrix.com + // www.amd.com + // www.intel.com + // http://developer.intel.com/design/PentiumII/manuals/24512701.pdf + + Platform::SystemInfo.processor.type = CPU_X86Compatible; + Platform::SystemInfo.processor.name = StringTable->insert("Unknown x86 Compatible"); + Platform::SystemInfo.processor.mhz = 0; + Platform::SystemInfo.processor.properties = CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE; + + char vendor[0x20]; + dMemset(vendor, 0, sizeof(vendor)); + + S32 vendorInfo[4]; + __cpuid(vendorInfo, 0); + *reinterpret_cast<S32*>(vendor) = vendorInfo[1]; // ebx + *reinterpret_cast<S32*>(vendor + 4) = vendorInfo[3]; // edx + *reinterpret_cast<S32*>(vendor + 8) = vendorInfo[2]; // ecx + + char brand[0x40]; + dMemset(brand, 0, sizeof(brand)); + getBrand(brand); + + SetProcessorInfo(Platform::SystemInfo.processor, vendor, brand); + detectCpuFeatures(Platform::SystemInfo.processor); + + U32 mhz = 1000; // default if it can't be found LONG result; DWORD data = 0; @@ -107,64 +154,37 @@ void Processor::init() result = ::RegQueryValueExA (hKey, "~MHz",NULL, NULL,(LPBYTE)&data, &dataSize); if (result == ERROR_SUCCESS) - nearmhz = mhz = data; + mhz = data; ::RegCloseKey(hKey); } Platform::SystemInfo.processor.mhz = mhz; - if (mhz==0) - { - Con::printf(" %s, (Unknown) Mhz", Platform::SystemInfo.processor.name); - // stick SOMETHING in so it isn't ZERO. - Platform::SystemInfo.processor.mhz = 200; // seems a decent value. 
- } - else - { - if (nearmhz >= 1000) - Con::printf(" %s, ~%.2f Ghz", Platform::SystemInfo.processor.name, ((float)nearmhz)/1000.0f); - else - Con::printf(" %s, ~%d Mhz", Platform::SystemInfo.processor.name, nearmhz); - if (nearmhz != mhz) - { - if (mhz >= 1000) - Con::printf(" (timed at roughly %.2f Ghz)", ((float)mhz)/1000.0f); - else - Con::printf(" (timed at roughly %d Mhz)", mhz); - } - } - - if( Platform::SystemInfo.processor.numAvailableCores > 0 - || Platform::SystemInfo.processor.numPhysicalProcessors > 0 - || Platform::SystemInfo.processor.isHyperThreaded ) - Platform::SystemInfo.processor.properties |= CPU_PROP_MP; - - if (Platform::SystemInfo.processor.properties & CPU_PROP_FPU) - Con::printf( " FPU detected" ); + Con::printf("Processor Init:"); + Con::printf(" Processor: %s", Platform::SystemInfo.processor.name); if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX) - Con::printf( " MMX detected" ); + Con::printf(" MMX detected" ); if (Platform::SystemInfo.processor.properties & CPU_PROP_3DNOW) - Con::printf( " 3DNow detected" ); + Con::printf(" 3DNow detected" ); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE) - Con::printf( " SSE detected" ); - if( Platform::SystemInfo.processor.properties & CPU_PROP_SSE2 ) - Con::printf( " SSE2 detected" ); + Con::printf(" SSE detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2) + Con::printf(" SSE2 detected" ); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3) - Con::printf( " SSE3 detected" ); - if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3xt) - Con::printf( " SSE3ex detected "); + Con::printf(" SSE3 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex) + Con::printf(" SSE3ex detected "); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1) - Con::printf( " SSE4.1 detected" ); + Con::printf(" SSE4.1 detected" ); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2) - Con::printf( " SSE4.2 
detected" ); - if( Platform::SystemInfo.processor.isHyperThreaded ) - Con::printf( " HT detected" ); - if( Platform::SystemInfo.processor.properties & CPU_PROP_MP ) - Con::printf( " MP detected [%i cores, %i logical, %i physical]", - Platform::SystemInfo.processor.numAvailableCores, - Platform::SystemInfo.processor.numLogicalProcessors, - Platform::SystemInfo.processor.numPhysicalProcessors ); + Con::printf(" SSE4.2 detected" ); + if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX) + Con::printf(" AVX detected"); + + if (Platform::SystemInfo.processor.properties & CPU_PROP_MP) + Con::printf(" MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors); + Con::printf(" "); PlatformBlitInit(); diff --git a/Engine/source/platformWin32/winPlatformCPUCount.cpp b/Engine/source/platformWin32/winPlatformCPUCount.cpp index e4a5d54d6..e4b113c67 100644 --- a/Engine/source/platformWin32/winPlatformCPUCount.cpp +++ b/Engine/source/platformWin32/winPlatformCPUCount.cpp @@ -26,6 +26,7 @@ #if defined( TORQUE_OS_WIN ) #include "platform/platformCPUCount.h" +#include "console/console.h" #include #include #include @@ -52,12 +53,10 @@ namespace CPUInfo { return bitSetCount; } - EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum ) + EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore ) { - EConfig StatusFlag = CONFIG_UserConfigIssue; TotAvailLogical = 0; TotAvailCore = 0; - PhysicalNum = 0; PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; DWORD returnLength = 0; @@ -68,42 +67,37 @@ namespace CPUInfo { rc = GetLogicalProcessorInformation( buffer, &returnLength ); + // if we fail, assume single threaded if( FALSE == rc ) { free( buffer ); - return StatusFlag; + Con::errorf("Unable to determine CPU Count, assuming 1 core"); + TotAvailCore = 1; + TotAvailLogical = 1; + return CONFIG_SingleCoreAndHTNotCapable; } +#pragma warning (push) +#pragma warning (disable: 
6011) PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer; DWORD byteOffset = 0; while( byteOffset + sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) <= returnLength ) { - switch( ptr->Relationship ) - { - - case RelationProcessorCore: + if (ptr->Relationship == RelationProcessorCore) + { TotAvailCore++; - - // A hyperthreaded core supplies more than one logical processor. - TotAvailLogical += CountSetBits( ptr->ProcessorMask ); - break; - - case RelationProcessorPackage: - // Logical processors share a physical package. - PhysicalNum++; - break; - - default: - break; + TotAvailLogical += CountSetBits(ptr->ProcessorMask); } + byteOffset += sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ); ptr++; - } + } free( buffer ); +#pragma warning (pop) - StatusFlag = CONFIG_SingleCoreAndHTNotCapable; + EConfig StatusFlag = CONFIG_SingleCoreAndHTNotCapable; if( TotAvailCore == 1 && TotAvailLogical > TotAvailCore ) StatusFlag = CONFIG_SingleCoreHTEnabled;