From 7cb306b65a3b5551b4ae4c80a001e27e3c162bcc Mon Sep 17 00:00:00 2001
From: Jeff Hutchinson
Date: Sat, 25 Sep 2021 22:39:40 -0400
Subject: [PATCH] macos platform fixes for intel macs

CPU_PROP_AVX is given its own bit (15) instead of bit 11. Bit 11 is
already CPU_PROP_MP, and the macOS probe in macCPU.mm sets the two flags
from unrelated sysctls (hw.optional.avx1_0 and hw.ncpu), so sharing a bit
made every multi-core machine report "AVX detected".

NOTE(review): ProcessorProperties continues past the end of this hunk;
confirm that bit 15 is not used by an entry that is not visible here.

---
 Engine/source/platform/platform.h             |   6 +-
 Engine/source/platform/platformCPU.cpp        |  53 +-
 Engine/source/platform/platformCPUCount.cpp   | 657 ------------------
 Engine/source/platform/platformCPUCount.h     |  11 +-
 Engine/source/platform/threads/threadPool.cpp |   3 +-
 Engine/source/platformMac/macCPU.mm           | 287 ++++----
 6 files changed, 132 insertions(+), 885 deletions(-)
 delete mode 100644 Engine/source/platform/platformCPUCount.cpp

diff --git a/Engine/source/platform/platform.h b/Engine/source/platform/platform.h
index 35cd1a687..4d8d22f3c 100644
--- a/Engine/source/platform/platform.h
+++ b/Engine/source/platform/platform.h
@@ -73,9 +73,10 @@ enum ProcessorProperties
    CPU_PROP_RDTSC     = (1<<5),  ///< Supports Read Time Stamp Counter op.
    CPU_PROP_SSE2      = (1<<6),  ///< Supports SSE2 instruction set extension.
    CPU_PROP_SSE3      = (1<<7),  ///< Supports SSE3 instruction set extension.
-   CPU_PROP_SSE3xt    = (1<<8),  ///< Supports extended SSE3 instruction set
+   CPU_PROP_SSE3ex    = (1<<8),  ///< Supports extended SSE3 instruction set
    CPU_PROP_SSE4_1    = (1<<9),  ///< Supports SSE4_1 instruction set extension.
-   CPU_PROP_SSE4_2    = (1<<10),  ///< Supports SSE4_2 instruction set extension.
+   CPU_PROP_SSE4_2    = (1<<10), ///< Supports SSE4_2 instruction set extension.
+   CPU_PROP_AVX       = (1<<15), ///< Supports AVX256 instruction set extension.
    CPU_PROP_MP        = (1<<11), ///< This is a multi-processor system.
    CPU_PROP_LE        = (1<<12), ///< This processor is LITTLE ENDIAN.
CPU_PROP_64bit = (1<<13), ///< This processor is 64-bit capable @@ -297,7 +298,6 @@ namespace Platform bool isHyperThreaded; U32 numLogicalProcessors; U32 numPhysicalProcessors; - U32 numAvailableCores; U32 properties; // CPU type specific enum } processor; }; diff --git a/Engine/source/platform/platformCPU.cpp b/Engine/source/platform/platformCPU.cpp index d0a852431..8449d1daa 100644 --- a/Engine/source/platform/platformCPU.cpp +++ b/Engine/source/platform/platformCPU.cpp @@ -28,48 +28,8 @@ Signal Platform::SystemInfoReady; -enum CPUFlags +void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand) { - // EDX Register flags - BIT_RDTSC = BIT(4), - BIT_MMX = BIT(23), - BIT_SSE = BIT(25), - BIT_SSE2 = BIT(26), - BIT_3DNOW = BIT(31), // only available for amd cpus in x86 - - // These use a different value for comparison than the above flags (ECX Register) - BIT_SSE3 = BIT(0), - BIT_SSE3xt = BIT(9), - BIT_SSE4_1 = BIT(19), - BIT_SSE4_2 = BIT(20), -}; - -// fill the specified structure with information obtained from asm code -void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, - char* vendor, char* brand, U32 processor, U32 properties, U32 properties2) -{ - // always assume FPU is available in 2021... - pInfo.properties |= CPU_PROP_FPU; - -#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64) - pInfo.properties |= CPU_PROP_LE; -#endif - -#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64) - pInfo.properties |= CPU_PROP_64bit; -#endif - -#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64) - pInfo.properties |= (properties & BIT_RDTSC) ? CPU_PROP_RDTSC : 0; - pInfo.properties |= (properties & BIT_MMX) ? CPU_PROP_MMX : 0; - pInfo.properties |= (properties & BIT_SSE) ? CPU_PROP_SSE : 0; - pInfo.properties |= (properties & BIT_SSE2) ? CPU_PROP_SSE2 : 0; - pInfo.properties |= (properties2 & BIT_SSE3) ? CPU_PROP_SSE3 : 0; - pInfo.properties |= (properties2 & BIT_SSE3xt) ? 
CPU_PROP_SSE3xt : 0; - pInfo.properties |= (properties2 & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0; - pInfo.properties |= (properties2 & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0; -#endif - if (dStricmp(vendor, "GenuineIntel") == 0) { pInfo.type = CPU_Intel; @@ -80,9 +40,6 @@ void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, { pInfo.name = StringTable->insert(brand ? brand : "AMD (unknown)"); pInfo.type = CPU_AMD; - - // 3dnow! is only available in AMD cpus on x86. Otherwise its not reliably set. - pInfo.properties |= (properties & BIT_3DNOW) ? CPU_PROP_3DNOW : 0; } else if (dStricmp(vendor, "Apple") == 0) { @@ -92,18 +49,22 @@ void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, else { #if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64) + pInfo.name = StringTable->insert(brand ? brand : "x86 Compatible (unknown)"); pInfo.type = CPU_X86Compatible; + #elif defined(TORQUE_CPU_ARM64) pInfo.name = StringTable->insert(brand ? brand : "Arm Compatible (unknown)"); pInfo.type = CPU_ArmCompatible; + #else #error "Unknown CPU Architecture" + #endif } - + // Get multithreading caps. - CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numAvailableCores, pInfo.numPhysicalProcessors ); + CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numPhysicalProcessors ); pInfo.isHyperThreaded = CPUInfo::isHyperThreaded( config ); pInfo.isMultiCore = CPUInfo::isMultiCore( config ); diff --git a/Engine/source/platform/platformCPUCount.cpp b/Engine/source/platform/platformCPUCount.cpp deleted file mode 100644 index a3fe99d67..000000000 --- a/Engine/source/platform/platformCPUCount.cpp +++ /dev/null @@ -1,657 +0,0 @@ -// Original code is: -// Copyright (c) 2005 Intel Corporation -// All Rights Reserved -// -// CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform -// The three forms of HW multithreading are: Multi-processor, Multi-core, and -// HyperThreading Technology. 
-// This application enumerates all the logical processors enabled by OS and BIOS, -// determine the HW topology of these enabled logical processors in the system -// using information provided by CPUID instruction. -// A multi-processing system can support any combination of the three forms of HW -// multi-threading support. The relevant topology can be identified using a -// three level decomposition of the "initial APIC ID" into -// Package_id, core_id, and SMT_id. Such decomposition provides a three-level map of -// the topology of hardware resources and -// allow multi-threaded software to manage shared hardware resources in -// the platform to reduce resource contention - -// Multicore detection algorithm for processor and cache topology requires -// all leaf functions of CPUID instructions be available. System administrator -// must ensure BIOS settings is not configured to restrict CPUID functionalities. -//------------------------------------------------------------------------------------------------- - -#if defined(TORQUE_OS_LINUX) || defined(LINUX) - -// TODO GCC code don't compile on Release with optimizations, mover code to platform layer - -#else - -#include "platform/platform.h" -#include "platform/platformCPUCount.h" - -#if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX) - -#ifdef TORQUE_OS_LINUX -// The Linux source code listing can be compiled using Linux kernel verison 2.6 -// or higher (e.g. RH 4AS-2.8 using GCC 3.4.4). -// Due to syntax variances of Linux affinity APIs with earlier kernel versions -// and dependence on glibc library versions, compilation on Linux environment -// with older kernels and compilers may require kernel patches or compiler upgrades. - -#include -#include -#include -#include -#define DWORD unsigned long -#elif defined( TORQUE_OS_WIN ) -#include -#elif defined( TORQUE_OS_MAC ) -# include -# include -#else -#error Not implemented on platform. 
-#endif -#include -#include - -namespace CPUInfo { - -#define HWD_MT_BIT 0x10000000 // EDX[28] Bit 28 is set if HT or multi-core is supported -#define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical - // processors per physical processor when execute cpuid with - // eax set to 1 -#define NUM_CORE_BITS 0xFC000000 // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one - // per physical processor when execute cpuid with - // eax set to 4. - - -#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique - // initial APIC ID for the processor this code is running on. - - - #ifndef TORQUE_OS_MAC - static U32 CpuIDSupported(void); - static U32 find_maskwidth(unsigned int); - static U32 HWD_MTSupported(void); - static U32 MaxLogicalProcPerPhysicalProc(void); - static U32 MaxCorePerPhysicalProc(void); - static U8 GetAPIC_ID(void); - static U8 GetNzbSubID(U8, U8, U8); - #endif - - static char g_s3Levels[2048]; - -#ifndef TORQUE_OS_MAC - - // - // CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return - // the maximum supported standard function. - // - static U32 CpuIDSupported(void) - { - U32 maxInputValue = 0; - // If CPUID instruction is supported -#ifdef TORQUE_COMPILER_GCC - try - { - // call cpuid with eax = 0 - asm - ( - "pushl %%ebx\n\t" - "xorl %%eax,%%eax\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=a" (maxInputValue) - : - : "%ecx", "%edx" - ); - } - catch (...) - { - return(0); // cpuid instruction is unavailable - } -#elif defined( TORQUE_COMPILER_VISUALC ) - try - { - // call cpuid with eax = 0 - __asm - { - xor eax, eax - cpuid - mov maxInputValue, eax - } - } - catch (...) - { - // cpuid instruction is unavailable - } -#else -# error Not implemented. -#endif - - return maxInputValue; - } - - - - // - // Function returns the maximum cores per physical package. 
Note that the number of - // AVAILABLE cores per physical to be used by an application might be less than this - // maximum value. - // - - static U32 MaxCorePerPhysicalProc(void) - { - - U32 Regeax = 0; - - if (!HWD_MTSupported()) return (U32) 1; // Single core -#ifdef TORQUE_COMPILER_GCC - { - asm - ( - "pushl %ebx\n\t" - "xorl %eax, %eax\n\t" - "cpuid\n\t" - "cmpl $4, %eax\n\t" // check if cpuid supports leaf 4 - "jl .single_core\n\t" // Single core - "movl $4, %eax\n\t" - "movl $0, %ecx\n\t" // start with index = 0; Leaf 4 reports - "popl %ebx\n\t" - ); // at least one valid cache level - asm - ( - "cpuid" - : "=a" (Regeax) - : - : "%ecx", "%edx" - ); - asm - ( - "jmp .multi_core\n" - ".single_core:\n\t" - "xor %eax, %eax\n" - ".multi_core:" - ); - } -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - xor eax, eax - cpuid - cmp eax, 4 // check if cpuid supports leaf 4 - jl single_core // Single core - mov eax, 4 - mov ecx, 0 // start with index = 0; Leaf 4 reports - cpuid // at least one valid cache level - mov Regeax, eax - jmp multi_core - -single_core: - xor eax, eax - -multi_core: - - } -#else -# error Not implemented. -#endif - return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1; - - } - - - - // - // The function returns 0 when the hardware multi-threaded bit is not set. - // - static U32 HWD_MTSupported(void) - { - - - U32 Regedx = 0; - - - if ((CpuIDSupported() >= 1)) - { -#ifdef TORQUE_COMPILER_GCC - asm - ( - "pushl %%ebx\n\t" - "movl $1,%%eax\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=d" (Regedx) - : - : "%eax","%ecx" - ); -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - mov eax, 1 - cpuid - mov Regedx, edx - } -#else -# error Not implemented. -#endif - } - - return (Regedx & HWD_MT_BIT); - - - } - - - - // - // Function returns the maximum logical processors per physical package. Note that the number of - // AVAILABLE logical processors per physical to be used by an application might be less than this - // maximum value. 
- // - static U32 MaxLogicalProcPerPhysicalProc(void) - { - - U32 Regebx = 0; - - if (!HWD_MTSupported()) return (U32) 1; -#ifdef TORQUE_COMPILER_GCC - asm - ( - "movl $1,%%eax\n\t" - "cpuid" - : "=b" (Regebx) - : - : "%eax","%ecx","%edx" - ); -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - mov eax, 1 - cpuid - mov Regebx, ebx - } -#else -# error Not implemented. -#endif - return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16); - - } - - - static U8 GetAPIC_ID(void) - { - - U32 Regebx = 0; -#ifdef TORQUE_COMPILER_GCC - asm - ( - "movl $1, %%eax\n\t" - "cpuid" - : "=b" (Regebx) - : - : "%eax","%ecx","%edx" - ); - -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - mov eax, 1 - cpuid - mov Regebx, ebx - } -#else -# error Not implemented. -#endif - - return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24); - - } - - // - // Determine the width of the bit field that can represent the value count_item. - // - U32 find_maskwidth(U32 CountItem) - { - U32 MaskWidth, - count = CountItem; -#ifdef TORQUE_COMPILER_GCC - asm - ( -#ifdef __x86_64__ // define constant to compile - "push %%rcx\n\t" // under 64-bit Linux - "push %%rax\n\t" -#else - "pushl %%ecx\n\t" - "pushl %%eax\n\t" -#endif - // "movl $count, %%eax\n\t" //done by Assembler below - "xorl %%ecx, %%ecx" - // "movl %%ecx, MaskWidth\n\t" //done by Assembler below - : "=c" (MaskWidth) - : "a" (count) - // : "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler - //to put them back when we are done - ); - asm - ( - "decl %%eax\n\t" - "bsrw %%ax,%%cx\n\t" - "jz next\n\t" - "incw %%cx\n\t" - // "movl %%ecx, MaskWidth\n" //done by Assembler below - : "=c" (MaskWidth) - : - ); - asm - ( - "next:\n\t" -#ifdef __x86_64__ - "pop %rax\n\t" - "pop %rcx" -#else - "popl %eax\n\t" - "popl %ecx" -#endif - ); - -#elif defined( TORQUE_COMPILER_VISUALC ) - __asm - { - mov eax, count - mov ecx, 0 - mov MaskWidth, ecx - dec eax - bsr cx, ax - jz next - inc cx - mov MaskWidth, ecx 
-next: - - } -#else -# error Not implemented. -#endif - return MaskWidth; - } - - - // - // Extract the subset of bit field from the 8-bit value FullID. It returns the 8-bit sub ID value - // - static U8 GetNzbSubID(U8 FullID, - U8 MaxSubIDValue, - U8 ShiftCount) - { - U32 MaskWidth; - U8 MaskBits; - - MaskWidth = find_maskwidth((U32) MaxSubIDValue); - MaskBits = (0xff << ShiftCount) ^ - ((U8) (0xff << (ShiftCount + MaskWidth))); - - return (FullID & MaskBits); - } - -#endif - - - // - // - // - EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum) - { - EConfig StatusFlag = CONFIG_UserConfigIssue; - - g_s3Levels[0] = 0; - TotAvailCore = 1; - PhysicalNum = 1; - - U32 numLPEnabled = 0; - S32 MaxLPPerCore = 1; - -#ifdef TORQUE_OS_MAC - - //FIXME: This isn't a proper port but more or less just some sneaky cheating - // to get around having to mess with yet another crap UNIX-style API. Seems - // like there isn't a way to do this that's working across all OSX incarnations - // and machine configurations anyway. - - S32 numCPUs; - S32 numPackages; - - // Get the number of CPUs. - - size_t len = sizeof( numCPUs ); - if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 ) - return CONFIG_UserConfigIssue; - - // Get the number of packages. - len = sizeof( numPackages ); - if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 ) - return CONFIG_UserConfigIssue; - - TotAvailCore = numCPUs; - TotAvailLogical = numCPUs; - PhysicalNum = numPackages; -#else - - U32 dwAffinityMask; - S32 j = 0; - U8 apicID, PackageIDMask; - U8 tblPkgID[256], tblCoreID[256], tblSMTID[256]; - char tmp[256]; - -#ifdef TORQUE_OS_LINUX - //we need to make sure that this process is allowed to run on - //all of the logical processors that the OS itself can run on. - //A process could acquire/inherit affinity settings that restricts the - // current process to run on a subset of all logical processor visible to OS. 
- - // Linux doesn't easily allow us to look at the Affinity Bitmask directly, - // but it does provide an API to test affinity maskbits of the current process - // against each logical processor visible under OS. - S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many - //CPUs are currently enabled. - - //this will tell us which processors this process can run on. - cpu_set_t allowedCPUs; - sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs); - - for (S32 i = 0; i < sysNumProcs; i++ ) - { - if ( CPU_ISSET(i, &allowedCPUs) == 0 ) - return CONFIG_UserConfigIssue; - } -#elif defined( TORQUE_OS_WIN ) - DWORD dwProcessAffinity, dwSystemAffinity; - GetProcessAffinityMask(GetCurrentProcess(), - &dwProcessAffinity, - &dwSystemAffinity); - if (dwProcessAffinity != dwSystemAffinity) // not all CPUs are enabled - return CONFIG_UserConfigIssue; -#else -# error Not implemented. -#endif - - // Assume that cores within a package have the SAME number of - // logical processors. Also, values returned by - // MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have - // to be power of 2. - - MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc(); - dwAffinityMask = 1; - -#ifdef TORQUE_OS_LINUX - cpu_set_t currentCPU; - while ( j < sysNumProcs ) - { - CPU_ZERO(¤tCPU); - CPU_SET(j, ¤tCPU); - if ( sched_setaffinity (0, sizeof(currentCPU), ¤tCPU) == 0 ) - { - sleep(0); // Ensure system to switch to the right CPU -#elif defined( TORQUE_OS_WIN ) - while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity) - { - if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask)) - { - Sleep(0); // Ensure system to switch to the right CPU -#else -# error Not implemented. 
-#endif - apicID = GetAPIC_ID(); - - - // Store SMT ID and core ID of each logical processor - // Shift vlaue for SMT ID is 0 - // Shift value for core ID is the mask width for maximum logical - // processors per core - - tblSMTID[j] = GetNzbSubID(apicID, MaxLPPerCore, 0); - U8 maxCorePPP = MaxCorePerPhysicalProc(); - U8 maskWidth = find_maskwidth(MaxLPPerCore); - tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth); - - // Extract package ID, assume single cluster. - // Shift value is the mask width for max Logical per package - - PackageIDMask = (unsigned char) (0xff << - find_maskwidth(MaxLogicalProcPerPhysicalProc())); - - tblPkgID[j] = apicID & PackageIDMask; - sprintf(tmp," AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d, SMT ID = %d\n", - dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]); - dStrcat(g_s3Levels, tmp, 2048); - - numLPEnabled ++; // Number of available logical processors in the system. - - } // if - - j++; - dwAffinityMask = 1 << j; - } // while - - // restore the affinity setting to its original state -#ifdef TORQUE_OS_LINUX - sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs); - sleep(0); -#elif defined( TORQUE_OS_WIN ) - SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity); - Sleep(0); -#else -# error Not implemented. -#endif - TotAvailLogical = numLPEnabled; - - // - // Count available cores (TotAvailCore) in the system - // - U8 CoreIDBucket[256]; - DWORD ProcessorMask, pCoreMask[256]; - U32 i, ProcessorNum; - - CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0]; - ProcessorMask = 1; - pCoreMask[0] = ProcessorMask; - - for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++) - { - ProcessorMask <<= 1; - for (i = 0; i < TotAvailCore; i++) - { - // Comparing bit-fields of logical processors residing in different packages - // Assuming the bit-masks are the same on all processors in the system. 
- if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i]) - { - pCoreMask[i] |= ProcessorMask; - break; - } - - } // for i - - if (i == TotAvailCore) // did not match any bucket. Start a new one. - { - CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]; - pCoreMask[i] = ProcessorMask; - - TotAvailCore++; // Number of available cores in the system - - } - - } // for ProcessorNum - - - // - // Count physical processor (PhysicalNum) in the system - // - U8 PackageIDBucket[256]; - DWORD pPackageMask[256]; - - PackageIDBucket[0] = tblPkgID[0]; - ProcessorMask = 1; - pPackageMask[0] = ProcessorMask; - - for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++) - { - ProcessorMask <<= 1; - for (i = 0; i < PhysicalNum; i++) - { - // Comparing bit-fields of logical processors residing in different packages - // Assuming the bit-masks are the same on all processors in the system. - if (tblPkgID[ProcessorNum]== PackageIDBucket[i]) - { - pPackageMask[i] |= ProcessorMask; - break; - } - - } // for i - - if (i == PhysicalNum) // did not match any bucket. Start a new one. 
- { - PackageIDBucket[i] = tblPkgID[ProcessorNum]; - pPackageMask[i] = ProcessorMask; - - PhysicalNum++; // Total number of physical processors in the system - - } - - } // for ProcessorNum -#endif - - // - // Check to see if the system is multi-core - // Check if the system is hyper-threading - // - if (TotAvailCore > PhysicalNum) - { - // Multi-core - if (MaxLPPerCore == 1) - StatusFlag = CONFIG_MultiCoreAndHTNotCapable; - else if (numLPEnabled > TotAvailCore) - StatusFlag = CONFIG_MultiCoreAndHTEnabled; - else StatusFlag = CONFIG_MultiCoreAndHTDisabled; - - } - else - { - // Single-core - if (MaxLPPerCore == 1) - StatusFlag = CONFIG_SingleCoreAndHTNotCapable; - else if (numLPEnabled > TotAvailCore) - StatusFlag = CONFIG_SingleCoreHTEnabled; - else StatusFlag = CONFIG_SingleCoreHTDisabled; - - - } - - - - return StatusFlag; - } - -} // namespace CPUInfo -#endif - -#endif diff --git a/Engine/source/platform/platformCPUCount.h b/Engine/source/platform/platformCPUCount.h index 2ee07c2eb..d008201b3 100644 --- a/Engine/source/platform/platformCPUCount.h +++ b/Engine/source/platform/platformCPUCount.h @@ -29,13 +29,10 @@ namespace CPUInfo { enum EConfig { - CONFIG_UserConfigIssue, CONFIG_SingleCoreHTEnabled, - CONFIG_SingleCoreHTDisabled, CONFIG_SingleCoreAndHTNotCapable, CONFIG_MultiCoreAndHTNotCapable, CONFIG_MultiCoreAndHTEnabled, - CONFIG_MultiCoreAndHTDisabled, }; inline bool isMultiCore( EConfig config ) @@ -44,7 +41,6 @@ namespace CPUInfo { case CONFIG_MultiCoreAndHTNotCapable: case CONFIG_MultiCoreAndHTEnabled: - case CONFIG_MultiCoreAndHTDisabled: return true; default: @@ -65,11 +61,10 @@ namespace CPUInfo } } - EConfig CPUCount( U32& totalAvailableLogical, - U32& totalAvailableCores, - U32& numPhysical ); - + EConfig CPUCount( U32& totalAvailableLogical, U32& totalAvailableCores ); } // namespace CPUInfo +void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand); + #endif // _TORQUE_PLATFORM_PLATFORMCOUNT_H_ diff --git 
a/Engine/source/platform/threads/threadPool.cpp b/Engine/source/platform/threads/threadPool.cpp index 86402522f..1cad50dda 100644 --- a/Engine/source/platform/threads/threadPool.cpp +++ b/Engine/source/platform/threads/threadPool.cpp @@ -322,10 +322,9 @@ ThreadPool::ThreadPool( const char* name, U32 numThreads ) // Platform::SystemInfo will not yet have been initialized. U32 numLogical = 0; - U32 numPhysical = 0; U32 numCores = 0; - CPUInfo::CPUCount( numLogical, numCores, numPhysical ); + CPUInfo::CPUCount( numLogical, numCores ); const U32 baseCount = getMax( numLogical, numCores ); mNumThreads = (baseCount > 0) ? baseCount : 2; diff --git a/Engine/source/platformMac/macCPU.mm b/Engine/source/platformMac/macCPU.mm index 24a1a8a62..8987cff75 100644 --- a/Engine/source/platformMac/macCPU.mm +++ b/Engine/source/platformMac/macCPU.mm @@ -35,15 +35,6 @@ // we now have to use NSProcessInfo #import -//recently removed in Xcode 8 - most likely don't need these anymore -#ifndef CPUFAMILY_INTEL_YONAH -#define CPUFAMILY_INTEL_YONAH 0x73d67300 -#endif - -#ifndef CPUFAMILY_INTEL_MEROM -#define CPUFAMILY_INTEL_MEROM 0x426f69ef -#endif - // Original code by Sean O'Brien (http://www.garagegames.com/community/forums/viewthread/81815). @@ -89,8 +80,58 @@ int _getSysCTLvalue(const char key[], T * dest) { Platform::SystemInfo_struct Platform::SystemInfo; -#define BASE_MHZ_SPEED 0 -//TODO update cpu list +#define BASE_MHZ_SPEED 1000 + +static void detectCpuFeatures(U32 &procflags) +{ + // Now we can directly query the system about a litany of "Optional" processor capabilities + // and determine the status by using BOTH the 'err' value and the 'lraw' value. If we request + // a non-existant feature from SYSCTL(), the 'err' result will be -1; 0 denotes it exists + // >>>> BUT <<<<< + // it may not be supported, only defined. Thus we need to check 'lraw' to determine if it's + // actually supported/implemented by the processor: 0 = no, 1 = yes, others are undefined. 
+ + int err; + U32 lraw; + + // List of chip-specific features + err = _getSysCTLvalue("hw.optional.mmx", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_MMX; + err = _getSysCTLvalue("hw.optional.sse", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE; + err = _getSysCTLvalue("hw.optional.sse2", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE2; + err = _getSysCTLvalue("hw.optional.sse3", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE3; + err = _getSysCTLvalue("hw.optional.supplementalsse3", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE3ex; + err = _getSysCTLvalue("hw.optional.sse4_1", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE4_1; + err = _getSysCTLvalue("hw.optional.sse4_2", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_SSE4_2; + err = _getSysCTLvalue("hw.optional.avx1_0", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_AVX; + + err = _getSysCTLvalue("hw.ncpu", &lraw); + if ((err==0)&&(lraw>1)) + procflags |= CPU_PROP_MP; + err = _getSysCTLvalue("hw.cpu64bit_capable", &lraw); + if ((err==0)&&(lraw==1)) + procflags |= CPU_PROP_64bit; + err = _getSysCTLvalue("hw.byteorder", &lraw); + if ((err==0)&&(lraw==1234)) + procflags |= CPU_PROP_LE; + +} + void Processor::init() { U32 procflags; @@ -98,178 +139,64 @@ void Processor::init() char buf[255]; U32 lraw; U64 llraw; - - Con::printf( "System & Processor Information:" ); - // Gestalt has been deprecated since Mac OSX Mountain Lion and has stopped working on - // Mac OSX Yosemite. we have to use NSProcessInfo now. // Availability: Mac OS 10.2 or greater. 
NSString *osVersionStr = [[NSProcessInfo processInfo] operatingSystemVersionString]; - Con::printf( " OSX Version: %s", [osVersionStr UTF8String]); - - err = _getSysCTLstring("kern.ostype", buf, sizeof(buf)); - if (err) - Con::printf( " Unable to determine OS type\n" ); - else - Con::printf( " Mac OS Kernel name: %s", buf); - - err = _getSysCTLstring("kern.osrelease", buf, sizeof(buf)); - if (err) - Con::printf( " Unable to determine OS release number\n" ); - else - Con::printf( " Mac OS Kernel version: %s", buf ); - + + S32 ramMB; err = _getSysCTLvalue("hw.memsize", &llraw); if (err) - Con::printf( " Unable to determine amount of physical RAM\n" ); + ramMB = 512; else - Con::printf( " Physical memory installed: %d MB", (llraw >> 20)); - - err = _getSysCTLvalue("hw.usermem", &lraw); - if (err) - Con::printf( " Unable to determine available user address space\n"); - else - Con::printf( " Addressable user memory: %d MB", (lraw >> 20)); - - //////////////////////////////// - // Values for the Family Type, CPU Type and CPU Subtype are defined in the - // SDK files for the Mach Kernel ==> mach/machine.h - //////////////////////////////// - - // CPU Family, Type, and Subtype - cpufam = 0; - cputype = 0; - cpusub = 0; - err = _getSysCTLvalue("hw.cpufamily", &lraw); - if (err) - Con::printf( " Unable to determine 'family' of CPU\n"); - else { - cpufam = (int) lraw; - err = _getSysCTLvalue("hw.cputype", &lraw); - if (err) - Con::printf( " Unable to determine CPU type\n"); - else { - cputype = (int) lraw; - err = _getSysCTLvalue("hw.cpusubtype", &lraw); - if (err) - Con::printf( " Unable to determine CPU subtype\n"); - else - cpusub = (int) lraw; - // If we've made it this far, - Con::printf( " Installed processor ID: Family 0x%08x Type %d Subtype %d",cpufam, cputype,cpusub); - } - } + ramMB = llraw >> 20; + char brandString[256]; + err = _getSysCTLstring("machdep.cpu.brand_string", brandString, sizeof(brandString)); + if (err) + brandString[0] = '\0'; + + char vendor[256]; 
+ err = _getSysCTLstring("machdep.cpu.vendor", vendor, sizeof(vendor)); + if (err) + vendor[0] = '\0'; + // The Gestalt version was known to have issues with some Processor Upgrade cards // but it is uncertain whether this version has similar issues. err = _getSysCTLvalue("hw.cpufrequency", &llraw); if (err) { llraw = BASE_MHZ_SPEED; - Con::printf( " Unable to determine CPU Frequency. Defaulting to %d MHz\n", llraw); } else { llraw /= 1000000; - Con::printf( " Installed processor clock frequency: %d MHz", llraw); } Platform::SystemInfo.processor.mhz = (unsigned int)llraw; - // Here's one that the original version of this routine couldn't do -- number - // of processors (cores) - U32 ncpu = 1; - err = _getSysCTLvalue("hw.ncpu", &lraw); - if (err) - Con::printf( " Unable to determine number of processor cores\n"); - else - { - ncpu = lraw; - Con::printf( " Installed/available processor cores: %d", lraw); - } - - // Now use CPUFAM to determine and then store the processor type - // and 'friendly name' in GG-accessible structure. Note that since - // we have access to the Family code, the Type and Subtypes are useless. - // - // NOTE: Even this level of detail is almost assuredly not needed anymore - // and the Optional Capability flags (further down) should be more than enough. 
- switch(cpufam) - { - case CPUFAMILY_INTEL_YONAH: - Platform::SystemInfo.processor.type = CPU_Intel_Core; - if( ncpu == 2 ) - Platform::SystemInfo.processor.name = StringTable->insert("Intel Core Duo"); - else - Platform::SystemInfo.processor.name = StringTable->insert("Intel Core"); - break; - case CPUFAMILY_INTEL_PENRYN: - case CPUFAMILY_INTEL_MEROM: - Platform::SystemInfo.processor.type = CPU_Intel_Core2; - if( ncpu == 4 ) - Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Quad"); - else - Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Duo"); - break; - - case CPUFAMILY_INTEL_NEHALEM: - Platform::SystemInfo.processor.type = CPU_Intel_Core2; - Platform::SystemInfo.processor.name = StringTable->insert( "Intel 'Nehalem' Core Processor" ); - break; - - default: - // explain why we can't get the processor type. - Con::warnf( " Unknown Processor (family, type, subtype): 0x%x\t%d %d", cpufam, cputype, cpusub); - // for now, identify it as an x86 processor, because Apple is moving to Intel chips... - Platform::SystemInfo.processor.type = CPU_X86Compatible; - Platform::SystemInfo.processor.name = StringTable->insert("Unknown Processor, assuming x86 Compatible"); - break; - } - // Now we can directly query the system about a litany of "Optional" processor capabilities - // and determine the status by using BOTH the 'err' value and the 'lraw' value. If we request - // a non-existant feature from SYSCTL(), the 'err' result will be -1; 0 denotes it exists - // >>>> BUT <<<<< - // it may not be supported, only defined. Thus we need to check 'lraw' to determine if it's - // actually supported/implemented by the processor: 0 = no, 1 = yes, others are undefined. 
- procflags = 0; - // Seriously this one should be an Assert() - err = _getSysCTLvalue("hw.optional.floatingpoint", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_FPU; - // List of chip-specific features - err = _getSysCTLvalue("hw.optional.mmx", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_MMX; - err = _getSysCTLvalue("hw.optional.sse", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE; - err = _getSysCTLvalue("hw.optional.sse2", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE2; - err = _getSysCTLvalue("hw.optional.sse3", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3; - err = _getSysCTLvalue("hw.optional.supplementalsse3", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3xt; - err = _getSysCTLvalue("hw.optional.sse4_1", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_1; - err = _getSysCTLvalue("hw.optional.sse4_2", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_2; - - // Finally some architecture-wide settings - err = _getSysCTLvalue("hw.ncpu", &lraw); - if ((err==0)&&(lraw>1)) procflags |= CPU_PROP_MP; - err = _getSysCTLvalue("hw.cpu64bit_capable", &lraw); - if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_64bit; - err = _getSysCTLvalue("hw.byteorder", &lraw); - if ((err==0)&&(lraw==1234)) procflags |= CPU_PROP_LE; - - Platform::SystemInfo.processor.properties = procflags; - - Con::printf( "%s, %2.2f GHz", Platform::SystemInfo.processor.name, F32( Platform::SystemInfo.processor.mhz ) / 1000.0 ); + procflags = CPU_PROP_FPU; + detectCpuFeatures(procflags); + + Platform::SystemInfo.processor.properties = procflags; + SetProcessoInfo(Platform::SystemInfo.processor, vendor, brandString); + + + Con::printf("System & Processor Information:"); + Con::printf(" MacOS Version: %s", [osVersionStr UTF8String]); + Con::printf(" Physical memory installed: %d MB", ramMB); + Con::printf(" Processor: %s", Platform::SystemInfo.processor.name); if 
(Platform::SystemInfo.processor.properties & CPU_PROP_MMX) - Con::printf( " MMX detected"); + Con::printf(" MMX detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE) - Con::printf( " SSE detected"); + Con::printf(" SSE detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2) - Con::printf( " SSE2 detected"); + Con::printf(" SSE2 detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3) - Con::printf( " SSE3 detected"); + Con::printf(" SSE3 detected"); + if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex) + Con::printf(" SSE3ex detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1) - Con::printf( " SSE4.1 detected"); + Con::printf(" SSE4.1 detected"); if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2) - Con::printf( " SSE4.2 detected"); + Con::printf(" SSE4.2 detected"); + if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX) + Con::printf(" AVX detected"); Con::printf( "" ); @@ -277,16 +204,38 @@ void Processor::init() Platform::SystemInfoReady.trigger(); } + namespace CPUInfo { - EConfig CPUCount(U32 &logical, U32 &numCores, U32 &numPhysical) { - // todo properly implement this - logical = [[NSProcessInfo processInfo] activeProcessorCount]; - numCores = [[NSProcessInfo processInfo] activeProcessorCount]; - numPhysical = [[NSProcessInfo processInfo] processorCount]; + EConfig CPUCount(U32 &logical, U32 &physical) { + U32 lraw; + int err; - // todo check for hyperthreading - if (numCores > 1) - return CONFIG_MultiCoreAndHTNotCapable; - return CONFIG_SingleCoreAndHTNotCapable; + err = _getSysCTLvalue("hw.physicalcpu", &lraw); + if (err == 0) + physical = lraw; + else + physical = 1; + + err = _getSysCTLvalue("hw.logicalcpu", &lraw); + if (err == 0) + { + logical = lraw; + } + else + { + // fallback to querying the number of cpus. 
If that fails, then assume same as number of cores + err = _getSysCTLvalue("hw.ncpu", &lraw); + if (err == 0) + logical = lraw; + else + logical = physical; + } + + const bool smtEnabled = logical > physical; + + if (physical == 1) + return smtEnabled ? CONFIG_SingleCoreHTEnabled : CONFIG_SingleCoreAndHTNotCapable; + + return smtEnabled ? CONFIG_MultiCoreAndHTEnabled : CONFIG_MultiCoreAndHTNotCapable; } }