macos platform fixes for intel macs

This commit is contained in:
Jeff Hutchinson 2021-09-25 22:39:40 -04:00
parent 433d32f237
commit 7cb306b65a
6 changed files with 132 additions and 885 deletions

View file

@ -73,9 +73,10 @@ enum ProcessorProperties
CPU_PROP_RDTSC = (1<<5), ///< Supports Read Time Stamp Counter op.
CPU_PROP_SSE2 = (1<<6), ///< Supports SSE2 instruction set extension.
CPU_PROP_SSE3 = (1<<7), ///< Supports SSE3 instruction set extension.
CPU_PROP_SSE3xt = (1<<8), ///< Supports extended SSE3 instruction set
CPU_PROP_SSE3ex = (1<<8), ///< Supports extended SSE3 instruction set
CPU_PROP_SSE4_1 = (1<<9), ///< Supports SSE4_1 instruction set extension.
CPU_PROP_SSE4_2 = (1<<10), ///< Supports SSE4_2 instruction set extension.
CPU_PROP_SSE4_2 = (1<<10), ///< Supports SSE4_2 instruction set extension.
CPU_PROP_AVX = (1<<11), ///< Supports AVX256 instruction set extension.
CPU_PROP_MP = (1<<11), ///< This is a multi-processor system.
CPU_PROP_LE = (1<<12), ///< This processor is LITTLE ENDIAN.
CPU_PROP_64bit = (1<<13), ///< This processor is 64-bit capable
@ -297,7 +298,6 @@ namespace Platform
bool isHyperThreaded;
U32 numLogicalProcessors;
U32 numPhysicalProcessors;
U32 numAvailableCores;
U32 properties; // CPU type specific enum
} processor;
};

View file

@ -28,48 +28,8 @@
Signal<void(void)> Platform::SystemInfoReady;
enum CPUFlags
void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand)
{
// EDX Register flags
BIT_RDTSC = BIT(4),
BIT_MMX = BIT(23),
BIT_SSE = BIT(25),
BIT_SSE2 = BIT(26),
BIT_3DNOW = BIT(31), // only available for amd cpus in x86
// These use a different value for comparison than the above flags (ECX Register)
BIT_SSE3 = BIT(0),
BIT_SSE3xt = BIT(9),
BIT_SSE4_1 = BIT(19),
BIT_SSE4_2 = BIT(20),
};
// fill the specified structure with information obtained from asm code
void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
char* vendor, char* brand, U32 processor, U32 properties, U32 properties2)
{
// always assume FPU is available in 2021...
pInfo.properties |= CPU_PROP_FPU;
#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64)
pInfo.properties |= CPU_PROP_LE;
#endif
#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64)
pInfo.properties |= CPU_PROP_64bit;
#endif
#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)
pInfo.properties |= (properties & BIT_RDTSC) ? CPU_PROP_RDTSC : 0;
pInfo.properties |= (properties & BIT_MMX) ? CPU_PROP_MMX : 0;
pInfo.properties |= (properties & BIT_SSE) ? CPU_PROP_SSE : 0;
pInfo.properties |= (properties & BIT_SSE2) ? CPU_PROP_SSE2 : 0;
pInfo.properties |= (properties2 & BIT_SSE3) ? CPU_PROP_SSE3 : 0;
pInfo.properties |= (properties2 & BIT_SSE3xt) ? CPU_PROP_SSE3xt : 0;
pInfo.properties |= (properties2 & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0;
pInfo.properties |= (properties2 & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0;
#endif
if (dStricmp(vendor, "GenuineIntel") == 0)
{
pInfo.type = CPU_Intel;
@ -80,9 +40,6 @@ void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
{
pInfo.name = StringTable->insert(brand ? brand : "AMD (unknown)");
pInfo.type = CPU_AMD;
// 3dnow! is only available in AMD cpus on x86. Otherwise its not reliably set.
pInfo.properties |= (properties & BIT_3DNOW) ? CPU_PROP_3DNOW : 0;
}
else if (dStricmp(vendor, "Apple") == 0)
{
@ -92,18 +49,22 @@ void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
else
{
#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)
pInfo.name = StringTable->insert(brand ? brand : "x86 Compatible (unknown)");
pInfo.type = CPU_X86Compatible;
#elif defined(TORQUE_CPU_ARM64)
pInfo.name = StringTable->insert(brand ? brand : "Arm Compatible (unknown)");
pInfo.type = CPU_ArmCompatible;
#else
#error "Unknown CPU Architecture"
#endif
}
// Get multithreading caps.
CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numAvailableCores, pInfo.numPhysicalProcessors );
CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numPhysicalProcessors );
pInfo.isHyperThreaded = CPUInfo::isHyperThreaded( config );
pInfo.isMultiCore = CPUInfo::isMultiCore( config );

View file

@ -1,657 +0,0 @@
// Original code is:
// Copyright (c) 2005 Intel Corporation
// All Rights Reserved
//
// CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform
// The three forms of HW multithreading are: Multi-processor, Multi-core, and
// HyperThreading Technology.
// This application enumerates all the logical processors enabled by OS and BIOS,
// determine the HW topology of these enabled logical processors in the system
// using information provided by CPUID instruction.
// A multi-processing system can support any combination of the three forms of HW
// multi-threading support. The relevant topology can be identified using a
// three level decomposition of the "initial APIC ID" into
// Package_id, core_id, and SMT_id. Such decomposition provides a three-level map of
// the topology of hardware resources and
// allow multi-threaded software to manage shared hardware resources in
// the platform to reduce resource contention
// Multicore detection algorithm for processor and cache topology requires
// all leaf functions of CPUID instructions be available. System administrator
// must ensure BIOS settings is not configured to restrict CPUID functionalities.
//-------------------------------------------------------------------------------------------------
#if defined(TORQUE_OS_LINUX) || defined(LINUX)
// TODO: the GCC code doesn't compile in Release builds with optimizations; move this code to the platform layer
#else
#include "platform/platform.h"
#include "platform/platformCPUCount.h"
#if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX)
#ifdef TORQUE_OS_LINUX
// The Linux source code listing can be compiled using Linux kernel version 2.6
// or higher (e.g. RH 4AS-2.8 using GCC 3.4.4).
// Due to syntax variances of Linux affinity APIs with earlier kernel versions
// and dependence on glibc library versions, compilation on Linux environment
// with older kernels and compilers may require kernel patches or compiler upgrades.
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sched.h>
#define DWORD unsigned long
#elif defined( TORQUE_OS_WIN )
#include <windows.h>
#elif defined( TORQUE_OS_MAC )
# include <sys/types.h>
# include <sys/sysctl.h>
#else
#error Not implemented on platform.
#endif
#include <stdio.h>
#include <assert.h>
namespace CPUInfo {
#define HWD_MT_BIT 0x10000000 // EDX[28] Bit 28 is set if HT or multi-core is supported
#define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical
// processors per physical processor when execute cpuid with
// eax set to 1
#define NUM_CORE_BITS 0xFC000000 // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one
// per physical processor when execute cpuid with
// eax set to 4.
#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
// initial APIC ID for the processor this code is running on.
#ifndef TORQUE_OS_MAC
static U32 CpuIDSupported(void);
static U32 find_maskwidth(unsigned int);
static U32 HWD_MTSupported(void);
static U32 MaxLogicalProcPerPhysicalProc(void);
static U32 MaxCorePerPhysicalProc(void);
static U8 GetAPIC_ID(void);
static U8 GetNzbSubID(U8, U8, U8);
#endif
static char g_s3Levels[2048];
#ifndef TORQUE_OS_MAC
//
// CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return
// the maximum supported standard function.
//
// Both compiler branches execute CPUID with EAX = 0 and hand back the value the
// instruction leaves in EAX.
//
// NOTE(review): the "unavailable" case relies on catch (...) intercepting the fault raised
// by an unsupported instruction. Whether a C++ handler sees such a fault depends on the
// compiler and its exception model -- confirm before relying on the 0 return.
//
static U32 CpuIDSupported(void)
{
U32 maxInputValue = 0;
// If CPUID instruction is supported
#ifdef TORQUE_COMPILER_GCC
try
{
// call cpuid with eax = 0
// EBX is saved and restored by hand around CPUID; ECX and EDX are declared as clobbers.
asm
(
"pushl %%ebx\n\t"
"xorl %%eax,%%eax\n\t"
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a" (maxInputValue)
:
: "%ecx", "%edx"
);
}
catch (...)
{
return(0); // cpuid instruction is unavailable
}
#elif defined( TORQUE_COMPILER_VISUALC )
try
{
// call cpuid with eax = 0
__asm
{
xor eax, eax
cpuid
mov maxInputValue, eax
}
}
catch (...)
{
// cpuid instruction is unavailable
// (falls through: maxInputValue still holds its initial 0)
}
#else
# error Not implemented.
#endif
return maxInputValue;
}
//
// Function returns the maximum cores per physical package. Note that the number of
// AVAILABLE cores per physical to be used by an application might be less than this
// maximum value.
//
// Returns 1 immediately when HWD_MTSupported() reports 0. Otherwise CPUID leaf 0 is used
// to check that leaf 4 exists; if it does, leaf 4 (ECX = 0) is executed and the result is
// ((EAX & NUM_CORE_BITS) >> 26) + 1. When leaf 4 is not reported, Regeax is never
// assigned from CPUID, so the expression evaluates to 0 + 1 = 1.
//
static U32 MaxCorePerPhysicalProc(void)
{
U32 Regeax = 0;
if (!HWD_MTSupported()) return (U32) 1; // Single core
#ifdef TORQUE_COMPILER_GCC
{
// NOTE(review): this path is spread over three separate asm statements and expects
// EAX/ECX to survive from one statement to the next, and the jump to .single_core
// leaves the first statement before its "popl %ebx" runs. Both look fragile -- confirm
// against the compiler in use before touching this code.
asm
(
"pushl %ebx\n\t"
"xorl %eax, %eax\n\t"
"cpuid\n\t"
"cmpl $4, %eax\n\t" // check if cpuid supports leaf 4
"jl .single_core\n\t" // Single core
"movl $4, %eax\n\t"
"movl $0, %ecx\n\t" // start with index = 0; Leaf 4 reports
"popl %ebx\n\t"
); // at least one valid cache level
// Executes the CPUID prepared above and captures EAX.
asm
(
"cpuid"
: "=a" (Regeax)
:
: "%ecx", "%edx"
);
// Landing pads for the two outcomes of the leaf-4 check above.
asm
(
"jmp .multi_core\n"
".single_core:\n\t"
"xor %eax, %eax\n"
".multi_core:"
);
}
#elif defined( TORQUE_COMPILER_VISUALC )
__asm
{
xor eax, eax
cpuid
cmp eax, 4 // check if cpuid supports leaf 4
jl single_core // Single core
mov eax, 4
mov ecx, 0 // start with index = 0; Leaf 4 reports
cpuid // at least one valid cache level
mov Regeax, eax
jmp multi_core
single_core:
xor eax, eax
multi_core:
}
#else
# error Not implemented.
#endif
// The field holds "cores minus one" (see NUM_CORE_BITS), hence the +1.
return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1;
}
//
// The function returns 0 when the hardware multi-threaded bit is not set.
//
// When CPUID leaf 1 is available (CpuIDSupported() >= 1) the leaf is executed and EDX is
// masked with HWD_MT_BIT. The return value is therefore either 0 or HWD_MT_BIT itself,
// not 1 -- callers should only test it for zero / non-zero. If leaf 1 is not available,
// Regedx keeps its initial 0 and the function returns 0.
//
static U32 HWD_MTSupported(void)
{
U32 Regedx = 0;
if ((CpuIDSupported() >= 1))
{
#ifdef TORQUE_COMPILER_GCC
// EBX is saved and restored by hand around CPUID; EAX and ECX are declared as clobbers.
asm
(
"pushl %%ebx\n\t"
"movl $1,%%eax\n\t"
"cpuid\n\t"
"popl %%ebx\n\t"
: "=d" (Regedx)
:
: "%eax","%ecx"
);
#elif defined( TORQUE_COMPILER_VISUALC )
__asm
{
mov eax, 1
cpuid
mov Regedx, edx
}
#else
# error Not implemented.
#endif
}
return (Regedx & HWD_MT_BIT);
}
//
// Function returns the maximum logical processors per physical package. Note that the number of
// AVAILABLE logical processors per physical to be used by an application might be less than this
// maximum value.
//
// Returns 1 when HWD_MTSupported() reports 0. Otherwise executes CPUID leaf 1 and returns
// the field selected by NUM_LOGICAL_BITS (EBX bits 16-23).
//
static U32 MaxLogicalProcPerPhysicalProc(void)
{
U32 Regebx = 0;
if (!HWD_MTSupported()) return (U32) 1;
#ifdef TORQUE_COMPILER_GCC
// EBX is the output operand here; EAX, ECX and EDX are declared as clobbers.
asm
(
"movl $1,%%eax\n\t"
"cpuid"
: "=b" (Regebx)
:
: "%eax","%ecx","%edx"
);
#elif defined( TORQUE_COMPILER_VISUALC )
__asm
{
mov eax, 1
cpuid
mov Regebx, ebx
}
#else
# error Not implemented.
#endif
return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16);
}
//
// Returns the 8-bit initial APIC ID of the processor this code is currently running on:
// CPUID leaf 1 is executed and the field selected by INITIAL_APIC_ID_BITS (EBX bits 24-31)
// is returned. The result is specific to whichever processor executes the instruction, so
// CPUCount() pins the thread to one processor before each call.
//
static U8 GetAPIC_ID(void)
{
U32 Regebx = 0;
#ifdef TORQUE_COMPILER_GCC
// EBX is the output operand here; EAX, ECX and EDX are declared as clobbers.
asm
(
"movl $1, %%eax\n\t"
"cpuid"
: "=b" (Regebx)
:
: "%eax","%ecx","%edx"
);
#elif defined( TORQUE_COMPILER_VISUALC )
__asm
{
mov eax, 1
cpuid
mov Regebx, ebx
}
#else
# error Not implemented.
#endif
return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24);
}
//
// Determine the width of the bit field that can represent the value count_item.
//
// As written in the Visual C++ branch: the result is 0 when the low 16 bits of
// (CountItem - 1) are all zero; otherwise it is the index of the highest set bit in those
// low 16 bits, plus one. Only the low 16 bits take part because the scan uses the 16-bit
// form of BSR (ax/cx).
//
// NOTE(review): the GCC branch splits the same sequence over three asm statements, pushes
// and pops registers across statement boundaries, and jumps to a plain label named "next".
// It presumably computes the same value as the Visual C++ branch -- confirm before relying
// on it.
//
U32 find_maskwidth(U32 CountItem)
{
U32 MaskWidth,
count = CountItem;
#ifdef TORQUE_COMPILER_GCC
// Part 1: save ECX/EAX, load count into EAX via the input constraint, clear ECX.
asm
(
#ifdef __x86_64__ // define constant to compile
"push %%rcx\n\t" // under 64-bit Linux
"push %%rax\n\t"
#else
"pushl %%ecx\n\t"
"pushl %%eax\n\t"
#endif
// "movl $count, %%eax\n\t" //done by Assembler below
"xorl %%ecx, %%ecx"
// "movl %%ecx, MaskWidth\n\t" //done by Assembler below
: "=c" (MaskWidth)
: "a" (count)
// : "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler
//to put them back when we are done
);
// Part 2: scan (count - 1) for its highest set bit; skip the increment when none is set.
asm
(
"decl %%eax\n\t"
"bsrw %%ax,%%cx\n\t"
"jz next\n\t"
"incw %%cx\n\t"
// "movl %%ecx, MaskWidth\n" //done by Assembler below
: "=c" (MaskWidth)
:
);
// Part 3: jump target for the "no bit set" case, then restore the saved registers.
asm
(
"next:\n\t"
#ifdef __x86_64__
"pop %rax\n\t"
"pop %rcx"
#else
"popl %eax\n\t"
"popl %ecx"
#endif
);
#elif defined( TORQUE_COMPILER_VISUALC )
__asm
{
mov eax, count
mov ecx, 0
mov MaskWidth, ecx
dec eax
bsr cx, ax
jz next
inc cx
mov MaskWidth, ecx
next:
}
#else
# error Not implemented.
#endif
return MaskWidth;
}
//
// Isolates one sub-field of the 8-bit identifier FullID and returns it still positioned at
// its original bit offset (the value is masked, not shifted down).
//
//   FullID        - the 8-bit ID to take the field from.
//   MaxSubIDValue - largest count the field must be able to hold; its bit width is
//                   obtained from find_maskwidth().
//   ShiftCount    - bit position at which the field starts.
//
static U8 GetNzbSubID(U8 FullID,
                      U8 MaxSubIDValue,
                      U8 ShiftCount)
{
   const U32 fieldWidth = find_maskwidth((U32) MaxSubIDValue);

   // Every bit of the byte that lies above the field.
   const U8 aboveField = (U8) (0xff << (ShiftCount + fieldWidth));

   // Bits from the start of the field upwards, minus the bits above it, leaves the field.
   const U8 fieldMask = (U8) ((0xff << ShiftCount) ^ aboveField);

   return (U8) (FullID & fieldMask);
}
#endif
//
//
//
// Counts the logical processors, cores and physical packages visible to this process and
// classifies the machine's multi-core / hyper-threading configuration.
//
// Outputs (by reference):
//   TotAvailLogical - number of logical processors found. Only written on the paths that
//                     reach the classification at the bottom; every early
//                     "return CONFIG_UserConfigIssue" leaves it untouched.
//   TotAvailCore    - number of distinct cores found (starts at 1).
//   PhysicalNum     - number of distinct physical packages found (starts at 1).
//
// Returns CONFIG_UserConfigIssue when a sysctl query fails (Mac) or when the process is
// not allowed to run on every processor (Linux / Windows); otherwise one of the
// single-core / multi-core EConfig values.
//
// Side effects: clears g_s3Levels and, on the non-Mac path, appends one line of topology
// text per visited processor to it. The non-Mac path also changes the affinity of the
// calling thread/process while it enumerates and restores it afterwards.
//
EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
{
EConfig StatusFlag = CONFIG_UserConfigIssue;
g_s3Levels[0] = 0;
TotAvailCore = 1;
PhysicalNum = 1;
U32 numLPEnabled = 0;
S32 MaxLPPerCore = 1;
#ifdef TORQUE_OS_MAC
//FIXME: This isn't a proper port but more or less just some sneaky cheating
// to get around having to mess with yet another crap UNIX-style API. Seems
// like there isn't a way to do this that's working across all OSX incarnations
// and machine configurations anyway.
//
// On this path numLPEnabled stays 0 and MaxLPPerCore stays 1, so the classification at
// the bottom can only produce one of the two "HTNotCapable" results.
S32 numCPUs;
S32 numPackages;
// Get the number of CPUs.
size_t len = sizeof( numCPUs );
if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 )
return CONFIG_UserConfigIssue;
// Get the number of packages.
len = sizeof( numPackages );
if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 )
return CONFIG_UserConfigIssue;
TotAvailCore = numCPUs;
TotAvailLogical = numCPUs;
PhysicalNum = numPackages;
#else
U32 dwAffinityMask;
S32 j = 0;
U8 apicID, PackageIDMask;
// Per-logical-processor ID tables, indexed by j.
// NOTE(review): these hold 256 entries and j is bounded only by the platform loop
// condition below -- confirm the processor count cannot exceed the table size.
U8 tblPkgID[256], tblCoreID[256], tblSMTID[256];
char tmp[256];
#ifdef TORQUE_OS_LINUX
//we need to make sure that this process is allowed to run on
//all of the logical processors that the OS itself can run on.
//A process could acquire/inherit affinity settings that restricts the
// current process to run on a subset of all logical processor visible to OS.
// Linux doesn't easily allow us to look at the Affinity Bitmask directly,
// but it does provide an API to test affinity maskbits of the current process
// against each logical processor visible under OS.
S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many
//CPUs are currently enabled.
//this will tell us which processors this process can run on.
cpu_set_t allowedCPUs;
sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs);
for (S32 i = 0; i < sysNumProcs; i++ )
{
if ( CPU_ISSET(i, &allowedCPUs) == 0 )
return CONFIG_UserConfigIssue;
}
#elif defined( TORQUE_OS_WIN )
DWORD dwProcessAffinity, dwSystemAffinity;
GetProcessAffinityMask(GetCurrentProcess(),
&dwProcessAffinity,
&dwSystemAffinity);
if (dwProcessAffinity != dwSystemAffinity) // not all CPUs are enabled
return CONFIG_UserConfigIssue;
#else
# error Not implemented.
#endif
// Assume that cores within a package have the SAME number of
// logical processors. Also, values returned by
// MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have
// to be power of 2.
MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc();
dwAffinityMask = 1;
// The loop header and the opening brace of the "if" are supplied by the platform branch
// below; the shared loop body follows the #endif and both scopes are closed at the
// "// if" and "// while" markers.
#ifdef TORQUE_OS_LINUX
cpu_set_t currentCPU;
while ( j < sysNumProcs )
{
CPU_ZERO(&currentCPU);
CPU_SET(j, &currentCPU);
if ( sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == 0 )
{
sleep(0); // Ensure system to switch to the right CPU
#elif defined( TORQUE_OS_WIN )
while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity)
{
if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask))
{
Sleep(0); // Ensure system to switch to the right CPU
#else
# error Not implemented.
#endif
apicID = GetAPIC_ID();
// Store SMT ID and core ID of each logical processor
// Shift value for SMT ID is 0
// Shift value for core ID is the mask width for maximum logical
// processors per core
tblSMTID[j] = GetNzbSubID(apicID, MaxLPPerCore, 0);
U8 maxCorePPP = MaxCorePerPhysicalProc();
U8 maskWidth = find_maskwidth(MaxLPPerCore);
tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth);
// Extract package ID, assume single cluster.
// Shift value is the mask width for max Logical per package
PackageIDMask = (unsigned char) (0xff <<
find_maskwidth(MaxLogicalProcPerPhysicalProc()));
tblPkgID[j] = apicID & PackageIDMask;
// Record this processor's decomposition in the g_s3Levels text buffer.
sprintf(tmp," AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d, SMT ID = %d\n",
dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]);
dStrcat(g_s3Levels, tmp, 2048);
numLPEnabled ++; // Number of available logical processors in the system.
} // if
j++;
dwAffinityMask = 1 << j;
} // while
// restore the affinity setting to its original state
#ifdef TORQUE_OS_LINUX
sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs);
sleep(0);
#elif defined( TORQUE_OS_WIN )
SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity);
Sleep(0);
#else
# error Not implemented.
#endif
TotAvailLogical = numLPEnabled;
//
// Count available cores (TotAvailCore) in the system
//
// Each distinct (package ID | core ID) value gets one bucket; pCoreMask collects a bit per
// logical processor that landed in that bucket.
U8 CoreIDBucket[256];
DWORD ProcessorMask, pCoreMask[256];
U32 i, ProcessorNum;
CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0];
ProcessorMask = 1;
pCoreMask[0] = ProcessorMask;
for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
{
ProcessorMask <<= 1;
for (i = 0; i < TotAvailCore; i++)
{
// Comparing bit-fields of logical processors residing in different packages
// Assuming the bit-masks are the same on all processors in the system.
if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i])
{
pCoreMask[i] |= ProcessorMask;
break;
}
} // for i
if (i == TotAvailCore) // did not match any bucket. Start a new one.
{
CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum];
pCoreMask[i] = ProcessorMask;
TotAvailCore++; // Number of available cores in the system
}
} // for ProcessorNum
//
// Count physical processor (PhysicalNum) in the system
//
// Same bucketing as above, keyed on the package ID alone.
U8 PackageIDBucket[256];
DWORD pPackageMask[256];
PackageIDBucket[0] = tblPkgID[0];
ProcessorMask = 1;
pPackageMask[0] = ProcessorMask;
for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
{
ProcessorMask <<= 1;
for (i = 0; i < PhysicalNum; i++)
{
// Comparing bit-fields of logical processors residing in different packages
// Assuming the bit-masks are the same on all processors in the system.
if (tblPkgID[ProcessorNum]== PackageIDBucket[i])
{
pPackageMask[i] |= ProcessorMask;
break;
}
} // for i
if (i == PhysicalNum) // did not match any bucket. Start a new one.
{
PackageIDBucket[i] = tblPkgID[ProcessorNum];
pPackageMask[i] = ProcessorMask;
PhysicalNum++; // Total number of physical processors in the system
}
} // for ProcessorNum
#endif
//
// Check to see if the system is multi-core
// Check if the system is hyper-threading
//
// More cores than packages means multi-core. Within each case, MaxLPPerCore == 1 means
// the cores cannot run more than one logical processor; otherwise HT counts as enabled
// only when more logical processors than cores were actually found.
if (TotAvailCore > PhysicalNum)
{
// Multi-core
if (MaxLPPerCore == 1)
StatusFlag = CONFIG_MultiCoreAndHTNotCapable;
else if (numLPEnabled > TotAvailCore)
StatusFlag = CONFIG_MultiCoreAndHTEnabled;
else StatusFlag = CONFIG_MultiCoreAndHTDisabled;
}
else
{
// Single-core
if (MaxLPPerCore == 1)
StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
else if (numLPEnabled > TotAvailCore)
StatusFlag = CONFIG_SingleCoreHTEnabled;
else StatusFlag = CONFIG_SingleCoreHTDisabled;
}
return StatusFlag;
}
} // namespace CPUInfo
#endif
#endif

View file

@ -29,13 +29,10 @@ namespace CPUInfo
{
enum EConfig
{
CONFIG_UserConfigIssue,
CONFIG_SingleCoreHTEnabled,
CONFIG_SingleCoreHTDisabled,
CONFIG_SingleCoreAndHTNotCapable,
CONFIG_MultiCoreAndHTNotCapable,
CONFIG_MultiCoreAndHTEnabled,
CONFIG_MultiCoreAndHTDisabled,
};
inline bool isMultiCore( EConfig config )
@ -44,7 +41,6 @@ namespace CPUInfo
{
case CONFIG_MultiCoreAndHTNotCapable:
case CONFIG_MultiCoreAndHTEnabled:
case CONFIG_MultiCoreAndHTDisabled:
return true;
default:
@ -65,11 +61,10 @@ namespace CPUInfo
}
}
EConfig CPUCount( U32& totalAvailableLogical,
U32& totalAvailableCores,
U32& numPhysical );
EConfig CPUCount( U32& totalAvailableLogical, U32& totalAvailableCores );
} // namespace CPUInfo
void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand);
#endif // _TORQUE_PLATFORM_PLATFORMCOUNT_H_

View file

@ -322,10 +322,9 @@ ThreadPool::ThreadPool( const char* name, U32 numThreads )
// Platform::SystemInfo will not yet have been initialized.
U32 numLogical = 0;
U32 numPhysical = 0;
U32 numCores = 0;
CPUInfo::CPUCount( numLogical, numCores, numPhysical );
CPUInfo::CPUCount( numLogical, numCores );
const U32 baseCount = getMax( numLogical, numCores );
mNumThreads = (baseCount > 0) ? baseCount : 2;

View file

@ -35,15 +35,6 @@
// we now have to use NSProcessInfo
#import <Foundation/Foundation.h>
//recently removed in Xcode 8 - most likely don't need these anymore
#ifndef CPUFAMILY_INTEL_YONAH
#define CPUFAMILY_INTEL_YONAH 0x73d67300
#endif
#ifndef CPUFAMILY_INTEL_MEROM
#define CPUFAMILY_INTEL_MEROM 0x426f69ef
#endif
// Original code by Sean O'Brien (http://www.garagegames.com/community/forums/viewthread/81815).
@ -89,8 +80,58 @@ int _getSysCTLvalue(const char key[], T * dest) {
Platform::SystemInfo_struct Platform::SystemInfo;
#define BASE_MHZ_SPEED 0
//TODO update cpu list
#define BASE_MHZ_SPEED 1000
static void detectCpuFeatures(U32 &procflags)
{
   // Every capability is looked up through _getSysCTLvalue(). A zero result only says the
   // key exists; the capability is treated as present only when the value read back also
   // matches what is expected for that key. Matching capabilities are OR-ed into
   // procflags; bits already set by the caller are left alone.
   //
   // NOTE(review): CPU_PROP_AVX and CPU_PROP_MP are both declared as (1<<11) in
   // ProcessorProperties, so the two flags cannot be told apart once set -- confirm and
   // renumber in the enum.
   struct OptionalFeature
   {
      const char *sysctlKey;
      U32 propertyBit;
   };

   // Chip-specific instruction-set extensions: present when the value reads exactly 1.
   static const OptionalFeature optionalFeatures[] =
   {
      { "hw.optional.mmx",              CPU_PROP_MMX    },
      { "hw.optional.sse",              CPU_PROP_SSE    },
      { "hw.optional.sse2",             CPU_PROP_SSE2   },
      { "hw.optional.sse3",             CPU_PROP_SSE3   },
      { "hw.optional.supplementalsse3", CPU_PROP_SSE3ex },
      { "hw.optional.sse4_1",           CPU_PROP_SSE4_1 },
      { "hw.optional.sse4_2",           CPU_PROP_SSE4_2 },
      { "hw.optional.avx1_0",           CPU_PROP_AVX    },
   };
   const U32 featureCount = sizeof(optionalFeatures) / sizeof(optionalFeatures[0]);

   U32 value;
   for (U32 i = 0; i < featureCount; ++i)
   {
      const OptionalFeature &feature = optionalFeatures[i];
      if (_getSysCTLvalue<U32>(feature.sysctlKey, &value) == 0 && value == 1)
         procflags |= feature.propertyBit;
   }

   // More than one CPU reported.
   if (_getSysCTLvalue<U32>("hw.ncpu", &value) == 0 && value > 1)
      procflags |= CPU_PROP_MP;

   // 64-bit capable processor.
   if (_getSysCTLvalue<U32>("hw.cpu64bit_capable", &value) == 0 && value == 1)
      procflags |= CPU_PROP_64bit;

   // A byte order of 1234 is taken to mean little endian.
   if (_getSysCTLvalue<U32>("hw.byteorder", &value) == 0 && value == 1234)
      procflags |= CPU_PROP_LE;
}
void Processor::init()
{
U32 procflags;
@ -98,178 +139,64 @@ void Processor::init()
char buf[255];
U32 lraw;
U64 llraw;
Con::printf( "System & Processor Information:" );
// Gestalt has been deprecated since Mac OSX Mountain Lion and has stopped working on
// Mac OSX Yosemite. we have to use NSProcessInfo now.
// Availability: Mac OS 10.2 or greater.
NSString *osVersionStr = [[NSProcessInfo processInfo] operatingSystemVersionString];
Con::printf( " OSX Version: %s", [osVersionStr UTF8String]);
err = _getSysCTLstring("kern.ostype", buf, sizeof(buf));
if (err)
Con::printf( " Unable to determine OS type\n" );
else
Con::printf( " Mac OS Kernel name: %s", buf);
err = _getSysCTLstring("kern.osrelease", buf, sizeof(buf));
if (err)
Con::printf( " Unable to determine OS release number\n" );
else
Con::printf( " Mac OS Kernel version: %s", buf );
S32 ramMB;
err = _getSysCTLvalue<U64>("hw.memsize", &llraw);
if (err)
Con::printf( " Unable to determine amount of physical RAM\n" );
ramMB = 512;
else
Con::printf( " Physical memory installed: %d MB", (llraw >> 20));
err = _getSysCTLvalue<U32>("hw.usermem", &lraw);
if (err)
Con::printf( " Unable to determine available user address space\n");
else
Con::printf( " Addressable user memory: %d MB", (lraw >> 20));
////////////////////////////////
// Values for the Family Type, CPU Type and CPU Subtype are defined in the
// SDK files for the Mach Kernel ==> mach/machine.h
////////////////////////////////
// CPU Family, Type, and Subtype
cpufam = 0;
cputype = 0;
cpusub = 0;
err = _getSysCTLvalue<U32>("hw.cpufamily", &lraw);
if (err)
Con::printf( " Unable to determine 'family' of CPU\n");
else {
cpufam = (int) lraw;
err = _getSysCTLvalue<U32>("hw.cputype", &lraw);
if (err)
Con::printf( " Unable to determine CPU type\n");
else {
cputype = (int) lraw;
err = _getSysCTLvalue<U32>("hw.cpusubtype", &lraw);
if (err)
Con::printf( " Unable to determine CPU subtype\n");
else
cpusub = (int) lraw;
// If we've made it this far,
Con::printf( " Installed processor ID: Family 0x%08x Type %d Subtype %d",cpufam, cputype,cpusub);
}
}
ramMB = llraw >> 20;
char brandString[256];
err = _getSysCTLstring("machdep.cpu.brand_string", brandString, sizeof(brandString));
if (err)
brandString[0] = '\0';
char vendor[256];
err = _getSysCTLstring("machdep.cpu.vendor", vendor, sizeof(vendor));
if (err)
vendor[0] = '\0';
// The Gestalt version was known to have issues with some Processor Upgrade cards
// but it is uncertain whether this version has similar issues.
err = _getSysCTLvalue<U64>("hw.cpufrequency", &llraw);
if (err) {
llraw = BASE_MHZ_SPEED;
Con::printf( " Unable to determine CPU Frequency. Defaulting to %d MHz\n", llraw);
} else {
llraw /= 1000000;
Con::printf( " Installed processor clock frequency: %d MHz", llraw);
}
Platform::SystemInfo.processor.mhz = (unsigned int)llraw;
// Here's one that the original version of this routine couldn't do -- number
// of processors (cores)
U32 ncpu = 1;
err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
if (err)
Con::printf( " Unable to determine number of processor cores\n");
else
{
ncpu = lraw;
Con::printf( " Installed/available processor cores: %d", lraw);
}
// Now use CPUFAM to determine and then store the processor type
// and 'friendly name' in GG-accessible structure. Note that since
// we have access to the Family code, the Type and Subtypes are useless.
//
// NOTE: Even this level of detail is almost assuredly not needed anymore
// and the Optional Capability flags (further down) should be more than enough.
switch(cpufam)
{
case CPUFAMILY_INTEL_YONAH:
Platform::SystemInfo.processor.type = CPU_Intel_Core;
if( ncpu == 2 )
Platform::SystemInfo.processor.name = StringTable->insert("Intel Core Duo");
else
Platform::SystemInfo.processor.name = StringTable->insert("Intel Core");
break;
case CPUFAMILY_INTEL_PENRYN:
case CPUFAMILY_INTEL_MEROM:
Platform::SystemInfo.processor.type = CPU_Intel_Core2;
if( ncpu == 4 )
Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Quad");
else
Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Duo");
break;
case CPUFAMILY_INTEL_NEHALEM:
Platform::SystemInfo.processor.type = CPU_Intel_Core2;
Platform::SystemInfo.processor.name = StringTable->insert( "Intel 'Nehalem' Core Processor" );
break;
default:
// explain why we can't get the processor type.
Con::warnf( " Unknown Processor (family, type, subtype): 0x%x\t%d %d", cpufam, cputype, cpusub);
// for now, identify it as an x86 processor, because Apple is moving to Intel chips...
Platform::SystemInfo.processor.type = CPU_X86Compatible;
Platform::SystemInfo.processor.name = StringTable->insert("Unknown Processor, assuming x86 Compatible");
break;
}
// Now we can directly query the system about a litany of "Optional" processor capabilities
// and determine the status by using BOTH the 'err' value and the 'lraw' value. If we request
// a non-existant feature from SYSCTL(), the 'err' result will be -1; 0 denotes it exists
// >>>> BUT <<<<<
// it may not be supported, only defined. Thus we need to check 'lraw' to determine if it's
// actually supported/implemented by the processor: 0 = no, 1 = yes, others are undefined.
procflags = 0;
// Seriously this one should be an Assert()
err = _getSysCTLvalue<U32>("hw.optional.floatingpoint", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_FPU;
// List of chip-specific features
err = _getSysCTLvalue<U32>("hw.optional.mmx", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_MMX;
err = _getSysCTLvalue<U32>("hw.optional.sse", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE;
err = _getSysCTLvalue<U32>("hw.optional.sse2", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE2;
err = _getSysCTLvalue<U32>("hw.optional.sse3", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3;
err = _getSysCTLvalue<U32>("hw.optional.supplementalsse3", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3xt;
err = _getSysCTLvalue<U32>("hw.optional.sse4_1", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_1;
err = _getSysCTLvalue<U32>("hw.optional.sse4_2", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_2;
// Finally some architecture-wide settings
err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
if ((err==0)&&(lraw>1)) procflags |= CPU_PROP_MP;
err = _getSysCTLvalue<U32>("hw.cpu64bit_capable", &lraw);
if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_64bit;
err = _getSysCTLvalue<U32>("hw.byteorder", &lraw);
if ((err==0)&&(lraw==1234)) procflags |= CPU_PROP_LE;
Platform::SystemInfo.processor.properties = procflags;
Con::printf( "%s, %2.2f GHz", Platform::SystemInfo.processor.name, F32( Platform::SystemInfo.processor.mhz ) / 1000.0 );
procflags = CPU_PROP_FPU;
detectCpuFeatures(procflags);
Platform::SystemInfo.processor.properties = procflags;
SetProcessoInfo(Platform::SystemInfo.processor, vendor, brandString);
Con::printf("System & Processor Information:");
Con::printf(" MacOS Version: %s", [osVersionStr UTF8String]);
Con::printf(" Physical memory installed: %d MB", ramMB);
Con::printf(" Processor: %s", Platform::SystemInfo.processor.name);
if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
Con::printf( " MMX detected");
Con::printf(" MMX detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
Con::printf( " SSE detected");
Con::printf(" SSE detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
Con::printf( " SSE2 detected");
Con::printf(" SSE2 detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
Con::printf( " SSE3 detected");
Con::printf(" SSE3 detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
Con::printf(" SSE3ex detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
Con::printf( " SSE4.1 detected");
Con::printf(" SSE4.1 detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
Con::printf( " SSE4.2 detected");
Con::printf(" SSE4.2 detected");
if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
Con::printf(" AVX detected");
Con::printf( "" );
@ -277,16 +204,38 @@ void Processor::init()
Platform::SystemInfoReady.trigger();
}
namespace CPUInfo {
EConfig CPUCount(U32 &logical, U32 &numCores, U32 &numPhysical) {
// todo properly implement this
logical = [[NSProcessInfo processInfo] activeProcessorCount];
numCores = [[NSProcessInfo processInfo] activeProcessorCount];
numPhysical = [[NSProcessInfo processInfo] processorCount];
EConfig CPUCount(U32 &logical, U32 &physical) {
U32 lraw;
int err;
// todo check for hyperthreading
if (numCores > 1)
return CONFIG_MultiCoreAndHTNotCapable;
return CONFIG_SingleCoreAndHTNotCapable;
err = _getSysCTLvalue<U32>("hw.physicalcpu", &lraw);
if (err == 0)
physical = lraw;
else
physical = 1;
err = _getSysCTLvalue<U32>("hw.logicalcpu", &lraw);
if (err == 0)
{
logical = lraw;
}
else
{
// fallback to querying the number of cpus. If that fails, then assume same as number of cores
err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
if (err == 0)
logical = lraw;
else
logical = physical;
}
const bool smtEnabled = logical > physical;
if (physical == 1)
return smtEnabled ? CONFIG_SingleCoreHTEnabled : CONFIG_SingleCoreAndHTNotCapable;
return smtEnabled ? CONFIG_MultiCoreAndHTEnabled : CONFIG_MultiCoreAndHTNotCapable;
}
}