diff --git a/Engine/source/platform/profiler.cpp b/Engine/source/platform/profiler.cpp index 21671218d..ead8380f0 100644 --- a/Engine/source/platform/profiler.cpp +++ b/Engine/source/platform/profiler.cpp @@ -23,11 +23,9 @@ #include "platform/platform.h" #if defined(TORQUE_OS_WIN) -#include <windows.h> // for SetThreadAffinityMask -#endif - -#if defined(TORQUE_OS_MAC) -#include <mach/mach_time.h> +#include <windows.h> // for SetThreadAffinityMask, QueryPerformanceCounter, QueryPerformanceFrequency +#elif defined(TORQUE_OS_MAC) +#include <mach/mach_time.h> // for mach_absolute_time, mach_timebase_info #endif #include "core/stream/fileStream.h" @@ -63,111 +61,61 @@ Vector gProfilerNodeStack; #define PROFILER_DEBUG_POP_NODE() ; #endif -#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM) -// platform specific get hires times... -void startHighResolutionTimer(U32 time[2]) -{ - //time[0] = Platform::getRealMilliseconds(); +#if defined(TORQUE_OS_WIN) - __asm - { - push eax - push edx - push ecx - rdtsc - mov ecx, time - mov DWORD PTR [ecx], eax - mov DWORD PTR [ecx + 4], edx - pop ecx - pop edx - pop eax - } -} - -U32 endHighResolutionTimer(U32 time[2]) -{ - U32 ticks; - //ticks = Platform::getRealMilliseconds() - time[0]; - //return ticks; - - __asm - { - push eax - push edx - push ecx - //db 0fh, 31h - rdtsc - mov ecx, time - sub edx, DWORD PTR [ecx+4] - sbb eax, DWORD PTR [ecx] - mov DWORD PTR ticks, eax - pop ecx - pop edx - pop eax - } - return ticks; -} - -#elif defined(TORQUE_SUPPORTS_GCC_INLINE_X86_ASM) +static bool sQueryPerformanceInit = false; +static U64 sQueryPerformanceFrequency = 0; // platform specific get hires times... 
-void startHighResolutionTimer(U32 time[2]) +void startHighResolutionTimer(U64 &time) { - __asm__ __volatile__( - "rdtsc\n" - : "=a" (time[0]), "=d" (time[1]) - ); + QueryPerformanceCounter((LARGE_INTEGER*)&time); } -U32 endHighResolutionTimer(U32 time[2]) +F64 endHighResolutionTimer(U64 time) { - U32 ticks; - __asm__ __volatile__( - "rdtsc\n" - "sub 0x4(%%ecx), %%edx\n" - "sbb (%%ecx), %%eax\n" - : "=a" (ticks) : "c" (time) - ); - return ticks; + if (!sQueryPerformanceInit) + { + sQueryPerformanceInit = true; + QueryPerformanceFrequency((LARGE_INTEGER*)&sQueryPerformanceFrequency); + } + + U64 current; + QueryPerformanceCounter((LARGE_INTEGER*)&current); + + return ((1000.0 * static_cast<F64>(current-time)) / static_cast<F64>(sQueryPerformanceFrequency)); } #elif defined(TORQUE_OS_MAC) - -void startHighResolutionTimer(U32 time[2]) { - U64 now = mach_absolute_time(); - AssertFatal(sizeof(U32[2]) == sizeof(U64), "Can't pack mach_absolute_time into U32[2]"); - memcpy(time, &now, sizeof(U64)); +void startHighResolutionTimer(U64 &time) { + time = mach_absolute_time(); } -U32 endHighResolutionTimer(U32 time[2]) { +F64 endHighResolutionTimer(U64 time) { static mach_timebase_info_data_t sTimebaseInfo = {0, 0}; U64 now = mach_absolute_time(); - AssertFatal(sizeof(U32[2]) == sizeof(U64), "Can't pack mach_absolute_time into U32[2]"); - U64 then; - memcpy(&then, time, sizeof(U64)); if(sTimebaseInfo.denom == 0){ mach_timebase_info(&sTimebaseInfo); } // Handle the micros/nanos conversion first, because shedding a few bits is better than overflowing. 
- U64 elapsedMicros = ((now - then) / 1000) * sTimebaseInfo.numer / sTimebaseInfo.denom; + F64 elapsedMicros = (static_cast<F64>(now - time) / 1000.0) * static_cast<F64>(sTimebaseInfo.numer) / static_cast<F64>(sTimebaseInfo.denom); - return (U32)elapsedMicros; // Just truncate, and hope we didn't overflow + return elapsedMicros; // Just truncate, and hope we didn't overflow } #else -void startHighResolutionTimer(U32 time[2]) +void startHighResolutionTimer(U64 &time) { - time[0] = Platform::getRealMilliseconds(); + time = (U64)Platform::getRealMilliseconds(); } -U32 endHighResolutionTimer(U32 time[2]) +F64 endHighResolutionTimer(U64 time) { - U32 ticks = Platform::getRealMilliseconds() - time[0]; - return ticks; + return (F64)Platform::getRealMilliseconds() - time; } #endif diff --git a/Engine/source/platform/profiler.h b/Engine/source/platform/profiler.h index 3b11d93fd..b2cc70715 100644 --- a/Engine/source/platform/profiler.h +++ b/Engine/source/platform/profiler.h @@ -153,7 +153,7 @@ struct ProfilerData U32 mHash; U32 mSubDepth; U32 mInvokeCount; - U32 mStartTime[2]; + U64 mStartTime; F64 mTotalTime; F64 mSubTime; #ifdef TORQUE_ENABLE_PROFILE_PATH diff --git a/Engine/source/platformWin32/winTimer.cpp b/Engine/source/platformWin32/winTimer.cpp index 77e820b07..e5c8639a1 100644 --- a/Engine/source/platformWin32/winTimer.cpp +++ b/Engine/source/platformWin32/winTimer.cpp @@ -30,14 +30,11 @@ class Win32Timer : public PlatformTimer { private: - U32 mTickCountCurrent; - U32 mTickCountNext; S64 mPerfCountCurrent; S64 mPerfCountNext; S64 mFrequency; F64 mPerfCountRemainderCurrent; F64 mPerfCountRemainderNext; - bool mUsingPerfCounter; public: Win32Timer() @@ -45,43 +42,26 @@ public: mPerfCountRemainderCurrent = 0.0f; mPerfCountRemainderNext = 0.0f; - // Attempt to use QPC for high res timing, otherwise fallback to GTC. 
- mUsingPerfCounter = QueryPerformanceFrequency((LARGE_INTEGER *) &mFrequency); - if(mUsingPerfCounter) - mUsingPerfCounter = QueryPerformanceCounter((LARGE_INTEGER *) &mPerfCountCurrent); + QueryPerformanceFrequency((LARGE_INTEGER *) &mFrequency); + QueryPerformanceCounter((LARGE_INTEGER *) &mPerfCountCurrent); mPerfCountNext = 0.0; - if (!mUsingPerfCounter) - mTickCountCurrent = GetTickCount(); - else - mTickCountCurrent = 0; - mTickCountNext = 0; } const S32 getElapsedMs() { - if(mUsingPerfCounter) - { - // Use QPC, update remainders so we don't leak time, and return the elapsed time. - QueryPerformanceCounter( (LARGE_INTEGER *) &mPerfCountNext); - F64 elapsedF64 = (1000.0 * F64(mPerfCountNext - mPerfCountCurrent) / F64(mFrequency)); - elapsedF64 += mPerfCountRemainderCurrent; - U32 elapsed = (U32)mFloor(elapsedF64); - mPerfCountRemainderNext = elapsedF64 - F64(elapsed); + // Use QPC, update remainders so we don't leak time, and return the elapsed time. + QueryPerformanceCounter( (LARGE_INTEGER *) &mPerfCountNext); + F64 elapsedF64 = (1000.0 * F64(mPerfCountNext - mPerfCountCurrent) / F64(mFrequency)); + elapsedF64 += mPerfCountRemainderCurrent; + U32 elapsed = (U32)mFloor(elapsedF64); + mPerfCountRemainderNext = elapsedF64 - F64(elapsed); - return elapsed; - } - else - { - // Do something naive with GTC. - mTickCountNext = GetTickCount(); - return mTickCountNext - mTickCountCurrent; - } + return elapsed; } void reset() { // Do some simple copying to reset the timer to 0. - mTickCountCurrent = mTickCountNext; mPerfCountCurrent = mPerfCountNext; mPerfCountRemainderCurrent = mPerfCountRemainderNext; }