mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-03-31 18:15:24 +00:00
Merge pull request #429 from JeffProgrammer/high_resolution_timer
High resolution timer fixes
This commit is contained in:
commit
a0581dce2a
3 changed files with 38 additions and 110 deletions
|
|
@ -23,11 +23,9 @@
|
|||
#include "platform/platform.h"
|
||||
|
||||
#if defined(TORQUE_OS_WIN)
|
||||
#include<Windows.h> // for SetThreadAffinityMask
|
||||
#endif
|
||||
|
||||
#if defined(TORQUE_OS_MAC)
|
||||
#include <mach/mach_time.h>
|
||||
#include<Windows.h> // for SetThreadAffinityMask, QueryPerformanceCounter, QueryPerformanceFrequency
|
||||
#elif defined(TORQUE_OS_MAC)
|
||||
#include <mach/mach_time.h> // for mach_absolute_time, mach_timebase_info
|
||||
#endif
|
||||
|
||||
#include "core/stream/fileStream.h"
|
||||
|
|
@ -63,111 +61,61 @@ Vector<StringTableEntry> gProfilerNodeStack;
|
|||
#define PROFILER_DEBUG_POP_NODE() ;
|
||||
#endif
|
||||
|
||||
#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
|
||||
// platform specific get hires times...
|
||||
void startHighResolutionTimer(U32 time[2])
|
||||
{
|
||||
//time[0] = Platform::getRealMilliseconds();
|
||||
#if defined(TORQUE_OS_WIN)
|
||||
|
||||
__asm
|
||||
{
|
||||
push eax
|
||||
push edx
|
||||
push ecx
|
||||
rdtsc
|
||||
mov ecx, time
|
||||
mov DWORD PTR [ecx], eax
|
||||
mov DWORD PTR [ecx + 4], edx
|
||||
pop ecx
|
||||
pop edx
|
||||
pop eax
|
||||
}
|
||||
}
|
||||
|
||||
U32 endHighResolutionTimer(U32 time[2])
|
||||
{
|
||||
U32 ticks;
|
||||
//ticks = Platform::getRealMilliseconds() - time[0];
|
||||
//return ticks;
|
||||
|
||||
__asm
|
||||
{
|
||||
push eax
|
||||
push edx
|
||||
push ecx
|
||||
//db 0fh, 31h
|
||||
rdtsc
|
||||
mov ecx, time
|
||||
sub edx, DWORD PTR [ecx+4]
|
||||
sbb eax, DWORD PTR [ecx]
|
||||
mov DWORD PTR ticks, eax
|
||||
pop ecx
|
||||
pop edx
|
||||
pop eax
|
||||
}
|
||||
return ticks;
|
||||
}
|
||||
|
||||
#elif defined(TORQUE_SUPPORTS_GCC_INLINE_X86_ASM)
|
||||
static bool sQueryPerformanceInit = false;
|
||||
static U64 sQueryPerformanceFrequency = 0;
|
||||
|
||||
// platform specific get hires times...
|
||||
void startHighResolutionTimer(U32 time[2])
|
||||
void startHighResolutionTimer(U64 &time)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"rdtsc\n"
|
||||
: "=a" (time[0]), "=d" (time[1])
|
||||
);
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&time);
|
||||
}
|
||||
|
||||
U32 endHighResolutionTimer(U32 time[2])
|
||||
F64 endHighResolutionTimer(U64 time)
|
||||
{
|
||||
U32 ticks;
|
||||
__asm__ __volatile__(
|
||||
"rdtsc\n"
|
||||
"sub 0x4(%%ecx), %%edx\n"
|
||||
"sbb (%%ecx), %%eax\n"
|
||||
: "=a" (ticks) : "c" (time)
|
||||
);
|
||||
return ticks;
|
||||
if (!sQueryPerformanceInit)
|
||||
{
|
||||
sQueryPerformanceInit = true;
|
||||
QueryPerformanceFrequency((LARGE_INTEGER*)&sQueryPerformanceFrequency);
|
||||
}
|
||||
|
||||
U64 current;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&current);
|
||||
|
||||
return ((1000.0 * static_cast<F64>(current-time)) / static_cast<F64>(sQueryPerformanceFrequency));
|
||||
}
|
||||
|
||||
#elif defined(TORQUE_OS_MAC)
|
||||
|
||||
|
||||
void startHighResolutionTimer(U32 time[2]) {
|
||||
U64 now = mach_absolute_time();
|
||||
AssertFatal(sizeof(U32[2]) == sizeof(U64), "Can't pack mach_absolute_time into U32[2]");
|
||||
memcpy(time, &now, sizeof(U64));
|
||||
void startHighResolutionTimer(U64 &time) {
|
||||
time = mach_absolute_time();
|
||||
}
|
||||
|
||||
U32 endHighResolutionTimer(U32 time[2]) {
|
||||
F64 endHighResolutionTimer(U64 time) {
|
||||
static mach_timebase_info_data_t sTimebaseInfo = {0, 0};
|
||||
|
||||
U64 now = mach_absolute_time();
|
||||
AssertFatal(sizeof(U32[2]) == sizeof(U64), "Can't pack mach_absolute_time into U32[2]");
|
||||
U64 then;
|
||||
memcpy(&then, time, sizeof(U64));
|
||||
|
||||
if(sTimebaseInfo.denom == 0){
|
||||
mach_timebase_info(&sTimebaseInfo);
|
||||
}
|
||||
// Handle the micros/nanos conversion first, because shedding a few bits is better than overflowing.
|
||||
U64 elapsedMicros = ((now - then) / 1000) * sTimebaseInfo.numer / sTimebaseInfo.denom;
|
||||
F64 elapsedMicros = (static_cast<F64>(now - time) / 1000.0) * static_cast<F64>(sTimebaseInfo.numer) / static_cast<F64>(sTimebaseInfo.denom);
|
||||
|
||||
return (U32)elapsedMicros; // Just truncate, and hope we didn't overflow
|
||||
return elapsedMicros; // Just truncate, and hope we didn't overflow
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void startHighResolutionTimer(U32 time[2])
|
||||
void startHighResolutionTimer(U64 &time)
|
||||
{
|
||||
time[0] = Platform::getRealMilliseconds();
|
||||
time = (U64)Platform::getRealMilliseconds();
|
||||
}
|
||||
|
||||
U32 endHighResolutionTimer(U32 time[2])
|
||||
F64 endHighResolutionTimer(U64 time)
|
||||
{
|
||||
U32 ticks = Platform::getRealMilliseconds() - time[0];
|
||||
return ticks;
|
||||
return (F64)Platform::getRealMilliseconds() - time;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ struct ProfilerData
|
|||
U32 mHash;
|
||||
U32 mSubDepth;
|
||||
U32 mInvokeCount;
|
||||
U32 mStartTime[2];
|
||||
U64 mStartTime;
|
||||
F64 mTotalTime;
|
||||
F64 mSubTime;
|
||||
#ifdef TORQUE_ENABLE_PROFILE_PATH
|
||||
|
|
|
|||
|
|
@ -30,14 +30,11 @@
|
|||
class Win32Timer : public PlatformTimer
|
||||
{
|
||||
private:
|
||||
U32 mTickCountCurrent;
|
||||
U32 mTickCountNext;
|
||||
S64 mPerfCountCurrent;
|
||||
S64 mPerfCountNext;
|
||||
S64 mFrequency;
|
||||
F64 mPerfCountRemainderCurrent;
|
||||
F64 mPerfCountRemainderNext;
|
||||
bool mUsingPerfCounter;
|
||||
public:
|
||||
|
||||
Win32Timer()
|
||||
|
|
@ -45,43 +42,26 @@ public:
|
|||
mPerfCountRemainderCurrent = 0.0f;
|
||||
mPerfCountRemainderNext = 0.0f;
|
||||
|
||||
// Attempt to use QPC for high res timing, otherwise fallback to GTC.
|
||||
mUsingPerfCounter = QueryPerformanceFrequency((LARGE_INTEGER *) &mFrequency);
|
||||
if(mUsingPerfCounter)
|
||||
mUsingPerfCounter = QueryPerformanceCounter((LARGE_INTEGER *) &mPerfCountCurrent);
|
||||
QueryPerformanceFrequency((LARGE_INTEGER *) &mFrequency);
|
||||
QueryPerformanceCounter((LARGE_INTEGER *) &mPerfCountCurrent);
|
||||
mPerfCountNext = 0.0;
|
||||
if (!mUsingPerfCounter)
|
||||
mTickCountCurrent = GetTickCount();
|
||||
else
|
||||
mTickCountCurrent = 0;
|
||||
mTickCountNext = 0;
|
||||
}
|
||||
|
||||
const S32 getElapsedMs()
|
||||
{
|
||||
if(mUsingPerfCounter)
|
||||
{
|
||||
// Use QPC, update remainders so we don't leak time, and return the elapsed time.
|
||||
QueryPerformanceCounter( (LARGE_INTEGER *) &mPerfCountNext);
|
||||
F64 elapsedF64 = (1000.0 * F64(mPerfCountNext - mPerfCountCurrent) / F64(mFrequency));
|
||||
elapsedF64 += mPerfCountRemainderCurrent;
|
||||
U32 elapsed = (U32)mFloor(elapsedF64);
|
||||
mPerfCountRemainderNext = elapsedF64 - F64(elapsed);
|
||||
// Use QPC, update remainders so we don't leak time, and return the elapsed time.
|
||||
QueryPerformanceCounter( (LARGE_INTEGER *) &mPerfCountNext);
|
||||
F64 elapsedF64 = (1000.0 * F64(mPerfCountNext - mPerfCountCurrent) / F64(mFrequency));
|
||||
elapsedF64 += mPerfCountRemainderCurrent;
|
||||
U32 elapsed = (U32)mFloor(elapsedF64);
|
||||
mPerfCountRemainderNext = elapsedF64 - F64(elapsed);
|
||||
|
||||
return elapsed;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Do something naive with GTC.
|
||||
mTickCountNext = GetTickCount();
|
||||
return mTickCountNext - mTickCountCurrent;
|
||||
}
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
// Do some simple copying to reset the timer to 0.
|
||||
mTickCountCurrent = mTickCountNext;
|
||||
mPerfCountCurrent = mPerfCountNext;
|
||||
mPerfCountRemainderCurrent = mPerfCountRemainderNext;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue