From db6d91925d74427be4fae97dd5f80845898c58ee Mon Sep 17 00:00:00 2001 From: Jeff Hutchinson Date: Fri, 6 May 2016 23:44:41 -0400 Subject: [PATCH 1/2] Added profile blocks for GL. --- Engine/source/gfx/gl/gfxGLCircularVolatileBuffer.h | 6 ++++-- Engine/source/gfx/gl/gfxGLDevice.cpp | 5 +++++ Engine/source/gfx/gl/gfxGLShader.cpp | 4 ++++ Engine/source/gfx/gl/gfxGLStateBlock.cpp | 1 + Engine/source/gfx/gl/gfxGLTextureManager.cpp | 8 ++++++++ Engine/source/gfx/gl/gfxGLTextureObject.cpp | 7 +++++++ 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/Engine/source/gfx/gl/gfxGLCircularVolatileBuffer.h b/Engine/source/gfx/gl/gfxGLCircularVolatileBuffer.h index 6d7d0e4b1..dd25255c2 100644 --- a/Engine/source/gfx/gl/gfxGLCircularVolatileBuffer.h +++ b/Engine/source/gfx/gl/gfxGLCircularVolatileBuffer.h @@ -20,7 +20,8 @@ public: } void init(U32 start, U32 end) - { + { + PROFILE_SCOPE(GFXGLQueryFence_issue); mStart = start; mEnd = end; mSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); @@ -35,7 +36,8 @@ public: } void wait() - { + { + PROFILE_SCOPE(GFXGLQueryFence_block); GLbitfield waitFlags = 0; GLuint64 waitDuration = 0; while( 1 ) diff --git a/Engine/source/gfx/gl/gfxGLDevice.cpp b/Engine/source/gfx/gl/gfxGLDevice.cpp index f59e8ac92..4974a04e0 100644 --- a/Engine/source/gfx/gl/gfxGLDevice.cpp +++ b/Engine/source/gfx/gl/gfxGLDevice.cpp @@ -325,6 +325,7 @@ void GFXGLDevice::resurrect() GFXVertexBuffer* GFXGLDevice::findVolatileVBO(U32 numVerts, const GFXVertexFormat *vertexFormat, U32 vertSize) { + PROFILE_SCOPE(GFXGLDevice_findVBPool); for(U32 i = 0; i < mVolatileVBs.size(); i++) if ( mVolatileVBs[i]->mNumVerts >= numVerts && mVolatileVBs[i]->mVertexFormat.isEqual( *vertexFormat ) && @@ -333,6 +334,7 @@ GFXVertexBuffer* GFXGLDevice::findVolatileVBO(U32 numVerts, const GFXVertexForma return mVolatileVBs[i]; // No existing VB, so create one + PROFILE_SCOPE(GFXGLDevice_createVBPool); StrongRefPtr buf(new GFXGLVertexBuffer(GFX, numVerts, vertexFormat, vertSize, GFXBufferTypeVolatile)); buf->registerResourceWithDevice(this); mVolatileVBs.push_back(buf); @@ -358,6 +360,7 @@ GFXVertexBuffer *GFXGLDevice::allocVertexBuffer( U32 numVerts, GFXBufferType bufferType, void* data ) { + PROFILE_SCOPE(GFXGLDevice_allocVertexBuffer); if(bufferType == GFXBufferTypeVolatile) return findVolatileVBO(numVerts, vertexFormat, vertSize); @@ -523,6 +526,7 @@ inline GLsizei GFXGLDevice::primCountToIndexCount(GFXPrimitiveType primType, U32 GFXVertexDecl* GFXGLDevice::allocVertexDecl( const GFXVertexFormat *vertexFormat ) { + PROFILE_SCOPE(GFXGLDevice_allocVertexDecl); typedef Map GFXGLVertexDeclMap; static GFXGLVertexDeclMap declMap; GFXGLVertexDeclMap::Iterator itr = declMap.find( (void*)vertexFormat->getDescription().c_str() ); // description string are interned, safe to use c_str() @@ -855,6 +859,7 @@ void GFXGLDevice::setShader(GFXShader *shader, bool force) void GFXGLDevice::setShaderConstBufferInternal(GFXShaderConstBuffer* buffer) { + PROFILE_SCOPE(GFXGLDevice_setShaderConstBufferInternal); static_cast(buffer)->activate(); } diff --git a/Engine/source/gfx/gl/gfxGLShader.cpp b/Engine/source/gfx/gl/gfxGLShader.cpp index 2e63c61fe..960af7bd1 100644 --- a/Engine/source/gfx/gl/gfxGLShader.cpp +++ b/Engine/source/gfx/gl/gfxGLShader.cpp @@ -344,6 +344,7 @@ void GFXGLShaderConstBuffer::set(GFXShaderConstHandle* handle, const MatrixF* ma void GFXGLShaderConstBuffer::activate() { + PROFILE_SCOPE(GFXGLShaderConstBuffer_activate); mShader->setConstantsFromBuffer(this); mWasLost = false; } @@ -394,6 +395,7 @@ void GFXGLShader::clearShaders() bool GFXGLShader::_init() { + PROFILE_SCOPE(GFXGLShader_Init); // Don't initialize empty shaders. if ( mVertexFile.isEmpty() && mPixelFile.isEmpty() ) return false; @@ -1013,6 +1015,7 @@ bool GFXGLShader::initShader( const Torque::Path &file, bool isVertex, const Vector ¯os ) { + PROFILE_SCOPE(GFXGLShader_CompileShader); GLuint activeShader = glCreateShader(isVertex ? GL_VERTEX_SHADER : GL_FRAGMENT_SHADER); if(isVertex) mVertexShader = activeShader; @@ -1072,6 +1075,7 @@ bool GFXGLShader::initShader( const Torque::Path &file, /// Returns our list of shader constants, the material can get this and just set the constants it knows about const Vector& GFXGLShader::getShaderConstDesc() const { + PROFILE_SCOPE(GFXGLShader_GetShaderConstants); return mConstants; } diff --git a/Engine/source/gfx/gl/gfxGLStateBlock.cpp b/Engine/source/gfx/gl/gfxGLStateBlock.cpp index 34f816dc9..f9ad73dba 100644 --- a/Engine/source/gfx/gl/gfxGLStateBlock.cpp +++ b/Engine/source/gfx/gl/gfxGLStateBlock.cpp @@ -88,6 +88,7 @@ const GFXStateBlockDesc& GFXGLStateBlock::getDesc() const /// @param oldState The current state, used to make sure we don't set redundant states on the device. Pass NULL to reset all states. void GFXGLStateBlock::activate(const GFXGLStateBlock* oldState) { + PROFILE_SCOPE(GFXGLStateBlock_Activate); // Big scary warning copied from Apple docs // http://developer.apple.com/documentation/GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_performance/chapter_13_section_2.html#//apple_ref/doc/uid/TP40001987-CH213-SW12 // Don't set a state that's already set. Once a feature is enabled, it does not need to be enabled again. diff --git a/Engine/source/gfx/gl/gfxGLTextureManager.cpp b/Engine/source/gfx/gl/gfxGLTextureManager.cpp index 70a5e4303..d9962691d 100644 --- a/Engine/source/gfx/gl/gfxGLTextureManager.cpp +++ b/Engine/source/gfx/gl/gfxGLTextureManager.cpp @@ -234,6 +234,7 @@ static void _fastTextureLoad(GFXGLTextureObject* texture, GBitmap* pDL) if(pDL->getFormat() == GFXFormatR8G8B8A8 || pDL->getFormat() == GFXFormatR8G8B8X8) { + PROFILE_SCOPE(Swizzle32_Upload); FrameAllocatorMarker mem; U8* pboMemory = (U8*)mem.alloc(bufSize); GFX->getDeviceSwizzle32()->ToBuffer(pboMemory, pDL->getBits(0), bufSize); @@ -241,6 +242,7 @@ static void _fastTextureLoad(GFXGLTextureObject* texture, GBitmap* pDL) } else { + PROFILE_SCOPE(SwizzleNull_Upload); glBufferSubData(GL_PIXEL_UNPACK_BUFFER_ARB, 0, bufSize, pDL->getBits(0) ); } @@ -262,6 +264,7 @@ static void _slowTextureLoad(GFXGLTextureObject* texture, GBitmap* pDL) bool GFXGLTextureManager::_loadTexture(GFXTextureObject *aTexture, GBitmap *pDL) { + PROFILE_SCOPE(GFXGLTextureManager_loadTexture); GFXGLTextureObject *texture = static_cast(aTexture); AssertFatal(texture->getBinding() == GL_TEXTURE_1D || texture->getBinding() == GL_TEXTURE_2D, @@ -291,6 +294,8 @@ bool GFXGLTextureManager::_loadTexture(GFXTextureObject *aTexture, GBitmap *pDL) bool GFXGLTextureManager::_loadTexture(GFXTextureObject *aTexture, DDSFile *dds) { + PROFILE_SCOPE(GFXGLTextureManager_loadTextureDDS); + AssertFatal(!(dds->mFormat == GFXFormatDXT2 || dds->mFormat == GFXFormatDXT4), "GFXGLTextureManager::_loadTexture - OpenGL does not support DXT2 or DXT4 compressed textures"); GFXGLTextureObject* texture = static_cast(aTexture); @@ -308,6 +313,8 @@ bool GFXGLTextureManager::_loadTexture(GFXTextureObject *aTexture, DDSFile *dds) numMips = 1; for(U32 i = 0; i < numMips; i++) { + PROFILE_SCOPE(GFXGLTexMan_loadSurface); + if(isCompressedFormat(dds->mFormat)) { if((!isPow2(dds->getWidth()) || !isPow2(dds->getHeight())) && GFX->getCardProfiler()->queryProfile("GL::Workaround::noCompressedNPoTTextures")) @@ -344,6 +351,7 @@ bool GFXGLTextureManager::_loadTexture(GFXTextureObject *aTexture, DDSFile *dds) bool GFXGLTextureManager::_loadTexture(GFXTextureObject *aTexture, void *raw) { + PROFILE_SCOPE(GFXGLTextureManager_loadTextureRaw); if(aTexture->getDepth() < 1) return false; diff --git a/Engine/source/gfx/gl/gfxGLTextureObject.cpp b/Engine/source/gfx/gl/gfxGLTextureObject.cpp index ed229e5d9..36c981415 100644 --- a/Engine/source/gfx/gl/gfxGLTextureObject.cpp +++ b/Engine/source/gfx/gl/gfxGLTextureObject.cpp @@ -96,6 +96,9 @@ void GFXGLTextureObject::unlock(U32 mipLevel) if(!mLockedRect.bits) return; + // I know this is in unlock, but in GL we actually do our submission in unlock. + PROFILE_SCOPE(GFXGLTextureObject_lockRT); + PRESERVE_TEXTURE(mBinding); glBindTexture(mBinding, mHandle); glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, mBuffer); @@ -137,6 +140,8 @@ void GFXGLTextureObject::reInit() bool GFXGLTextureObject::copyToBmp(GBitmap * bmp) { + PROFILE_SCOPE(GFXGLTextureObject_copyToBmp); + if (!bmp) return false; @@ -175,6 +180,7 @@ bool GFXGLTextureObject::copyToBmp(GBitmap * bmp) glGetTexImage(mBinding, 0, GFXGLTextureFormat[mFormat], GFXGLTextureType[mFormat], orig); + PROFILE_START(GFXGLTextureObject_copyToBmp_pixCopy); for(int i = 0; i < srcPixelCount; ++i) { dest[0] = orig[0]; @@ -186,6 +192,7 @@ bool GFXGLTextureObject::copyToBmp(GBitmap * bmp) orig += srcBytesPerPixel; dest += dstBytesPerPixel; } + PROFILE_END(); return true; } From 909109713d5fbf6250908b870db2a494f2de6b15 Mon Sep 17 00:00:00 2001 From: Jeff Hutchinson Date: Fri, 6 May 2016 23:45:48 -0400 Subject: [PATCH 2/2] redefined the copyToBMP --- Engine/source/gfx/gl/gfxGLTextureObject.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/Engine/source/gfx/gl/gfxGLTextureObject.cpp b/Engine/source/gfx/gl/gfxGLTextureObject.cpp index 36c981415..e22daa1d8 100644 --- a/Engine/source/gfx/gl/gfxGLTextureObject.cpp +++ b/Engine/source/gfx/gl/gfxGLTextureObject.cpp @@ -140,8 +140,6 @@ void GFXGLTextureObject::reInit() bool GFXGLTextureObject::copyToBmp(GBitmap * bmp) { - PROFILE_SCOPE(GFXGLTextureObject_copyToBmp); - if (!bmp) return false;