Merge pull request #1761 from Bloodknight/intrinsicsfix

Intrinsicsfix
This commit is contained in:
Areloch 2016-09-13 21:43:18 -05:00 committed by GitHub
commit f88975121d
7 changed files with 9 additions and 10 deletions

View file

@ -285,8 +285,7 @@ U32 OggTheoraDecoder::read( OggTheoraFrame** buffer, U32 num )
// Transcode the packet.
#if ( defined( TORQUE_COMPILER_GCC ) || defined( TORQUE_COMPILER_VISUALC ) ) && defined( TORQUE_CPU_X86 )
#if ( defined( TORQUE_COMPILER_GCC ) || defined( TORQUE_COMPILER_VISUALC ) ) && (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
if( ( mTranscoder == TRANSCODER_Auto || mTranscoder == TRANSCODER_SSE2420RGBA ) &&
getDecoderPixelFormat() == PIXEL_FORMAT_420 &&
Platform::SystemInfo.processor.properties & CPU_PROP_SSE2 &&
@ -420,7 +419,7 @@ void OggTheoraDecoder::_transcode( th_ycbcr_buffer ycbcr, U8* buffer, const U32
}
//-----------------------------------------------------------------------------
#if defined( TORQUE_CPU_X86 )
#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
void OggTheoraDecoder::_transcode420toRGBA_SSE2( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height, U32 pitch )
{
AssertFatal( width % 2 == 0, "OggTheoraDecoder::_transcode420toRGBA_SSE2() - width must be multiple of 2" );
@ -560,7 +559,7 @@ void OggTheoraDecoder::_transcode420toRGBA_SSE2( th_ycbcr_buffer ycbcr, U8* buff
jnz hloop
};
#elif defined( TORQUE_COMPILER_GCC ) && defined( TORQUE_CPU_X86 )
#elif defined( TORQUE_COMPILER_GCC ) && (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
asm( "pushal\n" // Save all general-purpose registers.

View file

@ -172,7 +172,7 @@ class OggTheoraDecoder : public OggDecoder,
/// Generic transcoder going from any of the Y'CbCr pixel formats to
/// any RGB format (that is supported by GFXFormatUtils).
void _transcode( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height );
#if defined( TORQUE_CPU_X86 )
#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
/// Transcoder with fixed 4:2:0 to RGBA conversion using SSE2 assembly. Unused on 64-bit architecture.
void _transcode420toRGBA_SSE2( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height, U32 pitch );
#endif

View file

@ -203,7 +203,7 @@ extern "C"
void SSE_MatrixF_x_MatrixF_Aligned(const F32 *matA, const F32 *matB, F32 *result);
}
#elif defined( TORQUE_COMPILER_GCC ) && defined( TORQUE_CPU_X86 )
#elif defined( TORQUE_COMPILER_GCC ) && (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
#define ADD_SSE_FN
void SSE_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)

View file

@ -23,7 +23,7 @@
#ifndef _TSMESHINTRINSICS_ARCH_H_
#define _TSMESHINTRINSICS_ARCH_H_
#if defined(TORQUE_CPU_X86)
#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
# // x86 CPU family implementations
extern void zero_vert_normal_bulk_SSE(const dsize_t count, U8 * __restrict const outPtr, const dsize_t outStride);
#

View file

@ -21,7 +21,7 @@
//-----------------------------------------------------------------------------
#include "ts/tsMesh.h"
#if defined(TORQUE_CPU_X86)
#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
#include "ts/tsMeshIntrinsics.h"
#include <xmmintrin.h>

View file

@ -21,7 +21,7 @@
//-----------------------------------------------------------------------------
#include "ts/tsMesh.h"
#if defined(TORQUE_CPU_X86) && (_MSC_VER >= 1500)
#if (defined(TORQUE_CPU_X86) || defined( TORQUE_CPU_X64 )) && (_MSC_VER >= 1500)
#include "ts/tsMeshIntrinsics.h"
#include <smmintrin.h>

View file

@ -65,7 +65,7 @@ MODULE_BEGIN( TSMeshIntrinsics )
// Find the best implementation for the current CPU
if(Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
{
#if defined(TORQUE_CPU_X86)
#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
zero_vert_normal_bulk = zero_vert_normal_bulk_SSE;
#endif