diff --git a/Engine/lib/squish/ChangeLog b/Engine/lib/squish/ChangeLog deleted file mode 100644 index ba03f4c57..000000000 --- a/Engine/lib/squish/ChangeLog +++ /dev/null @@ -1,52 +0,0 @@ -1.10 -* Iterative cluster fit is now considered to be a new compression mode -* The core cluster fit is now 4x faster using contributions by Ignacio -Castano from NVIDIA -* The single colour lookup table has been halved by exploiting symmetry - -1.9 -* Added contributed SSE1 truncate implementation -* Changed use of SQUISH_USE_SSE to be 1 for SSE and 2 for SSE2 instructions -* Cluster fit is now iterative to further reduce image error - -1.8 -* Switched from using floor to trunc for much better SSE performance (again) -* Xcode build now expects libpng in /usr/local for extra/squishpng - -1.7 -* Fixed floating-point equality issue in clusterfit sort (x86 affected only) -* Implemented proper SSE(2) floor function for 50% speedup on SSE builds -* The range fit implementation now uses the correct colour metric - -1.6 -* Fixed bug in CompressImage where masked pixels were not skipped over -* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels -* Fixed major DXT1 bug that can generate unexpected transparent pixels - -1.5 -* Added CompressMasked function to handle incomplete DXT blocks more cleanly -* Added kWeightColourByAlpha flag for better quality images when alpha blending - -1.4 -* Fixed stack overflow in rangefit - -1.3 -* Worked around SSE floor implementation bug, proper fix needed! 
-* This release has visual studio and makefile builds that work - -1.2 -* Added provably optimal single colour compressor -* Added extra/squishgen.cpp that generates single colour lookup tables - -1.1 -* Fixed a DXT1 colour output bug -* Changed argument order for Decompress function to match Compress -* Added GetStorageRequirements function -* Added CompressImage function -* Added DecompressImage function -* Moved squishtool.cpp to extra/squishpng.cpp -* Added extra/squishtest.cpp - -1.0 -* Initial release - diff --git a/Engine/lib/squish/LICENSE b/Engine/lib/squish/LICENSE new file mode 100644 index 000000000..ed1c78d93 --- /dev/null +++ b/Engine/lib/squish/LICENSE @@ -0,0 +1,20 @@ + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/Engine/lib/squish/alpha.cpp b/Engine/lib/squish/alpha.cpp index 2d77b6ba4..0f94e2147 100644 --- a/Engine/lib/squish/alpha.cpp +++ b/Engine/lib/squish/alpha.cpp @@ -24,8 +24,9 @@ -------------------------------------------------------------------------- */ #include "alpha.h" + +#include #include -#include namespace squish { diff --git a/Engine/lib/squish/alpha.h b/Engine/lib/squish/alpha.h index 573605255..e5e7f320a 100644 --- a/Engine/lib/squish/alpha.h +++ b/Engine/lib/squish/alpha.h @@ -26,7 +26,7 @@ #ifndef SQUISH_ALPHA_H #define SQUISH_ALPHA_H -#include +#include "squish.h" namespace squish { diff --git a/Engine/lib/squish/clusterfit.cpp b/Engine/lib/squish/clusterfit.cpp index afea84880..96704460e 100644 --- a/Engine/lib/squish/clusterfit.cpp +++ b/Engine/lib/squish/clusterfit.cpp @@ -31,22 +31,21 @@ namespace squish { -ClusterFit::ClusterFit( ColourSet const* colours, int flags ) +ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) : ColourFit( colours, flags ) { // set the iteration count m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? 
kMaxIterations : 1; - // initialise the best error - m_besterror = VEC4_CONST( FLT_MAX ); - - // initialise the metric - bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 ); - if( perceptual ) - m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f ); + // initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f) + if( metric ) + m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f ); else m_metric = VEC4_CONST( 1.0f ); + // initialise the best error + m_besterror = VEC4_CONST( FLT_MAX ); + // cache some values int const count = m_colours->GetCount(); Vec3 const* values = m_colours->GetPoints(); diff --git a/Engine/lib/squish/clusterfit.h b/Engine/lib/squish/clusterfit.h index 17db5d387..c882469c8 100644 --- a/Engine/lib/squish/clusterfit.h +++ b/Engine/lib/squish/clusterfit.h @@ -27,7 +27,7 @@ #ifndef SQUISH_CLUSTERFIT_H #define SQUISH_CLUSTERFIT_H -#include +#include "squish.h" #include "maths.h" #include "simd.h" #include "colourfit.h" @@ -37,7 +37,7 @@ namespace squish { class ClusterFit : public ColourFit { public: - ClusterFit( ColourSet const* colours, int flags ); + ClusterFit( ColourSet const* colours, int flags, float* metric ); private: bool ConstructOrdering( Vec3 const& axis, int iteration ); diff --git a/Engine/lib/squish/colourblock.h b/Engine/lib/squish/colourblock.h index df0a47217..2562561d7 100644 --- a/Engine/lib/squish/colourblock.h +++ b/Engine/lib/squish/colourblock.h @@ -26,7 +26,7 @@ #ifndef SQUISH_COLOURBLOCK_H #define SQUISH_COLOURBLOCK_H -#include +#include "squish.h" #include "maths.h" namespace squish { diff --git a/Engine/lib/squish/colourfit.cpp b/Engine/lib/squish/colourfit.cpp index dba2b87e8..11efa4674 100644 --- a/Engine/lib/squish/colourfit.cpp +++ b/Engine/lib/squish/colourfit.cpp @@ -34,6 +34,10 @@ ColourFit::ColourFit( ColourSet const* colours, int flags ) { } +ColourFit::~ColourFit() +{ +} + void ColourFit::Compress( void* block ) { bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 ); diff --git 
a/Engine/lib/squish/colourfit.h b/Engine/lib/squish/colourfit.h index a2d0559a3..759322329 100644 --- a/Engine/lib/squish/colourfit.h +++ b/Engine/lib/squish/colourfit.h @@ -26,9 +26,11 @@ #ifndef SQUISH_COLOURFIT_H #define SQUISH_COLOURFIT_H -#include +#include "squish.h" #include "maths.h" +#include + namespace squish { class ColourSet; @@ -37,6 +39,7 @@ class ColourFit { public: ColourFit( ColourSet const* colours, int flags ); + virtual ~ColourFit(); void Compress( void* block ); diff --git a/Engine/lib/squish/colourset.h b/Engine/lib/squish/colourset.h index dcf56ae28..0c66fe440 100644 --- a/Engine/lib/squish/colourset.h +++ b/Engine/lib/squish/colourset.h @@ -26,7 +26,7 @@ #ifndef SQUISH_COLOURSET_H #define SQUISH_COLOURSET_H -#include +#include "squish.h" #include "maths.h" namespace squish { diff --git a/Engine/lib/squish/config.h b/Engine/lib/squish/config.h index 8427407d4..2fad5576a 100644 --- a/Engine/lib/squish/config.h +++ b/Engine/lib/squish/config.h @@ -36,7 +36,7 @@ #define SQUISH_USE_SSE 0 #endif -// Internally et SQUISH_USE_SIMD when either Altivec or SSE is available. +// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available. #if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE #error "Cannot enable both Altivec and SSE!" 
#endif @@ -46,10 +46,4 @@ #define SQUISH_USE_SIMD 0 #endif -// TORQUE MODIFICATIONS -#ifdef TORQUE_DEBUG -# undef SQUISH_USE_SSE -# define SQUISH_USE_SSE 0 -#endif - #endif // ndef SQUISH_CONFIG_H diff --git a/Engine/lib/squish/maths.cpp b/Engine/lib/squish/maths.cpp index 59818a4d2..9af4197d3 100644 --- a/Engine/lib/squish/maths.cpp +++ b/Engine/lib/squish/maths.cpp @@ -30,6 +30,7 @@ */ #include "maths.h" +#include "simd.h" #include namespace squish { @@ -44,7 +45,8 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight total += weights[i]; centroid += weights[i]*points[i]; } - centroid /= total; + if( total > FLT_EPSILON ) + centroid /= total; // accumulate the covariance matrix Sym3x3 covariance( 0.0f ); @@ -65,6 +67,8 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight return covariance; } +#if 0 + static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue ) { // compute M @@ -224,4 +228,32 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) } } +#else + +#define POWER_ITERATION_COUNT 8 + +Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ) +{ + Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f ); + Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f ); + Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f ); + Vec4 v = VEC4_CONST( 1.0f ); + for( int i = 0; i < POWER_ITERATION_COUNT; ++i ) + { + // matrix multiply + Vec4 w = row0*v.SplatX(); + w = MultiplyAdd(row1, v.SplatY(), w); + w = MultiplyAdd(row2, v.SplatZ(), w); + + // get max component from xyz in all channels + Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ())); + + // divide through and advance + v = w*Reciprocal(a); + } + return v.GetVec3(); +} + +#endif + } // namespace squish diff --git a/Engine/lib/squish/rangefit.cpp b/Engine/lib/squish/rangefit.cpp index 5a6643605..3fca1245e 100644 --- a/Engine/lib/squish/rangefit.cpp +++ b/Engine/lib/squish/rangefit.cpp @@ -30,15 +30,14 @@ namespace squish { 
-RangeFit::RangeFit( ColourSet const* colours, int flags ) +RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric ) : ColourFit( colours, flags ) { - // initialise the metric - bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 ); - if( perceptual ) - m_metric = Vec3( 0.2126f, 0.7152f, 0.0722f ); + // initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f) + if( metric ) + m_metric = Vec3( metric[0], metric[1], metric[2] ); else - m_metric = Vec3( 1.0f ); + m_metric = Vec3( 1.0f ); // initialise the best error m_besterror = FLT_MAX; diff --git a/Engine/lib/squish/rangefit.h b/Engine/lib/squish/rangefit.h index 795201993..e293bdcf3 100644 --- a/Engine/lib/squish/rangefit.h +++ b/Engine/lib/squish/rangefit.h @@ -26,7 +26,7 @@ #ifndef SQUISH_RANGEFIT_H #define SQUISH_RANGEFIT_H -#include +#include "squish.h" #include "colourfit.h" #include "maths.h" @@ -37,7 +37,7 @@ class ColourSet; class RangeFit : public ColourFit { public: - RangeFit( ColourSet const* colours, int flags ); + RangeFit( ColourSet const* colours, int flags, float* metric ); private: virtual void Compress3( void* block ); diff --git a/Engine/lib/squish/simd.h b/Engine/lib/squish/simd.h index 22bd10a46..92965e02e 100644 --- a/Engine/lib/squish/simd.h +++ b/Engine/lib/squish/simd.h @@ -27,14 +27,6 @@ #define SQUISH_SIMD_H #include "maths.h" - -#if SQUISH_USE_ALTIVEC -#include "simd_ve.h" -#elif SQUISH_USE_SSE -#include "simd_sse.h" -#else #include "simd_float.h" -#endif - #endif // ndef SQUISH_SIMD_H diff --git a/Engine/lib/squish/simd_sse.h b/Engine/lib/squish/simd_sse.h deleted file mode 100644 index e584f2a0e..000000000 --- a/Engine/lib/squish/simd_sse.h +++ /dev/null @@ -1,180 +0,0 @@ -/* ----------------------------------------------------------------------------- - - Copyright (c) 2006 Simon Brown si@sjbrown.co.uk - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - 
"Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -------------------------------------------------------------------------- */ - -#ifndef SQUISH_SIMD_SSE_H -#define SQUISH_SIMD_SSE_H - -#include -#if ( SQUISH_USE_SSE > 1 ) -#include -#endif - -#define SQUISH_SSE_SPLAT( a ) \ - ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) ) - -#define SQUISH_SSE_SHUF( x, y, z, w ) \ - ( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) ) - -namespace squish { - -#define VEC4_CONST( X ) Vec4( X ) - -class Vec4 -{ -public: - typedef Vec4 const& Arg; - - Vec4() {} - - explicit Vec4( __m128 v ) : m_v( v ) {} - - Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {} - - Vec4& operator=( Vec4 const& arg ) - { - m_v = arg.m_v; - return *this; - } - - explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {} - - Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {} - - Vec3 GetVec3() const - { -#ifdef __GNUC__ - __attribute__ ((__aligned__ (16))) float c[4]; -#else - __declspec(align(16)) float c[4]; -#endif - _mm_store_ps( c, m_v ); - return Vec3( c[0], c[1], c[2] ); - } - - Vec4 SplatX() const { 
return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); } - Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); } - Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); } - Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); } - - Vec4& operator+=( Arg v ) - { - m_v = _mm_add_ps( m_v, v.m_v ); - return *this; - } - - Vec4& operator-=( Arg v ) - { - m_v = _mm_sub_ps( m_v, v.m_v ); - return *this; - } - - Vec4& operator*=( Arg v ) - { - m_v = _mm_mul_ps( m_v, v.m_v ); - return *this; - } - - friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( _mm_add_ps( left.m_v, right.m_v ) ); - } - - friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( _mm_sub_ps( left.m_v, right.m_v ) ); - } - - friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( _mm_mul_ps( left.m_v, right.m_v ) ); - } - - //! Returns a*b + c - friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) - { - return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) ); - } - - //! 
Returns -( a*b - c ) - friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) - { - return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) ); - } - - friend Vec4 Reciprocal( Vec4::Arg v ) - { - // get the reciprocal estimate - __m128 estimate = _mm_rcp_ps( v.m_v ); - - // one round of Newton-Rhaphson refinement - __m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) ); - return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) ); - } - - friend Vec4 Min( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( _mm_min_ps( left.m_v, right.m_v ) ); - } - - friend Vec4 Max( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( _mm_max_ps( left.m_v, right.m_v ) ); - } - - friend Vec4 Truncate( Vec4::Arg v ) - { -#if ( SQUISH_USE_SSE == 1 ) - // convert to ints - __m128 input = v.m_v; - __m64 lo = _mm_cvttps_pi32( input ); - __m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) ); - - // convert to floats - __m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) ); - __m128 truncated = _mm_cvtpi32_ps( part, lo ); - - // clear out the MMX multimedia state to allow FP calls later - _mm_empty(); - return Vec4( truncated ); -#else - // use SSE2 instructions - return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) ); -#endif - } - - friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) - { - __m128 bits = _mm_cmplt_ps( left.m_v, right.m_v ); - int value = _mm_movemask_ps( bits ); - return value != 0; - } - -private: - __m128 m_v; -}; - -} // namespace squish - -#endif // ndef SQUISH_SIMD_SSE_H diff --git a/Engine/lib/squish/simd_ve.h b/Engine/lib/squish/simd_ve.h deleted file mode 100644 index 9a33955ff..000000000 --- a/Engine/lib/squish/simd_ve.h +++ /dev/null @@ -1,166 +0,0 @@ -/* ----------------------------------------------------------------------------- - - Copyright (c) 2006 Simon Brown si@sjbrown.co.uk - - Permission is hereby granted, free of charge, to any person obtaining - a copy 
of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -------------------------------------------------------------------------- */ - -#ifndef SQUISH_SIMD_VE_H -#define SQUISH_SIMD_VE_H - -#include -#undef bool - -namespace squish { - -#define VEC4_CONST( X ) Vec4( ( vector float )( X ) ) - -class Vec4 -{ -public: - typedef Vec4 Arg; - - Vec4() {} - - explicit Vec4( vector float v ) : m_v( v ) {} - - Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {} - - Vec4& operator=( Vec4 const& arg ) - { - m_v = arg.m_v; - return *this; - } - - explicit Vec4( float s ) - { - union { vector float v; float c[4]; } u; - u.c[0] = s; - u.c[1] = s; - u.c[2] = s; - u.c[3] = s; - m_v = u.v; - } - - Vec4( float x, float y, float z, float w ) - { - union { vector float v; float c[4]; } u; - u.c[0] = x; - u.c[1] = y; - u.c[2] = z; - u.c[3] = w; - m_v = u.v; - } - - Vec3 GetVec3() const - { - union { vector float v; float c[4]; } u; - u.v = m_v; - return Vec3( u.c[0], u.c[1], u.c[2] ); - } - - Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); } - Vec4 SplatY() const { return 
Vec4( vec_splat( m_v, 1 ) ); } - Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); } - Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); } - - Vec4& operator+=( Arg v ) - { - m_v = vec_add( m_v, v.m_v ); - return *this; - } - - Vec4& operator-=( Arg v ) - { - m_v = vec_sub( m_v, v.m_v ); - return *this; - } - - Vec4& operator*=( Arg v ) - { - m_v = vec_madd( m_v, v.m_v, ( vector float )( -0.0f ) ); - return *this; - } - - friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( vec_add( left.m_v, right.m_v ) ); - } - - friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( vec_sub( left.m_v, right.m_v ) ); - } - - friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( vec_madd( left.m_v, right.m_v, ( vector float )( -0.0f ) ) ); - } - - //! Returns a*b + c - friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) - { - return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) ); - } - - //! Returns -( a*b - c ) - friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c ) - { - return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) ); - } - - friend Vec4 Reciprocal( Vec4::Arg v ) - { - // get the reciprocal estimate - vector float estimate = vec_re( v.m_v ); - - // one round of Newton-Rhaphson refinement - vector float diff = vec_nmsub( estimate, v.m_v, ( vector float )( 1.0f ) ); - return Vec4( vec_madd( diff, estimate, estimate ) ); - } - - friend Vec4 Min( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( vec_min( left.m_v, right.m_v ) ); - } - - friend Vec4 Max( Vec4::Arg left, Vec4::Arg right ) - { - return Vec4( vec_max( left.m_v, right.m_v ) ); - } - - friend Vec4 Truncate( Vec4::Arg v ) - { - return Vec4( vec_trunc( v.m_v ) ); - } - - friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) - { - return vec_any_lt( left.m_v, right.m_v ) != 0; - } - -private: - vector float m_v; -}; - -} // namespace squish - -#endif // ndef SQUISH_SIMD_VE_H diff --git 
a/Engine/lib/squish/singlecolourfit.cpp b/Engine/lib/squish/singlecolourfit.cpp index 7929ce120..e8a011769 100644 --- a/Engine/lib/squish/singlecolourfit.cpp +++ b/Engine/lib/squish/singlecolourfit.cpp @@ -26,7 +26,6 @@ #include "singlecolourfit.h" #include "colourset.h" #include "colourblock.h" -#include namespace squish { diff --git a/Engine/lib/squish/singlecolourfit.h b/Engine/lib/squish/singlecolourfit.h index 0388fda02..54ec17ebb 100644 --- a/Engine/lib/squish/singlecolourfit.h +++ b/Engine/lib/squish/singlecolourfit.h @@ -26,7 +26,7 @@ #ifndef SQUISH_SINGLECOLOURFIT_H #define SQUISH_SINGLECOLOURFIT_H -#include +#include "squish.h" #include "colourfit.h" namespace squish { diff --git a/Engine/lib/squish/singlecolourlookup.inl b/Engine/lib/squish/singlecolourlookup.inl index f1c95a102..5e911745e 100644 --- a/Engine/lib/squish/singlecolourlookup.inl +++ b/Engine/lib/squish/singlecolourlookup.inl @@ -1,3 +1,27 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ static SingleColourLookup const lookup_5_3[] = { diff --git a/Engine/lib/squish/squish-Info.plist b/Engine/lib/squish/squish-Info.plist deleted file mode 100644 index 5cb05e056..000000000 --- a/Engine/lib/squish/squish-Info.plist +++ /dev/null @@ -1,20 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - ${EXECUTABLE_NAME} - CFBundleIdentifier - com.sjbrown.squish - CFBundleInfoDictionaryVersion - 6.0 - CFBundlePackageType - FMWK - CFBundleSignature - ???? - CFBundleVersion - 1.0 - - diff --git a/Engine/lib/squish/squish.cpp b/Engine/lib/squish/squish.cpp index bbe89bfcf..cd91f8746 100644 --- a/Engine/lib/squish/squish.cpp +++ b/Engine/lib/squish/squish.cpp @@ -23,7 +23,7 @@ -------------------------------------------------------------------------- */ -#include +#include "squish.h" #include "colourset.h" #include "maths.h" #include "rangefit.h" @@ -37,37 +37,58 @@ namespace squish { static int FixFlags( int flags ) { // grab the flag bits - int method = flags & ( kDxt1 | kDxt3 | kDxt5 ); + int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 ); int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit ); - int metric = flags & ( kColourMetricPerceptual | kColourMetricUniform ); int extra = flags & kWeightColourByAlpha; // set defaults - if( method != kDxt3 && method != kDxt5 ) + if ( method != kDxt3 + && method != kDxt5 + && method != kBc4 + && method != kBc5 ) + { method = kDxt1; - if( fit != kColourRangeFit ) + } + if( fit != kColourRangeFit && fit != kColourIterativeClusterFit ) fit = kColourClusterFit; - if( metric != kColourMetricUniform ) - metric = 
kColourMetricPerceptual; // done - return method | fit | metric | extra; + return method | fit | extra; } -void Compress( u8 const* rgba, void* block, int flags ) -{ - // compress with full mask - CompressMasked( rgba, 0xffff, block, flags ); -} - -void CompressMasked( u8 const* rgba, int mask, void* block, int flags ) +void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric ) { // fix any bad flags flags = FixFlags( flags ); + if ( ( flags & ( kBc4 | kBc5 ) ) != 0 ) + { + u8 alpha[16*4]; + for( int i = 0; i < 16; ++i ) + { + alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A + } + + u8* rBlock = reinterpret_cast< u8* >( block ); + CompressAlphaDxt5( alpha, mask, rBlock ); + + if ( ( flags & ( kBc5 ) ) != 0 ) + { + for( int i = 0; i < 16; ++i ) + { + alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A + } + + u8* gBlock = reinterpret_cast< u8* >( block ) + 8; + CompressAlphaDxt5( alpha, mask, gBlock ); + } + + return; + } + // get the block locations void* colourBlock = block; - void* alphaBock = block; + void* alphaBlock = block; if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 ) colourBlock = reinterpret_cast< u8* >( block ) + 8; @@ -84,21 +105,21 @@ void CompressMasked( u8 const* rgba, int mask, void* block, int flags ) else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 ) { // do a range fit - RangeFit fit( &colours, flags ); + RangeFit fit( &colours, flags, metric ); fit.Compress( colourBlock ); } else { // default to a cluster fit (could be iterative or not) - ClusterFit fit( &colours, flags ); + ClusterFit fit( &colours, flags, metric ); fit.Compress( colourBlock ); } // compress alpha separately if necessary if( ( flags & kDxt3 ) != 0 ) - CompressAlphaDxt3( rgba, mask, alphaBock ); + CompressAlphaDxt3( rgba, mask, alphaBlock ); else if( ( flags & kDxt5 ) != 0 ) - CompressAlphaDxt5( rgba, mask, alphaBock ); + CompressAlphaDxt5( rgba, mask, alphaBlock ); } void Decompress( u8* rgba, void const* block, int flags ) @@ -129,18 +150,18 
@@ int GetStorageRequirements( int width, int height, int flags ) // compute the storage requirements int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 ); - int blocksize = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16; - return blockcount*blocksize; + int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16; + return blockcount*blocksize; } -void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags ) +void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric ) { // fix any bad flags flags = FixFlags( flags ); // initialise the block output u8* targetBlock = reinterpret_cast< u8* >( blocks ); - int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16; + int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16; // loop over blocks for( int y = 0; y < height; y += 4 ) @@ -179,7 +200,7 @@ void CompressImage( u8 const* rgba, int width, int height, void* blocks, int fla } // compress it into the output - CompressMasked( sourceRgba, mask, targetBlock, flags ); + CompressMasked( sourceRgba, mask, targetBlock, flags, metric ); // advance targetBlock += bytesPerBlock; @@ -194,7 +215,7 @@ void DecompressImage( u8* rgba, int width, int height, void const* blocks, int f // initialise the block input u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks ); - int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16; + int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16; // loop over blocks for( int y = 0; y < height; y += 4 ) diff --git a/Engine/lib/squish/squish.h b/Engine/lib/squish/squish.h index 5f5ac149d..175375f83 100644 --- a/Engine/lib/squish/squish.h +++ b/Engine/lib/squish/squish.h @@ -39,74 +39,42 @@ typedef unsigned char u8; enum { //! Use DXT1 compression. - kDxt1 = ( 1 << 0 ), - + kDxt1 = ( 1 << 0 ), + //! Use DXT3 compression. - kDxt3 = ( 1 << 1 ), - + kDxt3 = ( 1 << 1 ), + //! Use DXT5 compression. - kDxt5 = ( 1 << 2 ), - - //! 
Use a very slow but very high quality colour compressor. - kColourIterativeClusterFit = ( 1 << 8 ), - + kDxt5 = ( 1 << 2 ), + + //! Use BC4 compression. + kBc4 = ( 1 << 3 ), + + //! Use BC5 compression. + kBc5 = ( 1 << 4 ), + //! Use a slow but high quality colour compressor (the default). - kColourClusterFit = ( 1 << 3 ), - + kColourClusterFit = ( 1 << 5 ), + //! Use a fast but low quality colour compressor. - kColourRangeFit = ( 1 << 4 ), - - //! Use a perceptual metric for colour error (the default). - kColourMetricPerceptual = ( 1 << 5 ), + kColourRangeFit = ( 1 << 6 ), - //! Use a uniform metric for colour error. - kColourMetricUniform = ( 1 << 6 ), - //! Weight the colour by alpha during cluster fit (disabled by default). - kWeightColourByAlpha = ( 1 << 7 ) + kWeightColourByAlpha = ( 1 << 7 ), + + //! Use a very slow but very high quality colour compressor. + kColourIterativeClusterFit = ( 1 << 8 ), }; // ----------------------------------------------------------------------------- -/*! @brief Compresses a 4x4 block of pixels. - - @param rgba The rgba values of the 16 source pixels. - @param block Storage for the compressed DXT block. - @param flags Compression flags. - - The source pixels should be presented as a contiguous array of 16 rgba - values, with each component as 1 byte each. In memory this should be: - - { r1, g1, b1, a1, .... , r16, g16, b16, a16 } - - The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, - however, DXT1 will be used by default if none is specified. When using DXT1 - compression, 8 bytes of storage are required for the compressed DXT block. - DXT3 and DXT5 compression require 16 bytes of storage per block. - - The flags parameter can also specify a preferred colour compressor and - colour error metric to use when fitting the RGB components of the data. - Possible colour compressors are: kColourClusterFit (the default), - kColourRangeFit or kColourIterativeClusterFit. 
Possible colour error metrics - are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no - flags are specified in any particular category then the default will be - used. Unknown flags are ignored. - - When using kColourClusterFit, an additional flag can be specified to - weight the colour of each pixel by its alpha value. For images that are - rendered using alpha blending, this can significantly increase the - perceived quality. -*/ -void Compress( u8 const* rgba, void* block, int flags ); - -// ----------------------------------------------------------------------------- - /*! @brief Compresses a 4x4 block of pixels. @param rgba The rgba values of the 16 source pixels. @param mask The valid pixel mask. @param block Storage for the compressed DXT block. @param flags Compression flags. + @param metric An optional perceptual metric. The source pixels should be presented as a contiguous array of 16 rgba values, with each component as 1 byte each. In memory this should be: @@ -125,20 +93,68 @@ void Compress( u8 const* rgba, void* block, int flags ); compression, 8 bytes of storage are required for the compressed DXT block. DXT3 and DXT5 compression require 16 bytes of storage per block. - The flags parameter can also specify a preferred colour compressor and - colour error metric to use when fitting the RGB components of the data. - Possible colour compressors are: kColourClusterFit (the default), - kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics - are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no - flags are specified in any particular category then the default will be - used. Unknown flags are ignored. + The flags parameter can also specify a preferred colour compressor to use + when fitting the RGB components of the data. Possible colour compressors + are: kColourClusterFit (the default), kColourRangeFit (very fast, low + quality) or kColourIterativeClusterFit (slowest, best quality). 
+ + When using kColourClusterFit or kColourIterativeClusterFit, an additional + flag can be specified to weight the importance of each pixel by its alpha + value. For images that are rendered using alpha blending, this can + significantly increase the perceived quality. - When using kColourClusterFit, an additional flag can be specified to - weight the colour of each pixel by its alpha value. For images that are - rendered using alpha blending, this can significantly increase the - perceived quality. + The metric parameter can be used to weight the relative importance of each + colour channel, or pass NULL to use the default uniform weight of + { 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that + allowed either uniform or "perceptual" weights with the fixed values + { 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a + contiguous array of 3 floats. */ -void CompressMasked( u8 const* rgba, int mask, void* block, int flags ); +void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 ); + +// ----------------------------------------------------------------------------- + +/*! @brief Compresses a 4x4 block of pixels. + + @param rgba The rgba values of the 16 source pixels. + @param block Storage for the compressed DXT block. + @param flags Compression flags. + @param metric Optional per-channel colour weights (3 floats), or NULL. + + The source pixels should be presented as a contiguous array of 16 rgba + values, with each component as 1 byte each. In memory this should be: + + { r1, g1, b1, a1, .... , r16, g16, b16, a16 } + + The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4 or kBc5 compression, + however, DXT1 will be used by default if none is specified. When using DXT1 + or BC4 compression, 8 bytes of storage are required for the compressed block. + DXT3, DXT5 and BC5 compression require 16 bytes of storage per block.
+ + The flags parameter can also specify a preferred colour compressor to use + when fitting the RGB components of the data. Possible colour compressors + are: kColourClusterFit (the default), kColourRangeFit (very fast, low + quality) or kColourIterativeClusterFit (slowest, best quality). + + When using kColourClusterFit or kColourIterativeClusterFit, an additional + flag can be specified to weight the importance of each pixel by its alpha + value. For images that are rendered using alpha blending, this can + significantly increase the perceived quality. + + The metric parameter can be used to weight the relative importance of each + colour channel, or pass NULL to use the default uniform weight of + { 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that + allowed either uniform or "perceptual" weights with the fixed values + { 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a + contiguous array of 3 floats. + + This method is an inline that calls CompressMasked with a mask of 0xffff, + provided for compatibility with older versions of squish. +*/ +inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 ) +{ + CompressMasked( rgba, 0xffff, block, flags, metric ); +} // ----------------------------------------------------------------------------- @@ -186,6 +202,7 @@ int GetStorageRequirements( int width, int height, int flags ); @param height The height of the source image. @param blocks Storage for the compressed output. @param flags Compression flags. + @param metric An optional perceptual metric. The source pixels should be presented as a contiguous array of width*height rgba values, with each component as 1 byte each. In memory this should be: @@ -197,24 +214,29 @@ int GetStorageRequirements( int width, int height, int flags ); compression, 8 bytes of storage are required for each compressed DXT block. DXT3 and DXT5 compression require 16 bytes of storage per block. 
- The flags parameter can also specify a preferred colour compressor and - colour error metric to use when fitting the RGB components of the data. - Possible colour compressors are: kColourClusterFit (the default), - kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics - are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no - flags are specified in any particular category then the default will be - used. Unknown flags are ignored. + The flags parameter can also specify a preferred colour compressor to use + when fitting the RGB components of the data. Possible colour compressors + are: kColourClusterFit (the default), kColourRangeFit (very fast, low + quality) or kColourIterativeClusterFit (slowest, best quality). + + When using kColourClusterFit or kColourIterativeClusterFit, an additional + flag can be specified to weight the importance of each pixel by its alpha + value. For images that are rendered using alpha blending, this can + significantly increase the perceived quality. - When using kColourClusterFit, an additional flag can be specified to - weight the colour of each pixel by its alpha value. For images that are - rendered using alpha blending, this can significantly increase the - perceived quality. + The metric parameter can be used to weight the relative importance of each + colour channel, or pass NULL to use the default uniform weight of + { 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that + allowed either uniform or "perceptual" weights with the fixed values + { 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a + contiguous array of 3 floats. - Internally this function calls squish::Compress for each block. To see how - much memory is required in the compressed image, use - squish::GetStorageRequirements. + Internally this function calls squish::CompressMasked for each block, which + allows for pixels outside the image to take arbitrary values. 
The function + squish::GetStorageRequirements can be called to compute the amount of memory + to allocate for the compressed output. */ -void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags ); +void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 ); // ----------------------------------------------------------------------------- diff --git a/Engine/lib/squish/texture_compression_s3tc.txt b/Engine/lib/squish/texture_compression_s3tc.txt deleted file mode 100644 index f229cf367..000000000 --- a/Engine/lib/squish/texture_compression_s3tc.txt +++ /dev/null @@ -1,508 +0,0 @@ -Name - - EXT_texture_compression_s3tc - -Name Strings - - GL_EXT_texture_compression_s3tc - -Contact - - Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com) - -Status - - FINAL - -Version - - 1.1, 16 November 2001 (containing only clarifications relative to - version 1.0, dated 7 July 2000) - -Number - - 198 - -Dependencies - - OpenGL 1.1 is required. - - GL_ARB_texture_compression is required. - - This extension is written against the OpenGL 1.2.1 Specification. - -Overview - - This extension provides additional texture compression functionality - specific to S3's S3TC format (called DXTC in Microsoft's DirectX API), - subject to all the requirements and limitations described by the extension - GL_ARB_texture_compression. - - This extension supports DXT1, DXT3, and DXT5 texture compression formats. - For the DXT1 image format, this specification supports an RGB-only mode - and a special RGBA mode with single-bit "transparent" alpha. - -IP Status - - Contact S3 Incorporated (http://www.s3.com) regarding any intellectual - property issues associated with implementing this extension. - - WARNING: Vendors able to support S3TC texture compression in Direct3D - drivers do not necessarily have the right to use the same functionality in - OpenGL. 
- -Issues - - (1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported? - - RESOLVED: No -- insufficient interest. Supporting DXT2 and DXT4 - would require some rework to the TexEnv definition (maybe add a new - base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats. - Note that the EXT_texture_env_combine extension (which extends normal - TexEnv modes) can be used to support textures with premultipled alpha. - - (2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported - or should we use only the DXT enums? - - RESOLVED: No. A generic RGBA_S3TC_EXT is problematic because DXT3 - and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is - also) yet one format must be chosen up front. - - (3) Should TexSubImage support all block-aligned edits or just the minimal - functionality required by the ARB_texture_compression extension? - - RESOLVED: Allow all valid block-aligned edits. - - (4) A pre-compressed image with a DXT1 format can be used as either an - RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image. If the image has - transparent texels, how are they treated in each format? - - RESOLVED: The renderer has to make sure that an RGB_S3TC_DXT1 format - is decoded as RGB (where alpha is effectively one for all texels), - while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all - texels with "transparent" encodings). Otherwise, the formats are - identical. - - (5) Is the encoding of the RGB components for DXT1 formats correct in this - spec? MSDN documentation does not specify an RGB color for the - "transparent" encoding. Is it really black? - - RESOLVED: Yes. The specification for the DXT1 format initially - required black, but later changed that requirement to a - recommendation. All vendors involved in the definition of this - specification support black. In addition, specifying black has a - useful behavior. 
- - When blending multiple texels (GL_LINEAR filtering), mixing opaque and - transparent samples is problematic. Defining a black color on - transparent texels achieves a sensible result that works like a - texture with premultiplied alpha. For example, if three opaque white - and one transparent sample is being averaged, the result would be a - 75% intensity gray (with an alpha of 75%). This is the same result on - the color channels as would be obtained using a white color, 75% - alpha, and a SRC_ALPHA blend factor. - - (6) Is the encoding of the RGB components for DXT3 and DXT5 formats - correct in this spec? MSDN documentation suggests that the RGB blocks - for DXT3 and DXT5 are decoded as described by the DXT1 format. - - RESOLVED: Yes -- this appears to be a bug in the MSDN documentation. - The specification for the DXT2-DXT5 formats require decoding using the - opaque block encoding, regardless of the relative values of "color0" - and "color1". - -New Procedures and Functions - - None. - -New Tokens - - Accepted by the parameter of TexImage2D, CopyTexImage2D, - and CompressedTexImage2DARB and the parameter of - CompressedTexSubImage2DARB: - - COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 - COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 - COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 - COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 - -Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation) - - None. 
- -Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization) - - Add to Table 3.16.1: Specific Compressed Internal Formats - - Compressed Internal Format Base Internal Format - ========================== ==================== - COMPRESSED_RGB_S3TC_DXT1_EXT RGB - COMPRESSED_RGBA_S3TC_DXT1_EXT RGBA - COMPRESSED_RGBA_S3TC_DXT3_EXT RGBA - COMPRESSED_RGBA_S3TC_DXT5_EXT RGBA - - - Modify Section 3.8.2, Alternate Image Specification - - (add to end of TexSubImage discussion, p.123 -- after edit from the - ARB_texture_compression spec) - - If the internal format of the texture image being modified is - COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the - texture is stored using one of the several S3TC compressed texture image - formats. Such images are easily edited along 4x4 texel boundaries, so the - limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed. - TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION - error only if one of the following conditions occurs: - - * is not a multiple of four or equal to TEXTURE_WIDTH, - unless and are both zero. - * is not a multiple of four or equal to TEXTURE_HEIGHT, - unless and are both zero. - * or is not a multiple of four. - - The contents of any 4x4 block of texels of an S3TC compressed texture - image that does not intersect the area being modified are preserved during - valid TexSubImage2D and CopyTexSubImage2D calls. - - - Add to Section 3.8.2, Alternate Image Specification (adding to the end of - the CompressedTexImage section introduced by the ARB_texture_compression - spec) - - If is COMPRESSED_RGB_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or - COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one - of several S3TC compressed texture image formats. The S3TC texture - compression algorithm supports only 2D images without borders. 
- CompressedTexImage1DARB and CompressedTexImage3DARB produce an - INVALID_ENUM error if is an S3TC format. - CompressedTexImage2DARB will produce an INVALID_OPERATION error if - is non-zero. - - - Add to Section 3.8.2, Alternate Image Specification (adding to the end of - the CompressedTexSubImage section introduced by the - ARB_texture_compression spec) - - If the internal format of the texture image being modified is - COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the - texture is stored using one of the several S3TC compressed texture image - formats. Since the S3TC texture compression algorithm supports only 2D - images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce - an INVALID_ENUM error if is an S3TC format. Since S3TC images - are easily edited along 4x4 texel boundaries, the limitations on - CompressedTexSubImage2D are relaxed. CompressedTexSubImage2D will result - in an INVALID_OPERATION error only if one of the following conditions - occurs: - - * is not a multiple of four or equal to TEXTURE_WIDTH. - * is not a multiple of four or equal to TEXTURE_HEIGHT. - * or is not a multiple of four. - - The contents of any 4x4 block of texels of an S3TC compressed texture - image that does not intersect the area being modified are preserved during - valid TexSubImage2D and CopyTexSubImage2D calls. - -Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment -Operations and the Frame Buffer) - - None. - -Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions) - - None. - -Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and -State Requests) - - None. - -Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance) - - None. - -Additions to the AGL/GLX/WGL Specifications - - None. - -GLX Protocol - - None. 
- -Errors - - INVALID_ENUM is generated by CompressedTexImage1DARB or - CompressedTexImage3DARB if is - COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT. - - INVALID_OPERATION is generated by CompressedTexImage2DARB if - is COMPRESSED_RGB_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or - COMPRESSED_RGBA_S3TC_DXT5_EXT and is not equal to zero. - - INVALID_ENUM is generated by CompressedTexSubImage1DARB or - CompressedTexSubImage3DARB if is COMPRESSED_RGB_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or - COMPRESSED_RGBA_S3TC_DXT5_EXT. - - INVALID_OPERATION is generated by TexSubImage2D CopyTexSubImage2D, or - CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is - COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of - the following apply: is not a multiple of four or equal to - TEXTURE_WIDTH; is not a multiple of four or equal to - TEXTURE_HEIGHT; or is not a multiple of four. - - - The following restrictions from the ARB_texture_compression specification - do not apply to S3TC texture formats, since subimage modification is - straightforward as long as the subimage is properly aligned. - - DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D, - DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or - DELETE: CopyTexSubImage3D if the internal format of the texture image is - DELETE: compressed and , , or does not equal - DELETE: -b, where b is value of TEXTURE_BORDER. 
- - DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB, - DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the - DELETE: entire texture image is not being edited: if , - DELETE: , or is greater than -b, + is - DELETE: less than w+b, + is less than h+b, or - DELETE: + is less than d+b, where b is the value of - DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of - DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH. - - See also errors in the GL_ARB_texture_compression specification. - -New State - - In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT - subscript for Z by 4 in the "Type" row. - -New Implementation Dependent State - - None - -Appendix - - S3TC Compressed Texture Image Formats - - Compressed texture images stored using the S3TC compressed image formats - are represented as a collection of 4x4 texel blocks, where each block - contains 64 or 128 bits of texel data. The image is encoded as a normal - 2D raster image in which each 4x4 block is treated as a single pixel. If - an S3TC image has a width or height less than four, the data corresponding - to texels outside the image are irrelevant and undefined. - - When an S3TC image with a width of , height of , and block size of - (8 or 16 bytes) is decoded, the corresponding image size (in - bytes) is: - - ceil(/4) * ceil(/4) * blocksize. - - When decoding an S3TC image, the block containing the texel at offset - (, ) begins at an offset (in bytes) relative to the base of the - image of: - - blocksize * (ceil(/4) * floor(/4) + floor(/4)). - - The data corresponding to a specific texel (, ) are extracted from a - 4x4 texel block using a relative (x,y) value of - - ( modulo 4, modulo 4). - - There are four distinct S3TC image formats: - - COMPRESSED_RGB_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64 - bits of RGB image data. 
- - Each RGB image data block is encoded as a sequence of 8 bytes, called (in - order of increasing address): - - c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3 - - The 8 bytes of the block are decoded into three quantities: - - color0 = c0_lo + c0_hi * 256 - color1 = c1_lo + c1_hi * 256 - bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3)) - - color0 and color1 are 16-bit unsigned integers that are unpacked to - RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with - a of RGB and a type of UNSIGNED_SHORT_5_6_5. - - bits is a 32-bit unsigned integer, from which a two-bit control code - is extracted for a texel at location (x,y) in the block using: - - code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0] - - where bit 31 is the most significant and bit 0 is the least - significant bit. - - The RGB color for a texel at location (x,y) in the block is given by: - - RGB0, if color0 > color1 and code(x,y) == 0 - RGB1, if color0 > color1 and code(x,y) == 1 - (2*RGB0+RGB1)/3, if color0 > color1 and code(x,y) == 2 - (RGB0+2*RGB1)/3, if color0 > color1 and code(x,y) == 3 - - RGB0, if color0 <= color1 and code(x,y) == 0 - RGB1, if color0 <= color1 and code(x,y) == 1 - (RGB0+RGB1)/2, if color0 <= color1 and code(x,y) == 2 - BLACK, if color0 <= color1 and code(x,y) == 3 - - Arithmetic operations are done per component, and BLACK refers to an - RGB color where red, green, and blue are all zero. - - Since this image has an RGB format, there is no alpha component and the - image is considered fully opaque. - - - COMPRESSED_RGBA_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64 - bits of RGB image data and minimal alpha information. The RGB components - of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT. 
- - The alpha component for a texel at location (x,y) in the block is - given by: - - 0.0, if color0 <= color1 and code(x,y) == 3 - 1.0, otherwise - - IMPORTANT: When encoding an RGBA image into a format using 1-bit - alpha, any texels with an alpha component less than 0.5 end up with an - alpha of 0.0 and any texels with an alpha component greater than or - equal to 0.5 end up with an alpha of 1.0. When encoding an RGBA image - into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red, - green, and blue components of any texels with a final alpha of 0.0 - will automatically be zero (black). If this behavior is not desired - by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT. - This format will never be used when a generic compressed internal - format (Table 3.16.2) is specified, although the nearly identical - format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be. - - - COMPRESSED_RGBA_S3TC_DXT3_EXT: Each 4x4 block of texels consists of 64 - bits of uncompressed alpha image data followed by 64 bits of RGB image - data. - - Each RGB image data block is encoded according to the - COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code - bits always use the non-transparent encodings. In other words, they are - treated as though color0 > color1, regardless of the actual values of - color0 and color1. - - Each alpha image data block is encoded as a sequence of 8 bytes, called - (in order of increasing address): - - a0, a1, a2, a3, a4, a5, a6, a7 - - The 8 bytes of the block are decoded into one 64-bit integer: - - alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 + - 256 * (a5 + 256 * (a6 + 256 * a7)))))) - - alpha is a 64-bit unsigned integer, from which a four-bit alpha value - is extracted for a texel at location (x,y) in the block using: - - alpha(x,y) = bits[4*(4*y+x)+3..4*(4*y+x)+0] - - where bit 63 is the most significant and bit 0 is the least - significant bit. 
- - The alpha component for a texel at location (x,y) in the block is - given by alpha(x,y) / 15. - - - COMPRESSED_RGBA_S3TC_DXT5_EXT: Each 4x4 block of texels consists of 64 - bits of compressed alpha image data followed by 64 bits of RGB image data. - - Each RGB image data block is encoded according to the - COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code - bits always use the non-transparent encodings. In other words, they are - treated as though color0 > color1, regardless of the actual values of - color0 and color1. - - Each alpha image data block is encoded as a sequence of 8 bytes, called - (in order of increasing address): - - alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5 - - The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha - components by multiplying by 1/255. - - The 6 "bits" bytes of the block are decoded into one 48-bit integer: - - bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 + - 256 * (bits_4 + 256 * bits_5)))) - - bits is a 48-bit unsigned integer, from which a three-bit control code - is extracted for a texel at location (x,y) in the block using: - - code(x,y) = bits[3*(4*y+x)+1..3*(4*y+x)+0] - - where bit 47 is the most significant and bit 0 is the least - significant bit. 
- - The alpha component for a texel at location (x,y) in the block is - given by: - - alpha0, code(x,y) == 0 - alpha1, code(x,y) == 1 - - (6*alpha0 + 1*alpha1)/7, alpha0 > alpha1 and code(x,y) == 2 - (5*alpha0 + 2*alpha1)/7, alpha0 > alpha1 and code(x,y) == 3 - (4*alpha0 + 3*alpha1)/7, alpha0 > alpha1 and code(x,y) == 4 - (3*alpha0 + 4*alpha1)/7, alpha0 > alpha1 and code(x,y) == 5 - (2*alpha0 + 5*alpha1)/7, alpha0 > alpha1 and code(x,y) == 6 - (1*alpha0 + 6*alpha1)/7, alpha0 > alpha1 and code(x,y) == 7 - - (4*alpha0 + 1*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 2 - (3*alpha0 + 2*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 3 - (2*alpha0 + 3*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 4 - (1*alpha0 + 4*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 5 - 0.0, alpha0 <= alpha1 and code(x,y) == 6 - 1.0, alpha0 <= alpha1 and code(x,y) == 7 - - -Revision History - - 1.1, 11/16/01 pbrown: Updated contact info, clarified where texels - fall within a single block. - - 1.0, 07/07/00 prbrown1: Published final version agreed to by working - group members. - - 0.9, 06/24/00 prbrown1: Documented that block-aligned TexSubImage calls - do not modify existing texels outside the - modified blocks. Added caveat to allow for a - (0,0)-anchored TexSubImage operation of - arbitrary size. - - 0.7, 04/11/00 prbrown1: Added issues on DXT1, DXT3, and DXT5 encodings - where the MSDN documentation doesn't match what - is really done. Added enum values from the - extension registry. - - 0.4, 03/28/00 prbrown1: Updated to reflect final version of the - ARB_texture_compression extension. Allowed - block-aligned TexSubImage calls. - - 0.3, 03/07/00 prbrown1: Resolved issues pertaining to the format of RGB - blocks in the DXT3 and DXT5 formats (they don't - ever use the "transparent" encoding). Fixed - decoding of DXT1 blocks. Pointed out issue of - "transparent" texels in DXT1 encodings having - different behaviors for RGB and RGBA internal - formats. 
- - 0.2, 02/23/00 prbrown1: Minor revisions; added several issues. - - 0.11, 02/17/00 prbrown1: Slight modification to error semantics - (INVALID_ENUM instead of INVALID_OPERATION). - - 0.1, 02/15/00 prbrown1: Initial revision.