squish update (primarily to add additional formats for later usage)

2026-03-08 06:50:42 +00:00 · 2017-03-30 20:44:35 -05:00 · 2017-03-30 20:44:35 -05:00 · ac37547d4f
commit ac37547d4f
parent 9c7b5eec73
24 changed files with 256 additions and 1072 deletions
--- a/Engine/lib/squish/ChangeLog
+++ b/Engine/lib/squish/ChangeLog
@ -1,52 +0,0 @@
-1.10
-* Iterative cluster fit is now considered to be a new compression mode
-* The core cluster fit is now 4x faster using contributions by Ignacio
-Castano from NVIDIA
-* The single colour lookup table has been halved by exploiting symmetry
-
-1.9
-* Added contributed SSE1 truncate implementation
-* Changed use of SQUISH_USE_SSE to be 1 for SSE and 2 for SSE2 instructions
-* Cluster fit is now iterative to further reduce image error
-
-1.8
-* Switched from using floor to trunc for much better SSE performance (again)
-* Xcode build now expects libpng in /usr/local for extra/squishpng
-
-1.7
-* Fixed floating-point equality issue in clusterfit sort (x86 affected only)
-* Implemented proper SSE(2) floor function for 50% speedup on SSE builds 
-* The range fit implementation now uses the correct colour metric
-
-1.6
-* Fixed bug in CompressImage where masked pixels were not skipped over
-* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels
-* Fixed major DXT1 bug that can generate unexpected transparent pixels
-
-1.5
-* Added CompressMasked function to handle incomplete DXT blocks more cleanly
-* Added kWeightColourByAlpha flag for better quality images when alpha blending
-
-1.4
-* Fixed stack overflow in rangefit
-
-1.3
-* Worked around SSE floor implementation bug, proper fix needed!
-* This release has visual studio and makefile builds that work
-
-1.2
-* Added provably optimal single colour compressor
-* Added extra/squishgen.cpp that generates single colour lookup tables
-
-1.1
-* Fixed a DXT1 colour output bug
-* Changed argument order for Decompress function to match Compress
-* Added GetStorageRequirements function
-* Added CompressImage function
-* Added DecompressImage function
-* Moved squishtool.cpp to extra/squishpng.cpp
-* Added extra/squishtest.cpp
-
-1.0
-* Initial release
-
--- a/Engine/lib/squish/LICENSE
+++ b/Engine/lib/squish/LICENSE
@ -0,0 +1,20 @@
+	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+	Permission is hereby granted, free of charge, to any person obtaining
+	a copy of this software and associated documentation files (the 
+	"Software"), to	deal in the Software without restriction, including
+	without limitation the rights to use, copy, modify, merge, publish,
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+
+	The above copyright notice and this permission notice shall be included
+	in all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/Engine/lib/squish/alpha.cpp
+++ b/Engine/lib/squish/alpha.cpp
@ -24,8 +24,9 @@
   -------------------------------------------------------------------------- */
   
 #include "alpha.h"
+
+#include <climits>
 #include <algorithm>
-#include <limits.h> 

 namespace squish {

--- a/Engine/lib/squish/alpha.h
+++ b/Engine/lib/squish/alpha.h
@ -26,7 +26,7 @@
 #ifndef SQUISH_ALPHA_H
 #define SQUISH_ALPHA_H

-#include <squish.h>
+#include "squish.h"

 namespace squish {

--- a/Engine/lib/squish/clusterfit.cpp
+++ b/Engine/lib/squish/clusterfit.cpp
@ -31,22 +31,21 @@

 namespace squish {

-ClusterFit::ClusterFit( ColourSet const* colours, int flags ) 
+ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) 
  : ColourFit( colours, flags )
 {
 	// set the iteration count
 	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;

-	// initialise the best error
-	m_besterror = VEC4_CONST( FLT_MAX );
-
-	// initialise the metric
-	bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
-	if( perceptual )
-		m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
+	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
+	if( metric )
+		m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
 	else
 		m_metric = VEC4_CONST( 1.0f );	

+	// initialise the best error
+	m_besterror = VEC4_CONST( FLT_MAX );
+
 	// cache some values
 	int const count = m_colours->GetCount();
 	Vec3 const* values = m_colours->GetPoints();
--- a/Engine/lib/squish/clusterfit.h
+++ b/Engine/lib/squish/clusterfit.h
@ -27,7 +27,7 @@
 #ifndef SQUISH_CLUSTERFIT_H
 #define SQUISH_CLUSTERFIT_H

-#include <squish.h>
+#include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"
@ -37,7 +37,7 @@ namespace squish {
 class ClusterFit : public ColourFit
 {
 public:
-	ClusterFit( ColourSet const* colours, int flags );
+	ClusterFit( ColourSet const* colours, int flags, float* metric );
 	
 private:
 	bool ConstructOrdering( Vec3 const& axis, int iteration );
--- a/Engine/lib/squish/colourblock.h
+++ b/Engine/lib/squish/colourblock.h
@ -26,7 +26,7 @@
 #ifndef SQUISH_COLOURBLOCK_H
 #define SQUISH_COLOURBLOCK_H

-#include <squish.h>
+#include "squish.h"
 #include "maths.h"

 namespace squish {
--- a/Engine/lib/squish/colourfit.cpp
+++ b/Engine/lib/squish/colourfit.cpp
@ -34,6 +34,10 @@ ColourFit::ColourFit( ColourSet const* colours, int flags )
 {
 }

+ColourFit::~ColourFit()
+{
+}
+
 void ColourFit::Compress( void* block )
 {
 	bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
--- a/Engine/lib/squish/colourfit.h
+++ b/Engine/lib/squish/colourfit.h
@ -26,9 +26,11 @@
 #ifndef SQUISH_COLOURFIT_H
 #define SQUISH_COLOURFIT_H

-#include <squish.h>
+#include "squish.h"
 #include "maths.h"

+#include <climits>
+
 namespace squish {

 class ColourSet;
@ -37,6 +39,7 @@ class ColourFit
 {
 public:
 	ColourFit( ColourSet const* colours, int flags );
+	virtual ~ColourFit();

 	void Compress( void* block );

--- a/Engine/lib/squish/colourset.h
+++ b/Engine/lib/squish/colourset.h
@ -26,7 +26,7 @@
 #ifndef SQUISH_COLOURSET_H
 #define SQUISH_COLOURSET_H

-#include <squish.h>
+#include "squish.h"
 #include "maths.h"

 namespace squish {
--- a/Engine/lib/squish/config.h
+++ b/Engine/lib/squish/config.h
@ -36,7 +36,7 @@
 #define SQUISH_USE_SSE 0
 #endif

-// Internally et SQUISH_USE_SIMD when either Altivec or SSE is available.
+// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
 #if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
 #error "Cannot enable both Altivec and SSE!"
 #endif
@ -46,10 +46,4 @@
 #define SQUISH_USE_SIMD 0
 #endif

-// TORQUE MODIFICATIONS
-#ifdef TORQUE_DEBUG
-#  undef SQUISH_USE_SSE
-#  define SQUISH_USE_SSE 0
-#endif
-
 #endif // ndef SQUISH_CONFIG_H
--- a/Engine/lib/squish/maths.cpp
+++ b/Engine/lib/squish/maths.cpp
@ -30,6 +30,7 @@
 */

 #include "maths.h"
+#include "simd.h"
 #include <cfloat>

 namespace squish {
@ -44,7 +45,8 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
 		total += weights[i];
 		centroid += weights[i]*points[i];
 	}
-	centroid /= total;
+	if( total > FLT_EPSILON )
+		centroid /= total;

 	// accumulate the covariance matrix
 	Sym3x3 covariance( 0.0f );
@ -65,6 +67,8 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
 	return covariance;
 }

+#if 0
+
 static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
 {
 	// compute M
@ -224,4 +228,32 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 	}
 }

+#else
+
+#define POWER_ITERATION_COUNT 	8
+
+Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
+{
+	Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
+	Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
+	Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
+	Vec4 v = VEC4_CONST( 1.0f );
+	for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
+	{
+		// matrix multiply
+		Vec4 w = row0*v.SplatX();
+		w = MultiplyAdd(row1, v.SplatY(), w);
+		w = MultiplyAdd(row2, v.SplatZ(), w);
+
+		// get max component from xyz in all channels
+		Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
+
+		// divide through and advance
+		v = w*Reciprocal(a);
+	}
+	return v.GetVec3();
+}
+
+#endif
+
 } // namespace squish
--- a/Engine/lib/squish/rangefit.cpp
+++ b/Engine/lib/squish/rangefit.cpp
@ -30,15 +30,14 @@

 namespace squish {

-RangeFit::RangeFit( ColourSet const* colours, int flags ) 
+RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric ) 
  : ColourFit( colours, flags )
 {
-	// initialise the metric
-	bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
-	if( perceptual )
-		m_metric = Vec3( 0.2126f, 0.7152f, 0.0722f );
+	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
+	if( metric )
+		m_metric = Vec3( metric[0], metric[1], metric[2] );
 	else
-		m_metric = Vec3( 1.0f );
+		m_metric = Vec3( 1.0f );	

 	// initialise the best error
 	m_besterror = FLT_MAX;
--- a/Engine/lib/squish/rangefit.h
+++ b/Engine/lib/squish/rangefit.h
@ -26,7 +26,7 @@
 #ifndef SQUISH_RANGEFIT_H
 #define SQUISH_RANGEFIT_H

-#include <squish.h>
+#include "squish.h"
 #include "colourfit.h"
 #include "maths.h"

@ -37,7 +37,7 @@ class ColourSet;
 class RangeFit : public ColourFit
 {
 public:
-	RangeFit( ColourSet const* colours, int flags );
+	RangeFit( ColourSet const* colours, int flags, float* metric );
 	
 private:
 	virtual void Compress3( void* block );
--- a/Engine/lib/squish/simd.h
+++ b/Engine/lib/squish/simd.h
@ -27,14 +27,6 @@
 #define SQUISH_SIMD_H

 #include "maths.h"
-
-#if SQUISH_USE_ALTIVEC
-#include "simd_ve.h"
-#elif SQUISH_USE_SSE
-#include "simd_sse.h"
-#else
 #include "simd_float.h"
-#endif
-

 #endif // ndef SQUISH_SIMD_H
--- a/Engine/lib/squish/simd_sse.h
+++ b/Engine/lib/squish/simd_sse.h
@ -1,180 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SIMD_SSE_H
-#define SQUISH_SIMD_SSE_H
-
-#include <xmmintrin.h>
-#if ( SQUISH_USE_SSE > 1 )
-#include <emmintrin.h>
-#endif
-
-#define SQUISH_SSE_SPLAT( a )										\
-	( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
-
-#define SQUISH_SSE_SHUF( x, y, z, w )								\
-	( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) )
-
-namespace squish {
-
-#define VEC4_CONST( X ) Vec4( X )
-
-class Vec4
-{
-public:
-	typedef Vec4 const& Arg;
-
-	Vec4() {}
-		
-	explicit Vec4( __m128 v ) : m_v( v ) {}
-	
-	Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
-	
-	Vec4& operator=( Vec4 const& arg )
-	{
-		m_v = arg.m_v;
-		return *this;
-	}
-	
-	explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {}
-	
-	Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {}
-	
-	Vec3 GetVec3() const
-	{
-#ifdef __GNUC__
-		__attribute__ ((__aligned__ (16))) float c[4];
-#else
-		__declspec(align(16)) float c[4];
-#endif
-		_mm_store_ps( c, m_v );
-		return Vec3( c[0], c[1], c[2] );
-	}
-	
-	Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
-	Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
-	Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); }
-	Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); }
-
-	Vec4& operator+=( Arg v )
-	{
-		m_v = _mm_add_ps( m_v, v.m_v );
-		return *this;
-	}
-	
-	Vec4& operator-=( Arg v )
-	{
-		m_v = _mm_sub_ps( m_v, v.m_v );
-		return *this;
-	}
-	
-	Vec4& operator*=( Arg v )
-	{
-		m_v = _mm_mul_ps( m_v, v.m_v );
-		return *this;
-	}
-	
-	friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( _mm_add_ps( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( _mm_sub_ps( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( _mm_mul_ps( left.m_v, right.m_v ) );
-	}
-	
-	//! Returns a*b + c
-	friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) );
-	}
-	
-	//! Returns -( a*b - c )
-	friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) );
-	}
-	
-	friend Vec4 Reciprocal( Vec4::Arg v )
-	{
-		// get the reciprocal estimate
-		__m128 estimate = _mm_rcp_ps( v.m_v );
-
-		// one round of Newton-Rhaphson refinement
-		__m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) );
-		return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
-	}
-	
-	friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( _mm_min_ps( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( _mm_max_ps( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 Truncate( Vec4::Arg v )
-	{
-#if ( SQUISH_USE_SSE == 1 )
-		// convert to ints
-		__m128 input = v.m_v;
-		__m64 lo = _mm_cvttps_pi32( input );
-		__m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
-
-		// convert to floats
-		__m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
-		__m128 truncated = _mm_cvtpi32_ps( part, lo );
-		
-		// clear out the MMX multimedia state to allow FP calls later
-		_mm_empty(); 
-		return Vec4( truncated );
-#else
-		// use SSE2 instructions
-		return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) );
-#endif
-	}
-	
-	friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) 
-	{
-		__m128 bits = _mm_cmplt_ps( left.m_v, right.m_v );
-		int value = _mm_movemask_ps( bits );
-		return value != 0;
-	}
-	
-private:
-	__m128 m_v;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_SIMD_SSE_H
--- a/Engine/lib/squish/simd_ve.h
+++ b/Engine/lib/squish/simd_ve.h
@ -1,166 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
-   -------------------------------------------------------------------------- */
-   
-#ifndef SQUISH_SIMD_VE_H
-#define SQUISH_SIMD_VE_H
-
-#include <altivec.h>
-#undef bool
-
-namespace squish {
-
-#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
-
-class Vec4
-{
-public:
-	typedef Vec4 Arg;
-
-	Vec4() {}
-		
-	explicit Vec4( vector float v ) : m_v( v ) {}
-	
-	Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
-	
-	Vec4& operator=( Vec4 const& arg )
-	{
-		m_v = arg.m_v;
-		return *this;
-	}
-	
-	explicit Vec4( float s )
-	{
-		union { vector float v; float c[4]; } u;
-		u.c[0] = s;
-		u.c[1] = s;
-		u.c[2] = s;
-		u.c[3] = s;
-		m_v = u.v;
-	}
-	
-	Vec4( float x, float y, float z, float w )
-	{
-		union { vector float v; float c[4]; } u;
-		u.c[0] = x;
-		u.c[1] = y;
-		u.c[2] = z;
-		u.c[3] = w;
-		m_v = u.v;
-	}
-	
-	Vec3 GetVec3() const
-	{
-		union { vector float v; float c[4]; } u;
-		u.v = m_v;
-		return Vec3( u.c[0], u.c[1], u.c[2] );
-	}
-	
-	Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
-	Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
-	Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
-	Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); }
-
-	Vec4& operator+=( Arg v )
-	{
-		m_v = vec_add( m_v, v.m_v );
-		return *this;
-	}
-	
-	Vec4& operator-=( Arg v )
-	{
-		m_v = vec_sub( m_v, v.m_v );
-		return *this;
-	}
-	
-	Vec4& operator*=( Arg v )
-	{
-		m_v = vec_madd( m_v, v.m_v, ( vector float )( -0.0f ) );
-		return *this;
-	}
-	
-	friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( vec_add( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( vec_sub( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
-	{
-		return Vec4( vec_madd( left.m_v, right.m_v, ( vector float )( -0.0f ) ) );
-	}
-	
-	//! Returns a*b + c
-	friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) );
-	}
-	
-	//! Returns -( a*b - c )
-	friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
-	{
-		return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) );
-	}
-	
-	friend Vec4 Reciprocal( Vec4::Arg v )
-	{
-		// get the reciprocal estimate
-		vector float estimate = vec_re( v.m_v );
-		
-		// one round of Newton-Rhaphson refinement
-		vector float diff = vec_nmsub( estimate, v.m_v, ( vector float )( 1.0f ) );
-		return Vec4( vec_madd( diff, estimate, estimate ) );
-	}
-	
-	friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( vec_min( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
-	{
-		return Vec4( vec_max( left.m_v, right.m_v ) );
-	}
-	
-	friend Vec4 Truncate( Vec4::Arg v )
-	{
-		return Vec4( vec_trunc( v.m_v ) );
-	}
-	
-	friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) 
-	{
-		return vec_any_lt( left.m_v, right.m_v ) != 0;
-	}
-	
-private:
-	vector float m_v;
-};
-
-} // namespace squish
-
-#endif // ndef SQUISH_SIMD_VE_H
--- a/Engine/lib/squish/singlecolourfit.cpp
+++ b/Engine/lib/squish/singlecolourfit.cpp
@ -26,7 +26,6 @@
 #include "singlecolourfit.h"
 #include "colourset.h"
 #include "colourblock.h"
-#include <limits.h> 

 namespace squish {

--- a/Engine/lib/squish/singlecolourfit.h
+++ b/Engine/lib/squish/singlecolourfit.h
@ -26,7 +26,7 @@
 #ifndef SQUISH_SINGLECOLOURFIT_H
 #define SQUISH_SINGLECOLOURFIT_H

-#include <squish.h>
+#include "squish.h"
 #include "colourfit.h"

 namespace squish {
--- a/Engine/lib/squish/singlecolourlookup.inl
+++ b/Engine/lib/squish/singlecolourlookup.inl
@ -1,3 +1,27 @@
+/* -----------------------------------------------------------------------------
+
+	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+	Permission is hereby granted, free of charge, to any person obtaining
+	a copy of this software and associated documentation files (the 
+	"Software"), to	deal in the Software without restriction, including
+	without limitation the rights to use, copy, modify, merge, publish,
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+
+	The above copyright notice and this permission notice shall be included
+	in all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+	
+   -------------------------------------------------------------------------- */

 static SingleColourLookup const lookup_5_3[] = 
 {
--- a/Engine/lib/squish/squish-Info.plist
+++ b/Engine/lib/squish/squish-Info.plist
@ -1,20 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-	<key>CFBundleDevelopmentRegion</key>
-	<string>English</string>
-	<key>CFBundleExecutable</key>
-	<string>${EXECUTABLE_NAME}</string>
-	<key>CFBundleIdentifier</key>
-	<string>com.sjbrown.squish</string>
-	<key>CFBundleInfoDictionaryVersion</key>
-	<string>6.0</string>
-	<key>CFBundlePackageType</key>
-	<string>FMWK</string>
-	<key>CFBundleSignature</key>
-	<string>????</string>
-	<key>CFBundleVersion</key>
-	<string>1.0</string>
-</dict>
-</plist>
--- a/Engine/lib/squish/squish.cpp
+++ b/Engine/lib/squish/squish.cpp
@ -23,7 +23,7 @@
 	
   -------------------------------------------------------------------------- */
   
-#include <squish.h>
+#include "squish.h"
 #include "colourset.h"
 #include "maths.h"
 #include "rangefit.h"
@ -37,37 +37,58 @@ namespace squish {
 static int FixFlags( int flags )
 {
 	// grab the flag bits
-	int method = flags & ( kDxt1 | kDxt3 | kDxt5 );
+	int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
 	int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
-	int metric = flags & ( kColourMetricPerceptual | kColourMetricUniform );
 	int extra = flags & kWeightColourByAlpha;
 	
 	// set defaults
-	if( method != kDxt3 && method != kDxt5 )
+	if ( method != kDxt3
+	&&   method != kDxt5
+	&&   method != kBc4
+	&&   method != kBc5 )
+	{
 		method = kDxt1;
-	if( fit != kColourRangeFit )
+	}
+	if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
 		fit = kColourClusterFit;
-	if( metric != kColourMetricUniform )
-		metric = kColourMetricPerceptual;
 		
 	// done
-	return method | fit | metric | extra;
+	return method | fit | extra;
 }

-void Compress( u8 const* rgba, void* block, int flags )
-{
-	// compress with full mask
-	CompressMasked( rgba, 0xffff, block, flags );
-}
-
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags )
+void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
 {
 	// fix any bad flags
 	flags = FixFlags( flags );

+	if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
+	{
+		u8 alpha[16*4];
+		for( int i = 0; i < 16; ++i )
+		{
+			alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
+		}
+
+		u8* rBlock = reinterpret_cast< u8* >( block );
+		CompressAlphaDxt5( alpha, mask, rBlock );
+
+		if ( ( flags & ( kBc5 ) ) != 0 )
+		{
+			for( int i = 0; i < 16; ++i )
+			{
+				alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
+			}
+
+			u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
+			CompressAlphaDxt5( alpha, mask, gBlock );
+		}
+
+		return;
+	}
+
 	// get the block locations
 	void* colourBlock = block;
-	void* alphaBock = block;
+	void* alphaBlock = block;
 	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
 		colourBlock = reinterpret_cast< u8* >( block ) + 8;

@ -84,21 +105,21 @@ void CompressMasked( u8 const* rgba, int mask, void* block, int flags )
 	else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
 	{
 		// do a range fit
-		RangeFit fit( &colours, flags );
+		RangeFit fit( &colours, flags, metric );
 		fit.Compress( colourBlock );
 	}
 	else
 	{
 		// default to a cluster fit (could be iterative or not)
-		ClusterFit fit( &colours, flags );
+		ClusterFit fit( &colours, flags, metric );
 		fit.Compress( colourBlock );
 	}
 	
 	// compress alpha separately if necessary
 	if( ( flags & kDxt3 ) != 0 )
-		CompressAlphaDxt3( rgba, mask, alphaBock );
+		CompressAlphaDxt3( rgba, mask, alphaBlock );
 	else if( ( flags & kDxt5 ) != 0 )
-		CompressAlphaDxt5( rgba, mask, alphaBock );
+		CompressAlphaDxt5( rgba, mask, alphaBlock );
 }

 void Decompress( u8* rgba, void const* block, int flags )
@ -129,18 +150,18 @@ int GetStorageRequirements( int width, int height, int flags )
 	
 	// compute the storage requirements
 	int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
-	int blocksize = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
-	return blockcount*blocksize;	
+	int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+	return blockcount*blocksize;
 }

-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags )
+void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
 {
 	// fix any bad flags
 	flags = FixFlags( flags );

 	// initialise the block output
 	u8* targetBlock = reinterpret_cast< u8* >( blocks );
-	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
+	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;

 	// loop over blocks
 	for( int y = 0; y < height; y += 4 )
@ -179,7 +200,7 @@ void CompressImage( u8 const* rgba, int width, int height, void* blocks, int fla
 			}
 			
 			// compress it into the output
-			CompressMasked( sourceRgba, mask, targetBlock, flags );
+			CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
 			
 			// advance
 			targetBlock += bytesPerBlock;
@ -194,7 +215,7 @@ void DecompressImage( u8* rgba, int width, int height, void const* blocks, int f

 	// initialise the block input
 	u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
-	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
+	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;

 	// loop over blocks
 	for( int y = 0; y < height; y += 4 )
--- a/Engine/lib/squish/squish.h
+++ b/Engine/lib/squish/squish.h
@ -39,74 +39,42 @@ typedef unsigned char u8;
 enum
 {
 	//! Use DXT1 compression.
-	kDxt1 = ( 1 << 0 ), 
-	
+	kDxt1 = ( 1 << 0 ),
+
 	//! Use DXT3 compression.
-	kDxt3 = ( 1 << 1 ), 
-	
+	kDxt3 = ( 1 << 1 ),
+
 	//! Use DXT5 compression.
-	kDxt5 = ( 1 << 2 ), 
-	
-	//! Use a very slow but very high quality colour compressor.
-	kColourIterativeClusterFit = ( 1 << 8 ),	
-	
+	kDxt5 = ( 1 << 2 ),
+
+	//! Use BC4 compression.
+	kBc4 = ( 1 << 3 ),
+
+	//! Use BC5 compression.
+	kBc5 = ( 1 << 4 ),
+
 	//! Use a slow but high quality colour compressor (the default).
-	kColourClusterFit = ( 1 << 3 ),	
-	
+	kColourClusterFit = ( 1 << 5 ),
+
 	//! Use a fast but low quality colour compressor.
-	kColourRangeFit	= ( 1 << 4 ),
-	
-	//! Use a perceptual metric for colour error (the default).
-	kColourMetricPerceptual = ( 1 << 5 ),
+	kColourRangeFit	= ( 1 << 6 ),

-	//! Use a uniform metric for colour error.
-	kColourMetricUniform = ( 1 << 6 ),
-	
 	//! Weight the colour by alpha during cluster fit (disabled by default).
-	kWeightColourByAlpha = ( 1 << 7 )
+	kWeightColourByAlpha = ( 1 << 7 ),
+
+	//! Use a very slow but very high quality colour compressor.
+	kColourIterativeClusterFit = ( 1 << 8 ),
 };

 // -----------------------------------------------------------------------------

-/*! @brief Compresses a 4x4 block of pixels.
-
-	@param rgba		The rgba values of the 16 source pixels.
-	@param block	Storage for the compressed DXT block.
-	@param flags	Compression flags.
-	
-	The source pixels should be presented as a contiguous array of 16 rgba
-	values, with each component as 1 byte each. In memory this should be:
-	
-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
-	
-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
-	however, DXT1 will be used by default if none is specified. When using DXT1 
-	compression, 8 bytes of storage are required for the compressed DXT block. 
-	DXT3 and DXT5 compression require 16 bytes of storage per block.
-	
-	The flags parameter can also specify a preferred colour compressor and 
-	colour error metric to use when fitting the RGB components of the data. 
-	Possible colour compressors are: kColourClusterFit (the default), 
-	kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics 
-	are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no 
-	flags are specified in any particular category then the default will be 
-	used. Unknown flags are ignored.
-	
-	When using kColourClusterFit, an additional flag can be specified to
-	weight the colour of each pixel by its alpha value. For images that are
-	rendered using alpha blending, this can significantly increase the 
-	perceived quality.
-*/
-void Compress( u8 const* rgba, void* block, int flags );
-
-// -----------------------------------------------------------------------------
-
 /*! @brief Compresses a 4x4 block of pixels.

 	@param rgba		The rgba values of the 16 source pixels.
 	@param mask		The valid pixel mask.
 	@param block	Storage for the compressed DXT block.
 	@param flags	Compression flags.
+	@param metric	Optional per-channel colour weights (3 floats), or NULL for uniform.
 	
 	The source pixels should be presented as a contiguous array of 16 rgba
 	values, with each component as 1 byte each. In memory this should be:
@ -125,20 +93,68 @@ void Compress( u8 const* rgba, void* block, int flags );
 	compression, 8 bytes of storage are required for the compressed DXT block. 
 	DXT3 and DXT5 compression require 16 bytes of storage per block.
 	
-	The flags parameter can also specify a preferred colour compressor and 
-	colour error metric to use when fitting the RGB components of the data. 
-	Possible colour compressors are: kColourClusterFit (the default), 
-	kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics 
-	are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no 
-	flags are specified in any particular category then the default will be 
-	used. Unknown flags are ignored.
+	The flags parameter can also specify a preferred colour compressor to use 
+	when fitting the RGB components of the data. Possible colour compressors 
+	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
+	quality) or kColourIterativeClusterFit (slowest, best quality).
+		
+	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
+	flag can be specified to weight the importance of each pixel by its alpha 
+	value. For images that are rendered using alpha blending, this can 
+	significantly increase the perceived quality.
 	
-	When using kColourClusterFit, an additional flag can be specified to
-	weight the colour of each pixel by its alpha value. For images that are
-	rendered using alpha blending, this can significantly increase the 
-	perceived quality.
+	The metric parameter can be used to weight the relative importance of each
+	colour channel, or pass NULL to use the default uniform weight of 
+	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
+	allowed either uniform or "perceptual" weights with the fixed values
+	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
+	contiguous array of 3 floats.
 */
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags );
+void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
+
+// -----------------------------------------------------------------------------
+
+/*! @brief Compresses a 4x4 block of pixels.
+
+	@param rgba		The rgba values of the 16 source pixels.
+	@param block	Storage for the compressed DXT block.
+	@param flags	Compression flags.
+	@param metric	An optional perceptual metric.
+	
+	The source pixels should be presented as a contiguous array of 16 rgba
+	values, with each component as 1 byte each. In memory this should be:
+	
+		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
+	
+	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
+	however, DXT1 will be used by default if none is specified. When using DXT1 
+	compression, 8 bytes of storage are required for the compressed DXT block. 
+	DXT3 and DXT5 compression require 16 bytes of storage per block.
+	
+	The flags parameter can also specify a preferred colour compressor to use 
+	when fitting the RGB components of the data. Possible colour compressors 
+	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
+	quality) or kColourIterativeClusterFit (slowest, best quality).
+		
+	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
+	flag can be specified to weight the importance of each pixel by its alpha 
+	value. For images that are rendered using alpha blending, this can 
+	significantly increase the perceived quality.
+	
+	The metric parameter can be used to weight the relative importance of each
+	colour channel, or pass NULL to use the default uniform weight of 
+	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
+	allowed either uniform or "perceptual" weights with the fixed values
+	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
+	contiguous array of 3 floats.
+	
+	This method is an inline that calls CompressMasked with a mask of 0xffff, 
+	provided for compatibility with older versions of squish.
+*/
+inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
+{
+	CompressMasked( rgba, 0xffff, block, flags, metric ); // mask 0xffff has all 16 bits set: presumably one bit per pixel, so the whole 4x4 block is treated as valid (confirm against CompressMasked)
+}

 // -----------------------------------------------------------------------------

@ -186,6 +202,7 @@ int GetStorageRequirements( int width, int height, int flags );
 	@param height	The height of the source image.
 	@param blocks	Storage for the compressed output.
 	@param flags	Compression flags.
+	@param metric	An optional perceptual metric.
 	
 	The source pixels should be presented as a contiguous array of width*height
 	rgba values, with each component as 1 byte each. In memory this should be:
@ -197,24 +214,29 @@ int GetStorageRequirements( int width, int height, int flags );
 	compression, 8 bytes of storage are required for each compressed DXT block. 
 	DXT3 and DXT5 compression require 16 bytes of storage per block.
 	
-	The flags parameter can also specify a preferred colour compressor and 
-	colour error metric to use when fitting the RGB components of the data. 
-	Possible colour compressors are: kColourClusterFit (the default), 
-	kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics 
-	are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no 
-	flags are specified in any particular category then the default will be 
-	used. Unknown flags are ignored.
+	The flags parameter can also specify a preferred colour compressor to use 
+	when fitting the RGB components of the data. Possible colour compressors 
+	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
+	quality) or kColourIterativeClusterFit (slowest, best quality).
+		
+	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
+	flag can be specified to weight the importance of each pixel by its alpha 
+	value. For images that are rendered using alpha blending, this can 
+	significantly increase the perceived quality.
 	
-	When using kColourClusterFit, an additional flag can be specified to
-	weight the colour of each pixel by its alpha value. For images that are
-	rendered using alpha blending, this can significantly increase the 
-	perceived quality.
+	The metric parameter can be used to weight the relative importance of each
+	colour channel, or pass NULL to use the default uniform weight of 
+	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
+	allowed either uniform or "perceptual" weights with the fixed values
+	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
+	contiguous array of 3 floats.
 	
-	Internally this function calls squish::Compress for each block. To see how
-	much memory is required in the compressed image, use
-	squish::GetStorageRequirements.
+	Internally this function calls squish::CompressMasked for each block, which 
+	allows for pixels outside the image to take arbitrary values. The function 
+	squish::GetStorageRequirements can be called to compute the amount of memory
+	to allocate for the compressed output.
 */
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags );
+void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );

 // -----------------------------------------------------------------------------

--- a/Engine/lib/squish/texture_compression_s3tc.txt
+++ b/Engine/lib/squish/texture_compression_s3tc.txt
@ -1,508 +0,0 @@
-Name
-
-    EXT_texture_compression_s3tc
-
-Name Strings
-
-    GL_EXT_texture_compression_s3tc
-
-Contact
-
-    Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com)
-
-Status
-
-    FINAL
-
-Version
-
-    1.1, 16 November 2001 (containing only clarifications relative to
-                           version 1.0, dated 7 July 2000)
-
-Number
-
-    198
-
-Dependencies
-
-    OpenGL 1.1 is required.
-
-    GL_ARB_texture_compression is required.
-
-    This extension is written against the OpenGL 1.2.1 Specification.
-
-Overview
-
-    This extension provides additional texture compression functionality
-    specific to S3's S3TC format (called DXTC in Microsoft's DirectX API),
-    subject to all the requirements and limitations described by the extension
-    GL_ARB_texture_compression.
-
-    This extension supports DXT1, DXT3, and DXT5 texture compression formats.
-    For the DXT1 image format, this specification supports an RGB-only mode
-    and a special RGBA mode with single-bit "transparent" alpha.
-
-IP Status
-
-    Contact S3 Incorporated (http://www.s3.com) regarding any intellectual
-    property issues associated with implementing this extension.
-
-    WARNING:  Vendors able to support S3TC texture compression in Direct3D
-    drivers do not necessarily have the right to use the same functionality in
-    OpenGL.
-
-Issues
-
-    (1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported?
-
-        RESOLVED:  No -- insufficient interest.  Supporting DXT2 and DXT4
-        would require some rework to the TexEnv definition (maybe add a new
-        base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats.
-        Note that the EXT_texture_env_combine extension (which extends normal
-        TexEnv modes) can be used to support textures with premultiplied alpha.
-
-    (2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported
-        or should we use only the DXT<n> enums?  
-
-        RESOLVED:  No.  A generic RGBA_S3TC_EXT is problematic because DXT3
-        and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is
-        also) yet one format must be chosen up front.
-
-    (3) Should TexSubImage support all block-aligned edits or just the minimal
-        functionality required by the ARB_texture_compression extension?
-
-        RESOLVED:  Allow all valid block-aligned edits.
-
-    (4) A pre-compressed image with a DXT1 format can be used as either an
-        RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image.  If the image has
-        transparent texels, how are they treated in each format?
-
-        RESOLVED:  The renderer has to make sure that an RGB_S3TC_DXT1 format
-        is decoded as RGB (where alpha is effectively one for all texels),
-        while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all
-        texels with "transparent" encodings).  Otherwise, the formats are
-        identical.
-
-    (5) Is the encoding of the RGB components for DXT1 formats correct in this
-        spec?  MSDN documentation does not specify an RGB color for the
-        "transparent" encoding.  Is it really black?
-
-        RESOLVED:  Yes.  The specification for the DXT1 format initially
-        required black, but later changed that requirement to a
-        recommendation.  All vendors involved in the definition of this
-        specification support black.  In addition, specifying black has a
-        useful behavior.
-
-        When blending multiple texels (GL_LINEAR filtering), mixing opaque and
-        transparent samples is problematic.  Defining a black color on
-        transparent texels achieves a sensible result that works like a
-        texture with premultiplied alpha.  For example, if three opaque white
-        and one transparent sample is being averaged, the result would be a
-        75% intensity gray (with an alpha of 75%).  This is the same result on
-        the color channels as would be obtained using a white color, 75%
-        alpha, and a SRC_ALPHA blend factor.
-
-    (6) Is the encoding of the RGB components for DXT3 and DXT5 formats
-        correct in this spec?  MSDN documentation suggests that the RGB blocks
-        for DXT3 and DXT5 are decoded as described by the DXT1 format.
-
-        RESOLVED:  Yes -- this appears to be a bug in the MSDN documentation.
-        The specification for the DXT2-DXT5 formats requires decoding using the
-        opaque block encoding, regardless of the relative values of "color0"
-        and "color1".
-
-New Procedures and Functions
-
-    None.
-
-New Tokens
-
-    Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D,
-    and CompressedTexImage2DARB and the <format> parameter of
-    CompressedTexSubImage2DARB:
-
-        COMPRESSED_RGB_S3TC_DXT1_EXT                   0x83F0
-        COMPRESSED_RGBA_S3TC_DXT1_EXT                  0x83F1
-        COMPRESSED_RGBA_S3TC_DXT3_EXT                  0x83F2
-        COMPRESSED_RGBA_S3TC_DXT5_EXT                  0x83F3
-
-Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation)
-
-    None.
-
-Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization)
-
-    Add to Table 3.16.1:  Specific Compressed Internal Formats
-
-        Compressed Internal Format         Base Internal Format
-        ==========================         ====================
-        COMPRESSED_RGB_S3TC_DXT1_EXT       RGB
-        COMPRESSED_RGBA_S3TC_DXT1_EXT      RGBA
-        COMPRESSED_RGBA_S3TC_DXT3_EXT      RGBA
-        COMPRESSED_RGBA_S3TC_DXT5_EXT      RGBA
-
-    
-    Modify Section 3.8.2, Alternate Image Specification
-
-    (add to end of TexSubImage discussion, p.123 -- after edit from the
-    ARB_texture_compression spec)
-
-    If the internal format of the texture image being modified is
-    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
-    texture is stored using one of the several S3TC compressed texture image
-    formats.  Such images are easily edited along 4x4 texel boundaries, so the
-    limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed.
-    TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION
-    error only if one of the following conditions occurs:
-
-        * <width> is not a multiple of four or equal to TEXTURE_WIDTH, 
-          unless <xoffset> and <yoffset> are both zero.
-        * <height> is not a multiple of four or equal to TEXTURE_HEIGHT,
-          unless <xoffset> and <yoffset> are both zero.
-        * <xoffset> or <yoffset> is not a multiple of four.
-
-    The contents of any 4x4 block of texels of an S3TC compressed texture
-    image that does not intersect the area being modified are preserved during
-    valid TexSubImage2D and CopyTexSubImage2D calls.
-
-
-    Add to Section 3.8.2, Alternate Image Specification (adding to the end of
-    the CompressedTexImage section introduced by the ARB_texture_compression
-    spec)
-
-    If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
-    COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one
-    of several S3TC compressed texture image formats.  The S3TC texture
-    compression algorithm supports only 2D images without borders.
-    CompressedTexImage1DARB and CompressedTexImage3DARB produce an
-    INVALID_ENUM error if <internalformat> is an S3TC format.
-    CompressedTexImage2DARB will produce an INVALID_OPERATION error if
-    <border> is non-zero.
-
-
-    Add to Section 3.8.2, Alternate Image Specification (adding to the end of
-    the CompressedTexSubImage section introduced by the
-    ARB_texture_compression spec)
-
-    If the internal format of the texture image being modified is
-    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
-    texture is stored using one of the several S3TC compressed texture image
-    formats.  Since the S3TC texture compression algorithm supports only 2D
-    images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce
-    an INVALID_ENUM error if <format> is an S3TC format.  Since S3TC images
-    are easily edited along 4x4 texel boundaries, the limitations on
-    CompressedTexSubImage2D are relaxed.  CompressedTexSubImage2D will result
-    in an INVALID_OPERATION error only if one of the following conditions
-    occurs:
-
-        * <width> is not a multiple of four or equal to TEXTURE_WIDTH.
-        * <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
-        * <xoffset> or <yoffset> is not a multiple of four.
-
-    The contents of any 4x4 block of texels of an S3TC compressed texture
-    image that does not intersect the area being modified are preserved during
-    valid TexSubImage2D and CopyTexSubImage2D calls.
-
-Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment
-Operations and the Frame Buffer)
-
-    None.
-
-Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions)
-
-    None.
-
-Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and
-State Requests)
-
-    None.
-
-Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance)
-
-    None.
-
-Additions to the AGL/GLX/WGL Specifications
-
-    None.
-
-GLX Protocol
-
-    None.
-
-Errors
-
-    INVALID_ENUM is generated by CompressedTexImage1DARB or
-    CompressedTexImage3DARB if <internalformat> is
-    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT.
-
-    INVALID_OPERATION is generated by CompressedTexImage2DARB if
-    <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
-    COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero.
-
-    INVALID_ENUM is generated by CompressedTexSubImage1DARB or
-    CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
-    COMPRESSED_RGBA_S3TC_DXT5_EXT.
-
-    INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or
-    CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is
-    COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
-    COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of
-    the following apply: <width> is not a multiple of four or equal to
-    TEXTURE_WIDTH; <height> is not a multiple of four or equal to
-    TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
-
-
-    The following restrictions from the ARB_texture_compression specification
-    do not apply to S3TC texture formats, since subimage modification is
-    straightforward as long as the subimage is properly aligned.
-
-    DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D,
-    DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or
-    DELETE: CopyTexSubImage3D if the internal format of the texture image is
-    DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal
-    DELETE: -b, where b is value of TEXTURE_BORDER.
-
-    DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB,
-    DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the
-    DELETE: entire texture image is not being edited:  if <xoffset>,
-    DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is
-    DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset>
-    DELETE: + <depth> is less than d+b, where b is the value of
-    DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of
-    DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH.
-
-    See also errors in the GL_ARB_texture_compression specification.
-
-New State
-
-    In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT
-    subscript for Z by 4 in the "Type" row.
-
-New Implementation Dependent State
-
-    None
-
-Appendix
-
-    S3TC Compressed Texture Image Formats
-
-    Compressed texture images stored using the S3TC compressed image formats
-    are represented as a collection of 4x4 texel blocks, where each block
-    contains 64 or 128 bits of texel data.  The image is encoded as a normal
-    2D raster image in which each 4x4 block is treated as a single pixel.  If
-    an S3TC image has a width or height less than four, the data corresponding
-    to texels outside the image are irrelevant and undefined.
-
-    When an S3TC image with a width of <w>, height of <h>, and block size of
-    <blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in
-    bytes) is:
-    
-        ceil(<w>/4) * ceil(<h>/4) * blocksize.
-
-    When decoding an S3TC image, the block containing the texel at offset
-    (<x>, <y>) begins at an offset (in bytes) relative to the base of the
-    image of:
-
-        blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)).
-
-    The data corresponding to a specific texel (<x>, <y>) are extracted from a
-    4x4 texel block using a relative (x,y) value of
-    
-        (<x> modulo 4, <y> modulo 4).
-
-    There are four distinct S3TC image formats:
-
-    COMPRESSED_RGB_S3TC_DXT1_EXT:  Each 4x4 block of texels consists of 64
-    bits of RGB image data.  
-
-    Each RGB image data block is encoded as a sequence of 8 bytes, called (in
-    order of increasing address):
-
-            c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3
-
-        The 8 bytes of the block are decoded into three quantities:
-
-            color0 = c0_lo + c0_hi * 256
-            color1 = c1_lo + c1_hi * 256
-            bits   = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3))
-        
-        color0 and color1 are 16-bit unsigned integers that are unpacked to
-        RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with
-        a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5.
-
-        bits is a 32-bit unsigned integer, from which a two-bit control code
-        is extracted for a texel at location (x,y) in the block using:
-
-            code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0]
-        
-        where bit 31 is the most significant and bit 0 is the least
-        significant bit.
-
-        The RGB color for a texel at location (x,y) in the block is given by:
-
-            RGB0,              if color0 > color1 and code(x,y) == 0
-            RGB1,              if color0 > color1 and code(x,y) == 1
-            (2*RGB0+RGB1)/3,   if color0 > color1 and code(x,y) == 2
-            (RGB0+2*RGB1)/3,   if color0 > color1 and code(x,y) == 3
-
-            RGB0,              if color0 <= color1 and code(x,y) == 0
-            RGB1,              if color0 <= color1 and code(x,y) == 1
-            (RGB0+RGB1)/2,     if color0 <= color1 and code(x,y) == 2
-            BLACK,             if color0 <= color1 and code(x,y) == 3
-
-        Arithmetic operations are done per component, and BLACK refers to an
-        RGB color where red, green, and blue are all zero.
-
-    Since this image has an RGB format, there is no alpha component and the
-    image is considered fully opaque.
-
-
-    COMPRESSED_RGBA_S3TC_DXT1_EXT:  Each 4x4 block of texels consists of 64
-    bits of RGB image data and minimal alpha information.  The RGB components
-    of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT.
- 
-        The alpha component for a texel at location (x,y) in the block is
-        given by:
-
-            0.0,               if color0 <= color1 and code(x,y) == 3
-            1.0,               otherwise
-
-        IMPORTANT:  When encoding an RGBA image into a format using 1-bit
-        alpha, any texels with an alpha component less than 0.5 end up with an
-        alpha of 0.0 and any texels with an alpha component greater than or
-        equal to 0.5 end up with an alpha of 1.0.  When encoding an RGBA image
-        into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red,
-        green, and blue components of any texels with a final alpha of 0.0
-        will automatically be zero (black).  If this behavior is not desired
-        by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT.
-        This format will never be used when a generic compressed internal
-        format (Table 3.16.2) is specified, although the nearly identical
-        format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be.
-
-
-    COMPRESSED_RGBA_S3TC_DXT3_EXT:  Each 4x4 block of texels consists of 64
-    bits of uncompressed alpha image data followed by 64 bits of RGB image
-    data.  
-
-    Each RGB image data block is encoded according to the
-    COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
-    bits always use the non-transparent encodings.  In other words, they are
-    treated as though color0 > color1, regardless of the actual values of
-    color0 and color1.
-
-    Each alpha image data block is encoded as a sequence of 8 bytes, called
-    (in order of increasing address):
-
-            a0, a1, a2, a3, a4, a5, a6, a7
-
-        The 8 bytes of the block are decoded into one 64-bit integer:
-
-            alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 +
-                         256 * (a5 + 256 * (a6 + 256 * a7))))))
-
-        alpha is a 64-bit unsigned integer, from which a four-bit alpha value
-        is extracted for a texel at location (x,y) in the block using:
-
-            alpha(x,y) = bits[4*(4*y+x)+3..4*(4*y+x)+0]
-
-        where bit 63 is the most significant and bit 0 is the least
-        significant bit.
-
-        The alpha component for a texel at location (x,y) in the block is
-        given by alpha(x,y) / 15.
-
- 
-    COMPRESSED_RGBA_S3TC_DXT5_EXT:  Each 4x4 block of texels consists of 64
-    bits of compressed alpha image data followed by 64 bits of RGB image data.
-
-    Each RGB image data block is encoded according to the
-    COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
-    bits always use the non-transparent encodings.  In other words, they are
-    treated as though color0 > color1, regardless of the actual values of
-    color0 and color1.
-
-    Each alpha image data block is encoded as a sequence of 8 bytes, called
-    (in order of increasing address):
-
-        alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5
-
-        The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha
-        components by multiplying by 1/255.
-
-        The 6 "bits" bytes of the block are decoded into one 48-bit integer:
-
-          bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 + 
-                          256 * (bits_4 + 256 * bits_5))))
-
-        bits is a 48-bit unsigned integer, from which a three-bit control code
-        is extracted for a texel at location (x,y) in the block using:
-
-            code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0]
-
-        where bit 47 is the most significant and bit 0 is the least
-        significant bit.
-
-        The alpha component for a texel at location (x,y) in the block is
-        given by:
-
-              alpha0,                   code(x,y) == 0
-              alpha1,                   code(x,y) == 1
-
-              (6*alpha0 + 1*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 2
-              (5*alpha0 + 2*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 3
-              (4*alpha0 + 3*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 4
-              (3*alpha0 + 4*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 5
-              (2*alpha0 + 5*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 6
-              (1*alpha0 + 6*alpha1)/7,  alpha0 > alpha1 and code(x,y) == 7
-
-              (4*alpha0 + 1*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 2
-              (3*alpha0 + 2*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 3
-              (2*alpha0 + 3*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 4
-              (1*alpha0 + 4*alpha1)/5,  alpha0 <= alpha1 and code(x,y) == 5
-              0.0,                      alpha0 <= alpha1 and code(x,y) == 6
-              1.0,                      alpha0 <= alpha1 and code(x,y) == 7
-
-
-Revision History
-
-    1.1,  11/16/01 pbrown:    Updated contact info, clarified where texels
-                              fall within a single block.
-
-    1.0,  07/07/00 prbrown1:  Published final version agreed to by working
-                              group members.
-
-    0.9,  06/24/00 prbrown1:  Documented that block-aligned TexSubImage calls
-                              do not modify existing texels outside the
-                              modified blocks.  Added caveat to allow for a
-                              (0,0)-anchored TexSubImage operation of
-                              arbitrary size.
-
-    0.7,  04/11/00 prbrown1:  Added issues on DXT1, DXT3, and DXT5 encodings
-                              where the MSDN documentation doesn't match what
-                              is really done.  Added enum values from the
-                              extension registry.
-
-    0.4,  03/28/00 prbrown1:  Updated to reflect final version of the
-                              ARB_texture_compression extension.  Allowed
-                              block-aligned TexSubImage calls.
-
-    0.3,  03/07/00 prbrown1:  Resolved issues pertaining to the format of RGB
-                              blocks in the DXT3 and DXT5 formats (they don't
-                              ever use the "transparent" encoding).  Fixed
-                              decoding of DXT1 blocks.  Pointed out issue of
-                              "transparent" texels in DXT1 encodings having
-                              different behaviors for RGB and RGBA internal
-                              formats.
-
-    0.2,  02/23/00 prbrown1:  Minor revisions; added several issues.
-
-    0.11, 02/17/00 prbrown1:  Slight modification to error semantics
-                              (INVALID_ENUM instead of INVALID_OPERATION).
-
-    0.1,  02/15/00 prbrown1:  Initial revision.