From 8de0b7cbb164a13445917fedef1410765f2ced9d Mon Sep 17 00:00:00 2001
From: thecelloman <silent.mike.hall@gmail.com>
Date: Mon, 4 Mar 2013 14:37:50 -0500
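Subject: [PATCH] Update squish library to 1.11

Changes to Engine/lib/squish and its one caller:

* Remove the SquishMath wrapper (squishMath.cpp, squishMath.h). Call
  sites now use std::min/max/fabs/sqrt/pow/sin/cos/atan2/floor/ceil
  via <cmath> and <algorithm>; clusterfit.cpp and maths.cpp include
  <cfloat> in place of the wrapper's FLT_* macro definitions.
* Remove squishOmp.cpp and the squish::CompressImageOMP declaration.
  ENABLE_OPEN_MP was commented out, so the function only forwarded to
  squish::CompressImage; DDSUtil::squishDDS now calls CompressImage
  directly.
* Make squish::FixFlags static in squish.cpp and drop its declaration
  from squish.h.
* Add the squish ChangeLog. Note that its newest entry is 1.10.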
---
 Engine/lib/squish/ChangeLog           |  52 +++++++++++++
 Engine/lib/squish/alpha.cpp           |   6 +-
 Engine/lib/squish/clusterfit.cpp      |   3 +-
 Engine/lib/squish/colourblock.cpp     |   1 -
 Engine/lib/squish/colourset.cpp       |   3 +-
 Engine/lib/squish/maths.cpp           |  33 +++++----
 Engine/lib/squish/maths.h             |  21 +++---
 Engine/lib/squish/simd_float.h        |  24 +++---
 Engine/lib/squish/squish.cpp          |   2 +-
 Engine/lib/squish/squish.h            |   4 -
 Engine/lib/squish/squishMath.cpp      |  62 ----------------
 Engine/lib/squish/squishMath.h        |  28 -------
 Engine/lib/squish/squishOmp.cpp       | 102 --------------------------
 Engine/source/gfx/bitmap/ddsUtils.cpp |   5 +-
 14 files changed, 99 insertions(+), 247 deletions(-)
 create mode 100644 Engine/lib/squish/ChangeLog
 delete mode 100644 Engine/lib/squish/squishMath.cpp
 delete mode 100644 Engine/lib/squish/squishMath.h
 delete mode 100644 Engine/lib/squish/squishOmp.cpp
diff --git a/Engine/lib/squish/ChangeLog b/Engine/lib/squish/ChangeLog
new file mode 100644
index 000000000..ba03f4c57
--- /dev/null
+++ b/Engine/lib/squish/ChangeLog
@@ -0,0 +1,52 @@
+1.10
+* Iterative cluster fit is now considered to be a new compression mode
+* The core cluster fit is now 4x faster using contributions by Ignacio
+Castano from NVIDIA
+* The single colour lookup table has been halved by exploiting symmetry
+
+1.9
+* Added contributed SSE1 truncate implementation
+* Changed use of SQUISH_USE_SSE to be 1 for SSE and 2 for SSE2 instructions
+* Cluster fit is now iterative to further reduce image error
+
+1.8
+* Switched from using floor to trunc for much better SSE performance (again)
+* Xcode build now expects libpng in /usr/local for extra/squishpng
+
+1.7
+* Fixed floating-point equality issue in clusterfit sort (x86 affected only)
+* Implemented proper SSE(2) floor function for 50% speedup on SSE builds 
+* The range fit implementation now uses the correct colour metric
+
+1.6
+* Fixed bug in CompressImage where masked pixels were not skipped over
+* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels
+* Fixed major DXT1 bug that can generate unexpected transparent pixels
+
+1.5
+* Added CompressMasked function to handle incomplete DXT blocks more cleanly
+* Added kWeightColourByAlpha flag for better quality images when alpha blending
+
+1.4
+* Fixed stack overflow in rangefit
+
+1.3
+* Worked around SSE floor implementation bug, proper fix needed!
+* This release has visual studio and makefile builds that work
+
+1.2
+* Added provably optimal single colour compressor
+* Added extra/squishgen.cpp that generates single colour lookup tables
+
+1.1
+* Fixed a DXT1 colour output bug
+* Changed argument order for Decompress function to match Compress
+* Added GetStorageRequirements function
+* Added CompressImage function
+* Added DecompressImage function
+* Moved squishtool.cpp to extra/squishpng.cpp
+* Added extra/squishtest.cpp
+
+1.0
+* Initial release
+
diff --git a/Engine/lib/squish/alpha.cpp b/Engine/lib/squish/alpha.cpp
index f4a72b474..98c39e6bb 100644
--- a/Engine/lib/squish/alpha.cpp
+++ b/Engine/lib/squish/alpha.cpp
@@ -24,7 +24,7 @@
    -------------------------------------------------------------------------- */
    
 #include "alpha.h"
-#include "squishMath.h"
+#include <algorithm>
 
 namespace squish {
 
@@ -92,9 +92,9 @@ void DecompressAlphaDxt3( u8* rgba, void const* block )
 static void FixRange( int& min, int& max, int steps )
 {
 	if( max - min < steps )
-		max = SquishMath::min( min + steps, 255 );
+		max = std::min( min + steps, 255 );
 	if( max - min < steps )
-		min = SquishMath::max( 0, max - steps );
+		min = std::max( 0, max - steps );
 }
 
 static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
diff --git a/Engine/lib/squish/clusterfit.cpp b/Engine/lib/squish/clusterfit.cpp
index 4ac89984d..afea84880 100644
--- a/Engine/lib/squish/clusterfit.cpp
+++ b/Engine/lib/squish/clusterfit.cpp
@@ -27,8 +27,7 @@
 #include "clusterfit.h"
 #include "colourset.h"
 #include "colourblock.h"
-#include "squishMath.h"
-#include <algorithm>
+#include <cfloat>
 
 namespace squish {
 
diff --git a/Engine/lib/squish/colourblock.cpp b/Engine/lib/squish/colourblock.cpp
index eff96b002..e6a5788b7 100644
--- a/Engine/lib/squish/colourblock.cpp
+++ b/Engine/lib/squish/colourblock.cpp
@@ -24,7 +24,6 @@
    -------------------------------------------------------------------------- */
    
 #include "colourblock.h"
-#include <algorithm>
 
 namespace squish {
 
diff --git a/Engine/lib/squish/colourset.cpp b/Engine/lib/squish/colourset.cpp
index 15cc690c6..97d29d987 100644
--- a/Engine/lib/squish/colourset.cpp
+++ b/Engine/lib/squish/colourset.cpp
@@ -24,7 +24,6 @@
    -------------------------------------------------------------------------- */
    
 #include "colourset.h"
-#include "squishMath.h"
 
 namespace squish {
 
@@ -104,7 +103,7 @@ ColourSet::ColourSet( u8 const* rgba, int mask, int flags )
 
 	// square root the weights
 	for( int i = 0; i < m_count; ++i )
-		m_weights[i] = SquishMath::sqrt( m_weights[i] );
+		m_weights[i] = std::sqrt( m_weights[i] );
 }
 
 void ColourSet::RemapIndices( u8 const* source, u8* target ) const
diff --git a/Engine/lib/squish/maths.cpp b/Engine/lib/squish/maths.cpp
index 37bf51683..59818a4d2 100644
--- a/Engine/lib/squish/maths.cpp
+++ b/Engine/lib/squish/maths.cpp
@@ -30,6 +30,7 @@
 */
 
 #include "maths.h"
+#include <cfloat>
 
 namespace squish {
 
@@ -85,11 +86,11 @@ static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
 	u[5] = m[0]*m[3] - m[1]*m[1];
 
 	// find the largest component
-	float mc = SquishMath::fabs( u[0] );
+	float mc = std::fabs( u[0] );
 	int mi = 0;
 	for( int i = 1; i < 6; ++i )
 	{
-		float c = SquishMath::fabs( u[i] );
+		float c = std::fabs( u[i] );
 		if( c > mc )
 		{
 			mc = c;
@@ -124,11 +125,11 @@ static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
 	m[5] = matrix[5] - evalue;
 
 	// find the largest component
-	float mc = SquishMath::fabs( m[0] );
+	float mc = std::fabs( m[0] );
 	int mi = 0;
 	for( int i = 1; i < 6; ++i )
 	{
-		float c = SquishMath::fabs( m[i] );
+		float c = std::fabs( m[i] );
 		if( c > mc )
 		{
 			mc = c;
@@ -183,21 +184,21 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 	else if( Q < -FLT_EPSILON )
 	{
 		// three distinct roots
-		float theta = SquishMath::atan2( SquishMath::sqrt( -Q ), -0.5f*b );
-		float rho = SquishMath::sqrt( 0.25f*b*b - Q );
+		float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
+		float rho = std::sqrt( 0.25f*b*b - Q );
 
-		float rt = SquishMath::pow( rho, 1.0f/3.0f );
-		float ct = SquishMath::cos( theta/3.0f );
-		float st = SquishMath::sin( theta/3.0f );
+		float rt = std::pow( rho, 1.0f/3.0f );
+		float ct = std::cos( theta/3.0f );
+		float st = std::sin( theta/3.0f );
 
 		float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
-		float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )SquishMath::sqrt( 3.0f )*st );
-		float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )SquishMath::sqrt( 3.0f )*st );
+		float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )std::sqrt( 3.0f )*st );
+		float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )std::sqrt( 3.0f )*st );
 
 		// pick the larger
-		if( SquishMath::fabs( l2 ) > SquishMath::fabs( l1 ) )
+		if( std::fabs( l2 ) > std::fabs( l1 ) )
 			l1 = l2;
-		if( SquishMath::fabs( l3 ) > SquishMath::fabs( l1 ) )
+		if( std::fabs( l3 ) > std::fabs( l1 ) )
 			l1 = l3;
 
 		// get the eigenvector
@@ -208,15 +209,15 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 		// two roots
 		float rt;
 		if( b < 0.0f )
-			rt = -SquishMath::pow( -0.5f*b, 1.0f/3.0f );
+			rt = -std::pow( -0.5f*b, 1.0f/3.0f );
 		else
-			rt = SquishMath::pow( 0.5f*b, 1.0f/3.0f );
+			rt = std::pow( 0.5f*b, 1.0f/3.0f );
 		
 		float l1 = ( 1.0f/3.0f )*c2 + rt;		// repeated
 		float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
 		
 		// get the eigenvector
-		if( SquishMath::fabs( l1 ) > SquishMath::fabs( l2 ) )
+		if( std::fabs( l1 ) > std::fabs( l2 ) )
 			return GetMultiplicity2Evector( matrix, l1 );
 		else
 			return GetMultiplicity1Evector( matrix, l2 );
diff --git a/Engine/lib/squish/maths.h b/Engine/lib/squish/maths.h
index 47f455bda..769ae463f 100644
--- a/Engine/lib/squish/maths.h
+++ b/Engine/lib/squish/maths.h
@@ -26,7 +26,8 @@
 #ifndef SQUISH_MATHS_H
 #define SQUISH_MATHS_H
 
-#include "squishMath.h"
+#include <cmath>
+#include <algorithm>
 #include "config.h"
 
 namespace squish {
@@ -162,27 +163,27 @@ public:
 	friend Vec3 Min( Arg left, Arg right )
 	{
 		return Vec3(
-			SquishMath::min( left.m_x, right.m_x ), 
-			SquishMath::min( left.m_y, right.m_y ), 
-			SquishMath::min( left.m_z, right.m_z )
+			std::min( left.m_x, right.m_x ), 
+			std::min( left.m_y, right.m_y ), 
+			std::min( left.m_z, right.m_z )
 		);
 	}
 
 	friend Vec3 Max( Arg left, Arg right )
 	{
 		return Vec3(
-			SquishMath::max( left.m_x, right.m_x ), 
-			SquishMath::max( left.m_y, right.m_y ), 
-			SquishMath::max( left.m_z, right.m_z )
+			std::max( left.m_x, right.m_x ), 
+			std::max( left.m_y, right.m_y ), 
+			std::max( left.m_z, right.m_z )
 		);
 	}
 
 	friend Vec3 Truncate( Arg v )
 	{
 		return Vec3(
-			v.m_x > 0.0f ? SquishMath::floor( v.m_x ) : SquishMath::ceil( v.m_x ), 
-			v.m_y > 0.0f ? SquishMath::floor( v.m_y ) : SquishMath::ceil( v.m_y ), 
-			v.m_z > 0.0f ? SquishMath::floor( v.m_z ) : SquishMath::ceil( v.m_z )
+			v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), 
+			v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), 
+			v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z )
 		);
 	}
 
diff --git a/Engine/lib/squish/simd_float.h b/Engine/lib/squish/simd_float.h
index 6052736f3..e6351b80e 100644
--- a/Engine/lib/squish/simd_float.h
+++ b/Engine/lib/squish/simd_float.h
@@ -135,30 +135,30 @@ public:
 	friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
 	{
 		return Vec4( 
-			SquishMath::min( left.m_x, right.m_x ), 
-			SquishMath::min( left.m_y, right.m_y ), 
-			SquishMath::min( left.m_z, right.m_z ), 
-			SquishMath::min( left.m_w, right.m_w ) 
+			std::min( left.m_x, right.m_x ), 
+			std::min( left.m_y, right.m_y ), 
+			std::min( left.m_z, right.m_z ), 
+			std::min( left.m_w, right.m_w ) 
 		);
 	}
 	
 	friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
 	{
 		return Vec4( 
-			SquishMath::max( left.m_x, right.m_x ), 
-			SquishMath::max( left.m_y, right.m_y ), 
-			SquishMath::max( left.m_z, right.m_z ), 
-			SquishMath::max( left.m_w, right.m_w ) 
+			std::max( left.m_x, right.m_x ), 
+			std::max( left.m_y, right.m_y ), 
+			std::max( left.m_z, right.m_z ), 
+			std::max( left.m_w, right.m_w ) 
 		);
 	}
 	
 	friend Vec4 Truncate( Vec4::Arg v )
 	{
 		return Vec4(
-			v.m_x > 0.0f ? SquishMath::floor( v.m_x ) : SquishMath::ceil( v.m_x ), 
-			v.m_y > 0.0f ? SquishMath::floor( v.m_y ) : SquishMath::ceil( v.m_y ), 
-			v.m_z > 0.0f ? SquishMath::floor( v.m_z ) : SquishMath::ceil( v.m_z ),
-			v.m_w > 0.0f ? SquishMath::floor( v.m_w ) : SquishMath::ceil( v.m_w )
+			v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), 
+			v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), 
+			v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z ),
+			v.m_w > 0.0f ? std::floor( v.m_w ) : std::ceil( v.m_w )
 		);
 	}
 	
diff --git a/Engine/lib/squish/squish.cpp b/Engine/lib/squish/squish.cpp
index 5180738fc..bbe89bfcf 100644
--- a/Engine/lib/squish/squish.cpp
+++ b/Engine/lib/squish/squish.cpp
@@ -34,7 +34,7 @@
 
 namespace squish {
 
-int FixFlags( int flags )
+static int FixFlags( int flags )
 {
 	// grab the flag bits
 	int method = flags & ( kDxt1 | kDxt3 | kDxt5 );
diff --git a/Engine/lib/squish/squish.h b/Engine/lib/squish/squish.h
index b69cba65c..5f5ac149d 100644
--- a/Engine/lib/squish/squish.h
+++ b/Engine/lib/squish/squish.h
@@ -215,7 +215,6 @@ int GetStorageRequirements( int width, int height, int flags );
 	squish::GetStorageRequirements.
 */
 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags );
-void CompressImageOMP( u8 const* rgba, int width, int height, void* blocks, int flags );
 
 // -----------------------------------------------------------------------------
 
@@ -242,9 +241,6 @@ void DecompressImage( u8* rgba, int width, int height, void const* blocks, int f
 
 // -----------------------------------------------------------------------------
 
-// Helper method
-int FixFlags( int flags );
-
 } // namespace squish
 
 #endif // ndef SQUISH_H
diff --git a/Engine/lib/squish/squishMath.cpp b/Engine/lib/squish/squishMath.cpp
deleted file mode 100644
index 9842d4f71..000000000
--- a/Engine/lib/squish/squishMath.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "squishMath.h"
-#include <math.h>
-
-float SquishMath::fabs( const float f )
-{
-   return ::fabs( f );
-}
-
-float SquishMath::pow( const float x, const float y )
-{
-   return ::pow( x, y );
-}
-
-float SquishMath::cos( const float theta )
-{
-   return ::cos( theta );
-}
-
-float SquishMath::sin( const float theta )
-{
-   return ::sin( theta );
-}
-
-float SquishMath::sqrt( const float a )
-{
-   return ::sqrtf( a );
-}
-
-float SquishMath::atan2( const float a, const float b )
-{
-   return ::atan2f( a, b );
-}
-
-float SquishMath::min( const float a, const float b )
-{
-   return a < b ? a : b;
-}
-
-float SquishMath::max( const float a, const float b )
-{
-   return a < b ? b : a;
-}
-
-float SquishMath::floor( const float a )
-{
-   return ::floorf( a );
-}
-
-float SquishMath::ceil( const float a )
-{
-   return ::ceilf( a );
-}
-
-int SquishMath::min( const int a, const int b )
-{
-   return a < b ? a : b;
-}
-
-int SquishMath::max( const int a, const int b )
-{
-   return a < b ? b : a;
-}
\ No newline at end of file
diff --git a/Engine/lib/squish/squishMath.h b/Engine/lib/squish/squishMath.h
deleted file mode 100644
index 6b72e197e..000000000
--- a/Engine/lib/squish/squishMath.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _SQUISH_MATH_H_
-#define _SQUISH_MATH_H_
-
-#define FLT_MAX         3.402823466e+38F
-#define FLT_EPSILON     1.192092896e-07F
-#define INT_MAX       2147483647    /* maximum (signed) int value */
-
-// Abstract the math in squish so it doesn't use std:: directly
-namespace SquishMath
-{
-   float fabs( const float f );
-   float pow( const float x, const float y );
-   float cos( const float theta );
-   float sin( const float theta );
-   float sqrt( const float a );
-   float atan2( const float a, const float b );
-
-   float min( const float a, const float b );
-   float max( const float a, const float b );
-
-   float floor( const float a );
-   float ceil( const float a );
-
-   int min( const int a, const int b );
-   int max( const int a, const int b );
-};
-
-#endif
\ No newline at end of file
diff --git a/Engine/lib/squish/squishOmp.cpp b/Engine/lib/squish/squishOmp.cpp
deleted file mode 100644
index ba4be57d5..000000000
--- a/Engine/lib/squish/squishOmp.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/* -----------------------------------------------------------------------------
-
-Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the 
-"Software"), to	deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to 
-permit persons to whom the Software is furnished to do so, subject to 
-the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
--------------------------------------------------------------------------- */
-#include <squish.h>
-
-//#define ENABLE_OPEN_MP
-
-#ifndef ENABLE_OPEN_MP
-void squish::CompressImageOMP(u8 const* rgba, int width, int height, void* blocks, int flags)
-{
-   squish::CompressImage( rgba, width, height, blocks, flags );
-}
-
-#else
-
-// OMP implementation
-#include <omp.h>
-
-// OpenMP implementation of squish::CompressImage
-//
-// If you have any fixes, improvements, suggestions of: "L2OMP n00b" 
-// please send them to:
-// Pat Wilson
-// patw@garagegames.com
-namespace squish {
-
-struct _blk_row
-{
-   unsigned int pxl[4];
-};
-
-void CompressImageOMP( u8 const* rgba, int width, int height, void* blocks, int flags )
-{
-   // fix any bad flags
-   flags = FixFlags( flags );
-
-   // Should really assert here or something
-   if( width % 4 || height % 4 )
-      return;
-
-   // initialize the block output
-   u8 *const targetBlock = reinterpret_cast<u8 *>( blocks );
-   const int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
-   const int blockHeight = height >> 2;
-   const int blockWidth = width >> 2;
-
-#pragma omp parallel 
-{  // begin omp block
-
-   // loop over blocks
-#pragma omp for
-   for( int by = 0; by < blockHeight; by++ )
-   {
-      const int y = by * 4;
-
-      for( int bx = 0; bx < blockWidth; bx++ )
-      {
-         const int x = bx * 4;
-
-         // build the 4x4 block of pixels
-         u8 sourceRgba[16 * 4];
-
-#define _load_row(r) (reinterpret_cast<_blk_row *>(sourceRgba))[r] = (*reinterpret_cast<const _blk_row *>(rgba + 4 * ( width * (y + r) + x )))
-         _load_row(0);
-         _load_row(1);
-         _load_row(2);
-         _load_row(3);
-#undef _load_row
-
-         // compress it into the output
-         const int blockIdx = by * blockWidth + bx;
-         Compress( sourceRgba, &targetBlock[blockIdx * bytesPerBlock], flags );
-      }
-   }
-
-} // end omp block
-
-}
-
-} // namespace
-#endif
diff --git a/Engine/source/gfx/bitmap/ddsUtils.cpp b/Engine/source/gfx/bitmap/ddsUtils.cpp
index e190efcac..42971bfdc 100644
--- a/Engine/source/gfx/bitmap/ddsUtils.cpp
+++ b/Engine/source/gfx/bitmap/ddsUtils.cpp
@@ -88,10 +88,7 @@ bool DDSUtil::squishDDS( DDSFile *srcDDS, const GFXFormat dxtFormat )
       PROFILE_START(SQUISH_DXT_COMPRESS);
 
       // Compress with Squish
-      //
-      // squish::CompressImageOMP will call squish::CompressImage if OpenMP is 
-      // not enabled.
-      squish::CompressImageOMP( srcBits, srcDDS->getWidth(i), srcDDS->getHeight(i), 
+      squish::CompressImage( srcBits, srcDDS->getWidth(i), srcDDS->getHeight(i), 
          dstBits, squishFlags );
 
       PROFILE_END();