mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-04-29 16:25:42 +00:00
Merge pull request #1970 from Azaezel/soSquishy
squish update (primarily to add additional formats for later usage)
This commit is contained in:
commit
f7c852c884
24 changed files with 256 additions and 1072 deletions
|
|
@ -1,52 +0,0 @@
|
||||||
1.10
|
|
||||||
* Iterative cluster fit is now considered to be a new compression mode
|
|
||||||
* The core cluster fit is now 4x faster using contributions by Ignacio
|
|
||||||
Castano from NVIDIA
|
|
||||||
* The single colour lookup table has been halved by exploiting symmetry
|
|
||||||
|
|
||||||
1.9
|
|
||||||
* Added contributed SSE1 truncate implementation
|
|
||||||
* Changed use of SQUISH_USE_SSE to be 1 for SSE and 2 for SSE2 instructions
|
|
||||||
* Cluster fit is now iterative to further reduce image error
|
|
||||||
|
|
||||||
1.8
|
|
||||||
* Switched from using floor to trunc for much better SSE performance (again)
|
|
||||||
* Xcode build now expects libpng in /usr/local for extra/squishpng
|
|
||||||
|
|
||||||
1.7
|
|
||||||
* Fixed floating-point equality issue in clusterfit sort (x86 affected only)
|
|
||||||
* Implemented proper SSE(2) floor function for 50% speedup on SSE builds
|
|
||||||
* The range fit implementation now uses the correct colour metric
|
|
||||||
|
|
||||||
1.6
|
|
||||||
* Fixed bug in CompressImage where masked pixels were not skipped over
|
|
||||||
* DXT3 and DXT5 alpha compression now properly use the mask to ignore pixels
|
|
||||||
* Fixed major DXT1 bug that can generate unexpected transparent pixels
|
|
||||||
|
|
||||||
1.5
|
|
||||||
* Added CompressMasked function to handle incomplete DXT blocks more cleanly
|
|
||||||
* Added kWeightColourByAlpha flag for better quality images when alpha blending
|
|
||||||
|
|
||||||
1.4
|
|
||||||
* Fixed stack overflow in rangefit
|
|
||||||
|
|
||||||
1.3
|
|
||||||
* Worked around SSE floor implementation bug, proper fix needed!
|
|
||||||
* This release has visual studio and makefile builds that work
|
|
||||||
|
|
||||||
1.2
|
|
||||||
* Added provably optimal single colour compressor
|
|
||||||
* Added extra/squishgen.cpp that generates single colour lookup tables
|
|
||||||
|
|
||||||
1.1
|
|
||||||
* Fixed a DXT1 colour output bug
|
|
||||||
* Changed argument order for Decompress function to match Compress
|
|
||||||
* Added GetStorageRequirements function
|
|
||||||
* Added CompressImage function
|
|
||||||
* Added DecompressImage function
|
|
||||||
* Moved squishtool.cpp to extra/squishpng.cpp
|
|
||||||
* Added extra/squishtest.cpp
|
|
||||||
|
|
||||||
1.0
|
|
||||||
* Initial release
|
|
||||||
|
|
||||||
20
Engine/lib/squish/LICENSE
Normal file
20
Engine/lib/squish/LICENSE
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included
|
||||||
|
in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
@ -24,8 +24,9 @@
|
||||||
-------------------------------------------------------------------------- */
|
-------------------------------------------------------------------------- */
|
||||||
|
|
||||||
#include "alpha.h"
|
#include "alpha.h"
|
||||||
|
|
||||||
|
#include <climits>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <limits.h>
|
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
#ifndef SQUISH_ALPHA_H
|
#ifndef SQUISH_ALPHA_H
|
||||||
#define SQUISH_ALPHA_H
|
#define SQUISH_ALPHA_H
|
||||||
|
|
||||||
#include <squish.h>
|
#include "squish.h"
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,22 +31,21 @@
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
ClusterFit::ClusterFit( ColourSet const* colours, int flags )
|
ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
|
||||||
: ColourFit( colours, flags )
|
: ColourFit( colours, flags )
|
||||||
{
|
{
|
||||||
// set the iteration count
|
// set the iteration count
|
||||||
m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
|
m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
|
||||||
|
|
||||||
// initialise the best error
|
// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
|
||||||
m_besterror = VEC4_CONST( FLT_MAX );
|
if( metric )
|
||||||
|
m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
|
||||||
// initialise the metric
|
|
||||||
bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
|
|
||||||
if( perceptual )
|
|
||||||
m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
|
|
||||||
else
|
else
|
||||||
m_metric = VEC4_CONST( 1.0f );
|
m_metric = VEC4_CONST( 1.0f );
|
||||||
|
|
||||||
|
// initialise the best error
|
||||||
|
m_besterror = VEC4_CONST( FLT_MAX );
|
||||||
|
|
||||||
// cache some values
|
// cache some values
|
||||||
int const count = m_colours->GetCount();
|
int const count = m_colours->GetCount();
|
||||||
Vec3 const* values = m_colours->GetPoints();
|
Vec3 const* values = m_colours->GetPoints();
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@
|
||||||
#ifndef SQUISH_CLUSTERFIT_H
|
#ifndef SQUISH_CLUSTERFIT_H
|
||||||
#define SQUISH_CLUSTERFIT_H
|
#define SQUISH_CLUSTERFIT_H
|
||||||
|
|
||||||
#include <squish.h>
|
#include "squish.h"
|
||||||
#include "maths.h"
|
#include "maths.h"
|
||||||
#include "simd.h"
|
#include "simd.h"
|
||||||
#include "colourfit.h"
|
#include "colourfit.h"
|
||||||
|
|
@ -37,7 +37,7 @@ namespace squish {
|
||||||
class ClusterFit : public ColourFit
|
class ClusterFit : public ColourFit
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ClusterFit( ColourSet const* colours, int flags );
|
ClusterFit( ColourSet const* colours, int flags, float* metric );
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool ConstructOrdering( Vec3 const& axis, int iteration );
|
bool ConstructOrdering( Vec3 const& axis, int iteration );
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
#ifndef SQUISH_COLOURBLOCK_H
|
#ifndef SQUISH_COLOURBLOCK_H
|
||||||
#define SQUISH_COLOURBLOCK_H
|
#define SQUISH_COLOURBLOCK_H
|
||||||
|
|
||||||
#include <squish.h>
|
#include "squish.h"
|
||||||
#include "maths.h"
|
#include "maths.h"
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,10 @@ ColourFit::ColourFit( ColourSet const* colours, int flags )
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ColourFit::~ColourFit()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
void ColourFit::Compress( void* block )
|
void ColourFit::Compress( void* block )
|
||||||
{
|
{
|
||||||
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
|
bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,11 @@
|
||||||
#ifndef SQUISH_COLOURFIT_H
|
#ifndef SQUISH_COLOURFIT_H
|
||||||
#define SQUISH_COLOURFIT_H
|
#define SQUISH_COLOURFIT_H
|
||||||
|
|
||||||
#include <squish.h>
|
#include "squish.h"
|
||||||
#include "maths.h"
|
#include "maths.h"
|
||||||
|
|
||||||
|
#include <climits>
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
class ColourSet;
|
class ColourSet;
|
||||||
|
|
@ -37,6 +39,7 @@ class ColourFit
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ColourFit( ColourSet const* colours, int flags );
|
ColourFit( ColourSet const* colours, int flags );
|
||||||
|
virtual ~ColourFit();
|
||||||
|
|
||||||
void Compress( void* block );
|
void Compress( void* block );
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
#ifndef SQUISH_COLOURSET_H
|
#ifndef SQUISH_COLOURSET_H
|
||||||
#define SQUISH_COLOURSET_H
|
#define SQUISH_COLOURSET_H
|
||||||
|
|
||||||
#include <squish.h>
|
#include "squish.h"
|
||||||
#include "maths.h"
|
#include "maths.h"
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@
|
||||||
#define SQUISH_USE_SSE 0
|
#define SQUISH_USE_SSE 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Internally et SQUISH_USE_SIMD when either Altivec or SSE is available.
|
// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
|
||||||
#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
|
#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
|
||||||
#error "Cannot enable both Altivec and SSE!"
|
#error "Cannot enable both Altivec and SSE!"
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -46,10 +46,4 @@
|
||||||
#define SQUISH_USE_SIMD 0
|
#define SQUISH_USE_SIMD 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// TORQUE MODIFICATIONS
|
|
||||||
#ifdef TORQUE_DEBUG
|
|
||||||
# undef SQUISH_USE_SSE
|
|
||||||
# define SQUISH_USE_SSE 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // ndef SQUISH_CONFIG_H
|
#endif // ndef SQUISH_CONFIG_H
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "maths.h"
|
#include "maths.h"
|
||||||
|
#include "simd.h"
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
@ -44,7 +45,8 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
|
||||||
total += weights[i];
|
total += weights[i];
|
||||||
centroid += weights[i]*points[i];
|
centroid += weights[i]*points[i];
|
||||||
}
|
}
|
||||||
centroid /= total;
|
if( total > FLT_EPSILON )
|
||||||
|
centroid /= total;
|
||||||
|
|
||||||
// accumulate the covariance matrix
|
// accumulate the covariance matrix
|
||||||
Sym3x3 covariance( 0.0f );
|
Sym3x3 covariance( 0.0f );
|
||||||
|
|
@ -65,6 +67,8 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
|
||||||
return covariance;
|
return covariance;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
|
static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
|
||||||
{
|
{
|
||||||
// compute M
|
// compute M
|
||||||
|
|
@ -224,4 +228,32 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define POWER_ITERATION_COUNT 8
|
||||||
|
|
||||||
|
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
|
||||||
|
{
|
||||||
|
Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
|
||||||
|
Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
|
||||||
|
Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
|
||||||
|
Vec4 v = VEC4_CONST( 1.0f );
|
||||||
|
for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
|
||||||
|
{
|
||||||
|
// matrix multiply
|
||||||
|
Vec4 w = row0*v.SplatX();
|
||||||
|
w = MultiplyAdd(row1, v.SplatY(), w);
|
||||||
|
w = MultiplyAdd(row2, v.SplatZ(), w);
|
||||||
|
|
||||||
|
// get max component from xyz in all channels
|
||||||
|
Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
|
||||||
|
|
||||||
|
// divide through and advance
|
||||||
|
v = w*Reciprocal(a);
|
||||||
|
}
|
||||||
|
return v.GetVec3();
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace squish
|
} // namespace squish
|
||||||
|
|
|
||||||
|
|
@ -30,15 +30,14 @@
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
RangeFit::RangeFit( ColourSet const* colours, int flags )
|
RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric )
|
||||||
: ColourFit( colours, flags )
|
: ColourFit( colours, flags )
|
||||||
{
|
{
|
||||||
// initialise the metric
|
// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
|
||||||
bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
|
if( metric )
|
||||||
if( perceptual )
|
m_metric = Vec3( metric[0], metric[1], metric[2] );
|
||||||
m_metric = Vec3( 0.2126f, 0.7152f, 0.0722f );
|
|
||||||
else
|
else
|
||||||
m_metric = Vec3( 1.0f );
|
m_metric = Vec3( 1.0f );
|
||||||
|
|
||||||
// initialise the best error
|
// initialise the best error
|
||||||
m_besterror = FLT_MAX;
|
m_besterror = FLT_MAX;
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
#ifndef SQUISH_RANGEFIT_H
|
#ifndef SQUISH_RANGEFIT_H
|
||||||
#define SQUISH_RANGEFIT_H
|
#define SQUISH_RANGEFIT_H
|
||||||
|
|
||||||
#include <squish.h>
|
#include "squish.h"
|
||||||
#include "colourfit.h"
|
#include "colourfit.h"
|
||||||
#include "maths.h"
|
#include "maths.h"
|
||||||
|
|
||||||
|
|
@ -37,7 +37,7 @@ class ColourSet;
|
||||||
class RangeFit : public ColourFit
|
class RangeFit : public ColourFit
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
RangeFit( ColourSet const* colours, int flags );
|
RangeFit( ColourSet const* colours, int flags, float* metric );
|
||||||
|
|
||||||
private:
|
private:
|
||||||
virtual void Compress3( void* block );
|
virtual void Compress3( void* block );
|
||||||
|
|
|
||||||
|
|
@ -27,14 +27,6 @@
|
||||||
#define SQUISH_SIMD_H
|
#define SQUISH_SIMD_H
|
||||||
|
|
||||||
#include "maths.h"
|
#include "maths.h"
|
||||||
|
|
||||||
#if SQUISH_USE_ALTIVEC
|
|
||||||
#include "simd_ve.h"
|
|
||||||
#elif SQUISH_USE_SSE
|
|
||||||
#include "simd_sse.h"
|
|
||||||
#else
|
|
||||||
#include "simd_float.h"
|
#include "simd_float.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif // ndef SQUISH_SIMD_H
|
#endif // ndef SQUISH_SIMD_H
|
||||||
|
|
|
||||||
|
|
@ -1,180 +0,0 @@
|
||||||
/* -----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining
|
|
||||||
a copy of this software and associated documentation files (the
|
|
||||||
"Software"), to deal in the Software without restriction, including
|
|
||||||
without limitation the rights to use, copy, modify, merge, publish,
|
|
||||||
distribute, sublicense, and/or sell copies of the Software, and to
|
|
||||||
permit persons to whom the Software is furnished to do so, subject to
|
|
||||||
the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included
|
|
||||||
in all copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
||||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
||||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
||||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
||||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
||||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#ifndef SQUISH_SIMD_SSE_H
|
|
||||||
#define SQUISH_SIMD_SSE_H
|
|
||||||
|
|
||||||
#include <xmmintrin.h>
|
|
||||||
#if ( SQUISH_USE_SSE > 1 )
|
|
||||||
#include <emmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define SQUISH_SSE_SPLAT( a ) \
|
|
||||||
( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
|
|
||||||
|
|
||||||
#define SQUISH_SSE_SHUF( x, y, z, w ) \
|
|
||||||
( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) )
|
|
||||||
|
|
||||||
namespace squish {
|
|
||||||
|
|
||||||
#define VEC4_CONST( X ) Vec4( X )
|
|
||||||
|
|
||||||
class Vec4
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
typedef Vec4 const& Arg;
|
|
||||||
|
|
||||||
Vec4() {}
|
|
||||||
|
|
||||||
explicit Vec4( __m128 v ) : m_v( v ) {}
|
|
||||||
|
|
||||||
Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
|
|
||||||
|
|
||||||
Vec4& operator=( Vec4 const& arg )
|
|
||||||
{
|
|
||||||
m_v = arg.m_v;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {}
|
|
||||||
|
|
||||||
Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {}
|
|
||||||
|
|
||||||
Vec3 GetVec3() const
|
|
||||||
{
|
|
||||||
#ifdef __GNUC__
|
|
||||||
__attribute__ ((__aligned__ (16))) float c[4];
|
|
||||||
#else
|
|
||||||
__declspec(align(16)) float c[4];
|
|
||||||
#endif
|
|
||||||
_mm_store_ps( c, m_v );
|
|
||||||
return Vec3( c[0], c[1], c[2] );
|
|
||||||
}
|
|
||||||
|
|
||||||
Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
|
|
||||||
Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
|
|
||||||
Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); }
|
|
||||||
Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); }
|
|
||||||
|
|
||||||
Vec4& operator+=( Arg v )
|
|
||||||
{
|
|
||||||
m_v = _mm_add_ps( m_v, v.m_v );
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vec4& operator-=( Arg v )
|
|
||||||
{
|
|
||||||
m_v = _mm_sub_ps( m_v, v.m_v );
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vec4& operator*=( Arg v )
|
|
||||||
{
|
|
||||||
m_v = _mm_mul_ps( m_v, v.m_v );
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( _mm_add_ps( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( _mm_sub_ps( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( _mm_mul_ps( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Returns a*b + c
|
|
||||||
friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
|
|
||||||
{
|
|
||||||
return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Returns -( a*b - c )
|
|
||||||
friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
|
|
||||||
{
|
|
||||||
return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 Reciprocal( Vec4::Arg v )
|
|
||||||
{
|
|
||||||
// get the reciprocal estimate
|
|
||||||
__m128 estimate = _mm_rcp_ps( v.m_v );
|
|
||||||
|
|
||||||
// one round of Newton-Rhaphson refinement
|
|
||||||
__m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) );
|
|
||||||
return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( _mm_min_ps( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( _mm_max_ps( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 Truncate( Vec4::Arg v )
|
|
||||||
{
|
|
||||||
#if ( SQUISH_USE_SSE == 1 )
|
|
||||||
// convert to ints
|
|
||||||
__m128 input = v.m_v;
|
|
||||||
__m64 lo = _mm_cvttps_pi32( input );
|
|
||||||
__m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
|
|
||||||
|
|
||||||
// convert to floats
|
|
||||||
__m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
|
|
||||||
__m128 truncated = _mm_cvtpi32_ps( part, lo );
|
|
||||||
|
|
||||||
// clear out the MMX multimedia state to allow FP calls later
|
|
||||||
_mm_empty();
|
|
||||||
return Vec4( truncated );
|
|
||||||
#else
|
|
||||||
// use SSE2 instructions
|
|
||||||
return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
__m128 bits = _mm_cmplt_ps( left.m_v, right.m_v );
|
|
||||||
int value = _mm_movemask_ps( bits );
|
|
||||||
return value != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
__m128 m_v;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace squish
|
|
||||||
|
|
||||||
#endif // ndef SQUISH_SIMD_SSE_H
|
|
||||||
|
|
@ -1,166 +0,0 @@
|
||||||
/* -----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining
|
|
||||||
a copy of this software and associated documentation files (the
|
|
||||||
"Software"), to deal in the Software without restriction, including
|
|
||||||
without limitation the rights to use, copy, modify, merge, publish,
|
|
||||||
distribute, sublicense, and/or sell copies of the Software, and to
|
|
||||||
permit persons to whom the Software is furnished to do so, subject to
|
|
||||||
the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included
|
|
||||||
in all copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
||||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
||||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
||||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
||||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
||||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#ifndef SQUISH_SIMD_VE_H
|
|
||||||
#define SQUISH_SIMD_VE_H
|
|
||||||
|
|
||||||
#include <altivec.h>
|
|
||||||
#undef bool
|
|
||||||
|
|
||||||
namespace squish {
|
|
||||||
|
|
||||||
#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
|
|
||||||
|
|
||||||
class Vec4
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
typedef Vec4 Arg;
|
|
||||||
|
|
||||||
Vec4() {}
|
|
||||||
|
|
||||||
explicit Vec4( vector float v ) : m_v( v ) {}
|
|
||||||
|
|
||||||
Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
|
|
||||||
|
|
||||||
Vec4& operator=( Vec4 const& arg )
|
|
||||||
{
|
|
||||||
m_v = arg.m_v;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
explicit Vec4( float s )
|
|
||||||
{
|
|
||||||
union { vector float v; float c[4]; } u;
|
|
||||||
u.c[0] = s;
|
|
||||||
u.c[1] = s;
|
|
||||||
u.c[2] = s;
|
|
||||||
u.c[3] = s;
|
|
||||||
m_v = u.v;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vec4( float x, float y, float z, float w )
|
|
||||||
{
|
|
||||||
union { vector float v; float c[4]; } u;
|
|
||||||
u.c[0] = x;
|
|
||||||
u.c[1] = y;
|
|
||||||
u.c[2] = z;
|
|
||||||
u.c[3] = w;
|
|
||||||
m_v = u.v;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vec3 GetVec3() const
|
|
||||||
{
|
|
||||||
union { vector float v; float c[4]; } u;
|
|
||||||
u.v = m_v;
|
|
||||||
return Vec3( u.c[0], u.c[1], u.c[2] );
|
|
||||||
}
|
|
||||||
|
|
||||||
Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
|
|
||||||
Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
|
|
||||||
Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
|
|
||||||
Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); }
|
|
||||||
|
|
||||||
Vec4& operator+=( Arg v )
|
|
||||||
{
|
|
||||||
m_v = vec_add( m_v, v.m_v );
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vec4& operator-=( Arg v )
|
|
||||||
{
|
|
||||||
m_v = vec_sub( m_v, v.m_v );
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vec4& operator*=( Arg v )
|
|
||||||
{
|
|
||||||
m_v = vec_madd( m_v, v.m_v, ( vector float )( -0.0f ) );
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( vec_add( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( vec_sub( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( vec_madd( left.m_v, right.m_v, ( vector float )( -0.0f ) ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Returns a*b + c
|
|
||||||
friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
|
|
||||||
{
|
|
||||||
return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Returns -( a*b - c )
|
|
||||||
friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
|
|
||||||
{
|
|
||||||
return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 Reciprocal( Vec4::Arg v )
|
|
||||||
{
|
|
||||||
// get the reciprocal estimate
|
|
||||||
vector float estimate = vec_re( v.m_v );
|
|
||||||
|
|
||||||
// one round of Newton-Rhaphson refinement
|
|
||||||
vector float diff = vec_nmsub( estimate, v.m_v, ( vector float )( 1.0f ) );
|
|
||||||
return Vec4( vec_madd( diff, estimate, estimate ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( vec_min( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return Vec4( vec_max( left.m_v, right.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend Vec4 Truncate( Vec4::Arg v )
|
|
||||||
{
|
|
||||||
return Vec4( vec_trunc( v.m_v ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
|
|
||||||
{
|
|
||||||
return vec_any_lt( left.m_v, right.m_v ) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
vector float m_v;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace squish
|
|
||||||
|
|
||||||
#endif // ndef SQUISH_SIMD_VE_H
|
|
||||||
|
|
@ -26,7 +26,6 @@
|
||||||
#include "singlecolourfit.h"
|
#include "singlecolourfit.h"
|
||||||
#include "colourset.h"
|
#include "colourset.h"
|
||||||
#include "colourblock.h"
|
#include "colourblock.h"
|
||||||
#include <limits.h>
|
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
#ifndef SQUISH_SINGLECOLOURFIT_H
|
#ifndef SQUISH_SINGLECOLOURFIT_H
|
||||||
#define SQUISH_SINGLECOLOURFIT_H
|
#define SQUISH_SINGLECOLOURFIT_H
|
||||||
|
|
||||||
#include <squish.h>
|
#include "squish.h"
|
||||||
#include "colourfit.h"
|
#include "colourfit.h"
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,27 @@
|
||||||
|
/* -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included
|
||||||
|
in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------- */
|
||||||
|
|
||||||
static SingleColourLookup const lookup_5_3[] =
|
static SingleColourLookup const lookup_5_3[] =
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1,20 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
||||||
<plist version="1.0">
|
|
||||||
<dict>
|
|
||||||
<key>CFBundleDevelopmentRegion</key>
|
|
||||||
<string>English</string>
|
|
||||||
<key>CFBundleExecutable</key>
|
|
||||||
<string>${EXECUTABLE_NAME}</string>
|
|
||||||
<key>CFBundleIdentifier</key>
|
|
||||||
<string>com.sjbrown.squish</string>
|
|
||||||
<key>CFBundleInfoDictionaryVersion</key>
|
|
||||||
<string>6.0</string>
|
|
||||||
<key>CFBundlePackageType</key>
|
|
||||||
<string>FMWK</string>
|
|
||||||
<key>CFBundleSignature</key>
|
|
||||||
<string>????</string>
|
|
||||||
<key>CFBundleVersion</key>
|
|
||||||
<string>1.0</string>
|
|
||||||
</dict>
|
|
||||||
</plist>
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
|
|
||||||
-------------------------------------------------------------------------- */
|
-------------------------------------------------------------------------- */
|
||||||
|
|
||||||
#include <squish.h>
|
#include "squish.h"
|
||||||
#include "colourset.h"
|
#include "colourset.h"
|
||||||
#include "maths.h"
|
#include "maths.h"
|
||||||
#include "rangefit.h"
|
#include "rangefit.h"
|
||||||
|
|
@ -37,37 +37,58 @@ namespace squish {
|
||||||
static int FixFlags( int flags )
|
static int FixFlags( int flags )
|
||||||
{
|
{
|
||||||
// grab the flag bits
|
// grab the flag bits
|
||||||
int method = flags & ( kDxt1 | kDxt3 | kDxt5 );
|
int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
|
||||||
int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
|
int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
|
||||||
int metric = flags & ( kColourMetricPerceptual | kColourMetricUniform );
|
|
||||||
int extra = flags & kWeightColourByAlpha;
|
int extra = flags & kWeightColourByAlpha;
|
||||||
|
|
||||||
// set defaults
|
// set defaults
|
||||||
if( method != kDxt3 && method != kDxt5 )
|
if ( method != kDxt3
|
||||||
|
&& method != kDxt5
|
||||||
|
&& method != kBc4
|
||||||
|
&& method != kBc5 )
|
||||||
|
{
|
||||||
method = kDxt1;
|
method = kDxt1;
|
||||||
if( fit != kColourRangeFit )
|
}
|
||||||
|
if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
|
||||||
fit = kColourClusterFit;
|
fit = kColourClusterFit;
|
||||||
if( metric != kColourMetricUniform )
|
|
||||||
metric = kColourMetricPerceptual;
|
|
||||||
|
|
||||||
// done
|
// done
|
||||||
return method | fit | metric | extra;
|
return method | fit | extra;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compress( u8 const* rgba, void* block, int flags )
|
void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
|
||||||
{
|
|
||||||
// compress with full mask
|
|
||||||
CompressMasked( rgba, 0xffff, block, flags );
|
|
||||||
}
|
|
||||||
|
|
||||||
void CompressMasked( u8 const* rgba, int mask, void* block, int flags )
|
|
||||||
{
|
{
|
||||||
// fix any bad flags
|
// fix any bad flags
|
||||||
flags = FixFlags( flags );
|
flags = FixFlags( flags );
|
||||||
|
|
||||||
|
if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
|
||||||
|
{
|
||||||
|
u8 alpha[16*4];
|
||||||
|
for( int i = 0; i < 16; ++i )
|
||||||
|
{
|
||||||
|
alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
|
||||||
|
}
|
||||||
|
|
||||||
|
u8* rBlock = reinterpret_cast< u8* >( block );
|
||||||
|
CompressAlphaDxt5( alpha, mask, rBlock );
|
||||||
|
|
||||||
|
if ( ( flags & ( kBc5 ) ) != 0 )
|
||||||
|
{
|
||||||
|
for( int i = 0; i < 16; ++i )
|
||||||
|
{
|
||||||
|
alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
|
||||||
|
}
|
||||||
|
|
||||||
|
u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
|
||||||
|
CompressAlphaDxt5( alpha, mask, gBlock );
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// get the block locations
|
// get the block locations
|
||||||
void* colourBlock = block;
|
void* colourBlock = block;
|
||||||
void* alphaBock = block;
|
void* alphaBlock = block;
|
||||||
if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
|
if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
|
||||||
colourBlock = reinterpret_cast< u8* >( block ) + 8;
|
colourBlock = reinterpret_cast< u8* >( block ) + 8;
|
||||||
|
|
||||||
|
|
@ -84,21 +105,21 @@ void CompressMasked( u8 const* rgba, int mask, void* block, int flags )
|
||||||
else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
|
else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
|
||||||
{
|
{
|
||||||
// do a range fit
|
// do a range fit
|
||||||
RangeFit fit( &colours, flags );
|
RangeFit fit( &colours, flags, metric );
|
||||||
fit.Compress( colourBlock );
|
fit.Compress( colourBlock );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// default to a cluster fit (could be iterative or not)
|
// default to a cluster fit (could be iterative or not)
|
||||||
ClusterFit fit( &colours, flags );
|
ClusterFit fit( &colours, flags, metric );
|
||||||
fit.Compress( colourBlock );
|
fit.Compress( colourBlock );
|
||||||
}
|
}
|
||||||
|
|
||||||
// compress alpha separately if necessary
|
// compress alpha separately if necessary
|
||||||
if( ( flags & kDxt3 ) != 0 )
|
if( ( flags & kDxt3 ) != 0 )
|
||||||
CompressAlphaDxt3( rgba, mask, alphaBock );
|
CompressAlphaDxt3( rgba, mask, alphaBlock );
|
||||||
else if( ( flags & kDxt5 ) != 0 )
|
else if( ( flags & kDxt5 ) != 0 )
|
||||||
CompressAlphaDxt5( rgba, mask, alphaBock );
|
CompressAlphaDxt5( rgba, mask, alphaBlock );
|
||||||
}
|
}
|
||||||
|
|
||||||
void Decompress( u8* rgba, void const* block, int flags )
|
void Decompress( u8* rgba, void const* block, int flags )
|
||||||
|
|
@ -129,18 +150,18 @@ int GetStorageRequirements( int width, int height, int flags )
|
||||||
|
|
||||||
// compute the storage requirements
|
// compute the storage requirements
|
||||||
int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
|
int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
|
||||||
int blocksize = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
|
int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
|
||||||
return blockcount*blocksize;
|
return blockcount*blocksize;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags )
|
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
|
||||||
{
|
{
|
||||||
// fix any bad flags
|
// fix any bad flags
|
||||||
flags = FixFlags( flags );
|
flags = FixFlags( flags );
|
||||||
|
|
||||||
// initialise the block output
|
// initialise the block output
|
||||||
u8* targetBlock = reinterpret_cast< u8* >( blocks );
|
u8* targetBlock = reinterpret_cast< u8* >( blocks );
|
||||||
int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
|
int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
|
||||||
|
|
||||||
// loop over blocks
|
// loop over blocks
|
||||||
for( int y = 0; y < height; y += 4 )
|
for( int y = 0; y < height; y += 4 )
|
||||||
|
|
@ -179,7 +200,7 @@ void CompressImage( u8 const* rgba, int width, int height, void* blocks, int fla
|
||||||
}
|
}
|
||||||
|
|
||||||
// compress it into the output
|
// compress it into the output
|
||||||
CompressMasked( sourceRgba, mask, targetBlock, flags );
|
CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
|
||||||
|
|
||||||
// advance
|
// advance
|
||||||
targetBlock += bytesPerBlock;
|
targetBlock += bytesPerBlock;
|
||||||
|
|
@ -194,7 +215,7 @@ void DecompressImage( u8* rgba, int width, int height, void const* blocks, int f
|
||||||
|
|
||||||
// initialise the block input
|
// initialise the block input
|
||||||
u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
|
u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
|
||||||
int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
|
int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
|
||||||
|
|
||||||
// loop over blocks
|
// loop over blocks
|
||||||
for( int y = 0; y < height; y += 4 )
|
for( int y = 0; y < height; y += 4 )
|
||||||
|
|
|
||||||
|
|
@ -39,74 +39,42 @@ typedef unsigned char u8;
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
//! Use DXT1 compression.
|
//! Use DXT1 compression.
|
||||||
kDxt1 = ( 1 << 0 ),
|
kDxt1 = ( 1 << 0 ),
|
||||||
|
|
||||||
//! Use DXT3 compression.
|
//! Use DXT3 compression.
|
||||||
kDxt3 = ( 1 << 1 ),
|
kDxt3 = ( 1 << 1 ),
|
||||||
|
|
||||||
//! Use DXT5 compression.
|
//! Use DXT5 compression.
|
||||||
kDxt5 = ( 1 << 2 ),
|
kDxt5 = ( 1 << 2 ),
|
||||||
|
|
||||||
//! Use a very slow but very high quality colour compressor.
|
//! Use BC4 compression.
|
||||||
kColourIterativeClusterFit = ( 1 << 8 ),
|
kBc4 = ( 1 << 3 ),
|
||||||
|
|
||||||
|
//! Use BC5 compression.
|
||||||
|
kBc5 = ( 1 << 4 ),
|
||||||
|
|
||||||
//! Use a slow but high quality colour compressor (the default).
|
//! Use a slow but high quality colour compressor (the default).
|
||||||
kColourClusterFit = ( 1 << 3 ),
|
kColourClusterFit = ( 1 << 5 ),
|
||||||
|
|
||||||
//! Use a fast but low quality colour compressor.
|
//! Use a fast but low quality colour compressor.
|
||||||
kColourRangeFit = ( 1 << 4 ),
|
kColourRangeFit = ( 1 << 6 ),
|
||||||
|
|
||||||
//! Use a perceptual metric for colour error (the default).
|
|
||||||
kColourMetricPerceptual = ( 1 << 5 ),
|
|
||||||
|
|
||||||
//! Use a uniform metric for colour error.
|
|
||||||
kColourMetricUniform = ( 1 << 6 ),
|
|
||||||
|
|
||||||
//! Weight the colour by alpha during cluster fit (disabled by default).
|
//! Weight the colour by alpha during cluster fit (disabled by default).
|
||||||
kWeightColourByAlpha = ( 1 << 7 )
|
kWeightColourByAlpha = ( 1 << 7 ),
|
||||||
|
|
||||||
|
//! Use a very slow but very high quality colour compressor.
|
||||||
|
kColourIterativeClusterFit = ( 1 << 8 ),
|
||||||
};
|
};
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
/*! @brief Compresses a 4x4 block of pixels.
|
|
||||||
|
|
||||||
@param rgba The rgba values of the 16 source pixels.
|
|
||||||
@param block Storage for the compressed DXT block.
|
|
||||||
@param flags Compression flags.
|
|
||||||
|
|
||||||
The source pixels should be presented as a contiguous array of 16 rgba
|
|
||||||
values, with each component as 1 byte each. In memory this should be:
|
|
||||||
|
|
||||||
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
|
|
||||||
|
|
||||||
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
|
|
||||||
however, DXT1 will be used by default if none is specified. When using DXT1
|
|
||||||
compression, 8 bytes of storage are required for the compressed DXT block.
|
|
||||||
DXT3 and DXT5 compression require 16 bytes of storage per block.
|
|
||||||
|
|
||||||
The flags parameter can also specify a preferred colour compressor and
|
|
||||||
colour error metric to use when fitting the RGB components of the data.
|
|
||||||
Possible colour compressors are: kColourClusterFit (the default),
|
|
||||||
kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics
|
|
||||||
are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no
|
|
||||||
flags are specified in any particular category then the default will be
|
|
||||||
used. Unknown flags are ignored.
|
|
||||||
|
|
||||||
When using kColourClusterFit, an additional flag can be specified to
|
|
||||||
weight the colour of each pixel by its alpha value. For images that are
|
|
||||||
rendered using alpha blending, this can significantly increase the
|
|
||||||
perceived quality.
|
|
||||||
*/
|
|
||||||
void Compress( u8 const* rgba, void* block, int flags );
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/*! @brief Compresses a 4x4 block of pixels.
|
/*! @brief Compresses a 4x4 block of pixels.
|
||||||
|
|
||||||
@param rgba The rgba values of the 16 source pixels.
|
@param rgba The rgba values of the 16 source pixels.
|
||||||
@param mask The valid pixel mask.
|
@param mask The valid pixel mask.
|
||||||
@param block Storage for the compressed DXT block.
|
@param block Storage for the compressed DXT block.
|
||||||
@param flags Compression flags.
|
@param flags Compression flags.
|
||||||
|
@param metric An optional perceptual metric.
|
||||||
|
|
||||||
The source pixels should be presented as a contiguous array of 16 rgba
|
The source pixels should be presented as a contiguous array of 16 rgba
|
||||||
values, with each component as 1 byte each. In memory this should be:
|
values, with each component as 1 byte each. In memory this should be:
|
||||||
|
|
@ -125,20 +93,68 @@ void Compress( u8 const* rgba, void* block, int flags );
|
||||||
compression, 8 bytes of storage are required for the compressed DXT block.
|
compression, 8 bytes of storage are required for the compressed DXT block.
|
||||||
DXT3 and DXT5 compression require 16 bytes of storage per block.
|
DXT3 and DXT5 compression require 16 bytes of storage per block.
|
||||||
|
|
||||||
The flags parameter can also specify a preferred colour compressor and
|
The flags parameter can also specify a preferred colour compressor to use
|
||||||
colour error metric to use when fitting the RGB components of the data.
|
when fitting the RGB components of the data. Possible colour compressors
|
||||||
Possible colour compressors are: kColourClusterFit (the default),
|
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
|
||||||
kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics
|
quality) or kColourIterativeClusterFit (slowest, best quality).
|
||||||
are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no
|
|
||||||
flags are specified in any particular category then the default will be
|
When using kColourClusterFit or kColourIterativeClusterFit, an additional
|
||||||
used. Unknown flags are ignored.
|
flag can be specified to weight the importance of each pixel by its alpha
|
||||||
|
value. For images that are rendered using alpha blending, this can
|
||||||
|
significantly increase the perceived quality.
|
||||||
|
|
||||||
When using kColourClusterFit, an additional flag can be specified to
|
The metric parameter can be used to weight the relative importance of each
|
||||||
weight the colour of each pixel by its alpha value. For images that are
|
colour channel, or pass NULL to use the default uniform weight of
|
||||||
rendered using alpha blending, this can significantly increase the
|
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
|
||||||
perceived quality.
|
allowed either uniform or "perceptual" weights with the fixed values
|
||||||
|
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
|
||||||
|
contiguous array of 3 floats.
|
||||||
*/
|
*/
|
||||||
void CompressMasked( u8 const* rgba, int mask, void* block, int flags );
|
void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/*! @brief Compresses a 4x4 block of pixels.
|
||||||
|
|
||||||
|
@param rgba The rgba values of the 16 source pixels.
|
||||||
|
@param block Storage for the compressed DXT block.
|
||||||
|
@param flags Compression flags.
|
||||||
|
@param metric An optional perceptual metric.
|
||||||
|
|
||||||
|
The source pixels should be presented as a contiguous array of 16 rgba
|
||||||
|
values, with each component as 1 byte each. In memory this should be:
|
||||||
|
|
||||||
|
{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
|
||||||
|
|
||||||
|
The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression,
|
||||||
|
however, DXT1 will be used by default if none is specified. When using DXT1
|
||||||
|
compression, 8 bytes of storage are required for the compressed DXT block.
|
||||||
|
DXT3 and DXT5 compression require 16 bytes of storage per block.
|
||||||
|
|
||||||
|
The flags parameter can also specify a preferred colour compressor to use
|
||||||
|
when fitting the RGB components of the data. Possible colour compressors
|
||||||
|
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
|
||||||
|
quality) or kColourIterativeClusterFit (slowest, best quality).
|
||||||
|
|
||||||
|
When using kColourClusterFit or kColourIterativeClusterFit, an additional
|
||||||
|
flag can be specified to weight the importance of each pixel by its alpha
|
||||||
|
value. For images that are rendered using alpha blending, this can
|
||||||
|
significantly increase the perceived quality.
|
||||||
|
|
||||||
|
The metric parameter can be used to weight the relative importance of each
|
||||||
|
colour channel, or pass NULL to use the default uniform weight of
|
||||||
|
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
|
||||||
|
allowed either uniform or "perceptual" weights with the fixed values
|
||||||
|
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
|
||||||
|
contiguous array of 3 floats.
|
||||||
|
|
||||||
|
This method is an inline that calls CompressMasked with a mask of 0xffff,
|
||||||
|
provided for compatibility with older versions of squish.
|
||||||
|
*/
|
||||||
|
inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
|
||||||
|
{
|
||||||
|
CompressMasked( rgba, 0xffff, block, flags, metric );
|
||||||
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
@ -186,6 +202,7 @@ int GetStorageRequirements( int width, int height, int flags );
|
||||||
@param height The height of the source image.
|
@param height The height of the source image.
|
||||||
@param blocks Storage for the compressed output.
|
@param blocks Storage for the compressed output.
|
||||||
@param flags Compression flags.
|
@param flags Compression flags.
|
||||||
|
@param metric An optional perceptual metric.
|
||||||
|
|
||||||
The source pixels should be presented as a contiguous array of width*height
|
The source pixels should be presented as a contiguous array of width*height
|
||||||
rgba values, with each component as 1 byte each. In memory this should be:
|
rgba values, with each component as 1 byte each. In memory this should be:
|
||||||
|
|
@ -197,24 +214,29 @@ int GetStorageRequirements( int width, int height, int flags );
|
||||||
compression, 8 bytes of storage are required for each compressed DXT block.
|
compression, 8 bytes of storage are required for each compressed DXT block.
|
||||||
DXT3 and DXT5 compression require 16 bytes of storage per block.
|
DXT3 and DXT5 compression require 16 bytes of storage per block.
|
||||||
|
|
||||||
The flags parameter can also specify a preferred colour compressor and
|
The flags parameter can also specify a preferred colour compressor to use
|
||||||
colour error metric to use when fitting the RGB components of the data.
|
when fitting the RGB components of the data. Possible colour compressors
|
||||||
Possible colour compressors are: kColourClusterFit (the default),
|
are: kColourClusterFit (the default), kColourRangeFit (very fast, low
|
||||||
kColourRangeFit or kColourIterativeClusterFit. Possible colour error metrics
|
quality) or kColourIterativeClusterFit (slowest, best quality).
|
||||||
are: kColourMetricPerceptual (the default) or kColourMetricUniform. If no
|
|
||||||
flags are specified in any particular category then the default will be
|
When using kColourClusterFit or kColourIterativeClusterFit, an additional
|
||||||
used. Unknown flags are ignored.
|
flag can be specified to weight the importance of each pixel by its alpha
|
||||||
|
value. For images that are rendered using alpha blending, this can
|
||||||
|
significantly increase the perceived quality.
|
||||||
|
|
||||||
When using kColourClusterFit, an additional flag can be specified to
|
The metric parameter can be used to weight the relative importance of each
|
||||||
weight the colour of each pixel by its alpha value. For images that are
|
colour channel, or pass NULL to use the default uniform weight of
|
||||||
rendered using alpha blending, this can significantly increase the
|
{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
|
||||||
perceived quality.
|
allowed either uniform or "perceptual" weights with the fixed values
|
||||||
|
{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
|
||||||
|
contiguous array of 3 floats.
|
||||||
|
|
||||||
Internally this function calls squish::Compress for each block. To see how
|
Internally this function calls squish::CompressMasked for each block, which
|
||||||
much memory is required in the compressed image, use
|
allows for pixels outside the image to take arbitrary values. The function
|
||||||
squish::GetStorageRequirements.
|
squish::GetStorageRequirements can be called to compute the amount of memory
|
||||||
|
to allocate for the compressed output.
|
||||||
*/
|
*/
|
||||||
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags );
|
void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,508 +0,0 @@
|
||||||
Name
|
|
||||||
|
|
||||||
EXT_texture_compression_s3tc
|
|
||||||
|
|
||||||
Name Strings
|
|
||||||
|
|
||||||
GL_EXT_texture_compression_s3tc
|
|
||||||
|
|
||||||
Contact
|
|
||||||
|
|
||||||
Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com)
|
|
||||||
|
|
||||||
Status
|
|
||||||
|
|
||||||
FINAL
|
|
||||||
|
|
||||||
Version
|
|
||||||
|
|
||||||
1.1, 16 November 2001 (containing only clarifications relative to
|
|
||||||
version 1.0, dated 7 July 2000)
|
|
||||||
|
|
||||||
Number
|
|
||||||
|
|
||||||
198
|
|
||||||
|
|
||||||
Dependencies
|
|
||||||
|
|
||||||
OpenGL 1.1 is required.
|
|
||||||
|
|
||||||
GL_ARB_texture_compression is required.
|
|
||||||
|
|
||||||
This extension is written against the OpenGL 1.2.1 Specification.
|
|
||||||
|
|
||||||
Overview
|
|
||||||
|
|
||||||
This extension provides additional texture compression functionality
|
|
||||||
specific to S3's S3TC format (called DXTC in Microsoft's DirectX API),
|
|
||||||
subject to all the requirements and limitations described by the extension
|
|
||||||
GL_ARB_texture_compression.
|
|
||||||
|
|
||||||
This extension supports DXT1, DXT3, and DXT5 texture compression formats.
|
|
||||||
For the DXT1 image format, this specification supports an RGB-only mode
|
|
||||||
and a special RGBA mode with single-bit "transparent" alpha.
|
|
||||||
|
|
||||||
IP Status
|
|
||||||
|
|
||||||
Contact S3 Incorporated (http://www.s3.com) regarding any intellectual
|
|
||||||
property issues associated with implementing this extension.
|
|
||||||
|
|
||||||
WARNING: Vendors able to support S3TC texture compression in Direct3D
|
|
||||||
drivers do not necessarily have the right to use the same functionality in
|
|
||||||
OpenGL.
|
|
||||||
|
|
||||||
Issues
|
|
||||||
|
|
||||||
(1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported?
|
|
||||||
|
|
||||||
RESOLVED: No -- insufficient interest. Supporting DXT2 and DXT4
|
|
||||||
would require some rework to the TexEnv definition (maybe add a new
|
|
||||||
base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats.
|
|
||||||
Note that the EXT_texture_env_combine extension (which extends normal
|
|
||||||
TexEnv modes) can be used to support textures with premultiplied alpha.
|
|
||||||
|
|
||||||
(2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported
|
|
||||||
or should we use only the DXT<n> enums?
|
|
||||||
|
|
||||||
RESOLVED: No. A generic RGBA_S3TC_EXT is problematic because DXT3
|
|
||||||
and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is
|
|
||||||
also) yet one format must be chosen up front.
|
|
||||||
|
|
||||||
(3) Should TexSubImage support all block-aligned edits or just the minimal
|
|
||||||
functionality required by the ARB_texture_compression extension?
|
|
||||||
|
|
||||||
RESOLVED: Allow all valid block-aligned edits.
|
|
||||||
|
|
||||||
(4) A pre-compressed image with a DXT1 format can be used as either an
|
|
||||||
RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image. If the image has
|
|
||||||
transparent texels, how are they treated in each format?
|
|
||||||
|
|
||||||
RESOLVED: The renderer has to make sure that an RGB_S3TC_DXT1 format
|
|
||||||
is decoded as RGB (where alpha is effectively one for all texels),
|
|
||||||
while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all
|
|
||||||
texels with "transparent" encodings). Otherwise, the formats are
|
|
||||||
identical.
|
|
||||||
|
|
||||||
(5) Is the encoding of the RGB components for DXT1 formats correct in this
|
|
||||||
spec? MSDN documentation does not specify an RGB color for the
|
|
||||||
"transparent" encoding. Is it really black?
|
|
||||||
|
|
||||||
RESOLVED: Yes. The specification for the DXT1 format initially
|
|
||||||
required black, but later changed that requirement to a
|
|
||||||
recommendation. All vendors involved in the definition of this
|
|
||||||
specification support black. In addition, specifying black has a
|
|
||||||
useful behavior.
|
|
||||||
|
|
||||||
When blending multiple texels (GL_LINEAR filtering), mixing opaque and
|
|
||||||
transparent samples is problematic. Defining a black color on
|
|
||||||
transparent texels achieves a sensible result that works like a
|
|
||||||
texture with premultiplied alpha. For example, if three opaque white
|
|
||||||
and one transparent sample are being averaged, the result would be a
|
|
||||||
75% intensity gray (with an alpha of 75%). This is the same result on
|
|
||||||
the color channels as would be obtained using a white color, 75%
|
|
||||||
alpha, and a SRC_ALPHA blend factor.
|
|
||||||
|
|
||||||
(6) Is the encoding of the RGB components for DXT3 and DXT5 formats
|
|
||||||
correct in this spec? MSDN documentation suggests that the RGB blocks
|
|
||||||
for DXT3 and DXT5 are decoded as described by the DXT1 format.
|
|
||||||
|
|
||||||
RESOLVED: Yes -- this appears to be a bug in the MSDN documentation.
|
|
||||||
The specification for the DXT2-DXT5 formats requires decoding using the
|
|
||||||
opaque block encoding, regardless of the relative values of "color0"
|
|
||||||
and "color1".
|
|
||||||
|
|
||||||
New Procedures and Functions
|
|
||||||
|
|
||||||
None.
|
|
||||||
|
|
||||||
New Tokens
|
|
||||||
|
|
||||||
Accepted by the <internalformat> parameter of TexImage2D, CopyTexImage2D,
|
|
||||||
and CompressedTexImage2DARB and the <format> parameter of
|
|
||||||
CompressedTexSubImage2DARB:
|
|
||||||
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
|
|
||||||
|
|
||||||
Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation)
|
|
||||||
|
|
||||||
None.
|
|
||||||
|
|
||||||
Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization)
|
|
||||||
|
|
||||||
Add to Table 3.16.1: Specific Compressed Internal Formats
|
|
||||||
|
|
||||||
Compressed Internal Format Base Internal Format
|
|
||||||
========================== ====================
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT RGB
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT1_EXT RGBA
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT3_EXT RGBA
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT5_EXT RGBA
|
|
||||||
|
|
||||||
|
|
||||||
Modify Section 3.8.2, Alternate Image Specification
|
|
||||||
|
|
||||||
(add to end of TexSubImage discussion, p.123 -- after edit from the
|
|
||||||
ARB_texture_compression spec)
|
|
||||||
|
|
||||||
If the internal format of the texture image being modified is
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
|
|
||||||
texture is stored using one of the several S3TC compressed texture image
|
|
||||||
formats. Such images are easily edited along 4x4 texel boundaries, so the
|
|
||||||
limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed.
|
|
||||||
TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION
|
|
||||||
error only if one of the following conditions occurs:
|
|
||||||
|
|
||||||
* <width> is not a multiple of four or equal to TEXTURE_WIDTH,
|
|
||||||
unless <xoffset> and <yoffset> are both zero.
|
|
||||||
* <height> is not a multiple of four or equal to TEXTURE_HEIGHT,
|
|
||||||
unless <xoffset> and <yoffset> are both zero.
|
|
||||||
* <xoffset> or <yoffset> is not a multiple of four.
|
|
||||||
|
|
||||||
The contents of any 4x4 block of texels of an S3TC compressed texture
|
|
||||||
image that does not intersect the area being modified are preserved during
|
|
||||||
valid TexSubImage2D and CopyTexSubImage2D calls.
|
|
||||||
|
|
||||||
|
|
||||||
Add to Section 3.8.2, Alternate Image Specification (adding to the end of
|
|
||||||
the CompressedTexImage section introduced by the ARB_texture_compression
|
|
||||||
spec)
|
|
||||||
|
|
||||||
If <internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one
|
|
||||||
of several S3TC compressed texture image formats. The S3TC texture
|
|
||||||
compression algorithm supports only 2D images without borders.
|
|
||||||
CompressedTexImage1DARB and CompressedTexImage3DARB produce an
|
|
||||||
INVALID_ENUM error if <internalformat> is an S3TC format.
|
|
||||||
CompressedTexImage2DARB will produce an INVALID_OPERATION error if
|
|
||||||
<border> is non-zero.
|
|
||||||
|
|
||||||
|
|
||||||
Add to Section 3.8.2, Alternate Image Specification (adding to the end of
|
|
||||||
the CompressedTexSubImage section introduced by the
|
|
||||||
ARB_texture_compression spec)
|
|
||||||
|
|
||||||
If the internal format of the texture image being modified is
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the
|
|
||||||
texture is stored using one of the several S3TC compressed texture image
|
|
||||||
formats. Since the S3TC texture compression algorithm supports only 2D
|
|
||||||
images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce
|
|
||||||
an INVALID_ENUM error if <format> is an S3TC format. Since S3TC images
|
|
||||||
are easily edited along 4x4 texel boundaries, the limitations on
|
|
||||||
CompressedTexSubImage2D are relaxed. CompressedTexSubImage2D will result
|
|
||||||
in an INVALID_OPERATION error only if one of the following conditions
|
|
||||||
occurs:
|
|
||||||
|
|
||||||
* <width> is not a multiple of four or equal to TEXTURE_WIDTH.
|
|
||||||
* <height> is not a multiple of four or equal to TEXTURE_HEIGHT.
|
|
||||||
* <xoffset> or <yoffset> is not a multiple of four.
|
|
||||||
|
|
||||||
The contents of any 4x4 block of texels of an S3TC compressed texture
|
|
||||||
image that does not intersect the area being modified are preserved during
|
|
||||||
valid TexSubImage2D and CopyTexSubImage2D calls.
|
|
||||||
|
|
||||||
Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment
|
|
||||||
Operations and the Frame Buffer)
|
|
||||||
|
|
||||||
None.
|
|
||||||
|
|
||||||
Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions)
|
|
||||||
|
|
||||||
None.
|
|
||||||
|
|
||||||
Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and
|
|
||||||
State Requests)
|
|
||||||
|
|
||||||
None.
|
|
||||||
|
|
||||||
Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance)
|
|
||||||
|
|
||||||
None.
|
|
||||||
|
|
||||||
Additions to the AGL/GLX/WGL Specifications
|
|
||||||
|
|
||||||
None.
|
|
||||||
|
|
||||||
GLX Protocol
|
|
||||||
|
|
||||||
None.
|
|
||||||
|
|
||||||
Errors
|
|
||||||
|
|
||||||
INVALID_ENUM is generated by CompressedTexImage1DARB or
|
|
||||||
CompressedTexImage3DARB if <internalformat> is
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT.
|
|
||||||
|
|
||||||
INVALID_OPERATION is generated by CompressedTexImage2DARB if
|
|
||||||
<internalformat> is COMPRESSED_RGB_S3TC_DXT1_EXT,
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT5_EXT and <border> is not equal to zero.
|
|
||||||
|
|
||||||
INVALID_ENUM is generated by CompressedTexSubImage1DARB or
|
|
||||||
CompressedTexSubImage3DARB if <format> is COMPRESSED_RGB_S3TC_DXT1_EXT,
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT5_EXT.
|
|
||||||
|
|
||||||
INVALID_OPERATION is generated by TexSubImage2D, CopyTexSubImage2D, or
|
|
||||||
CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT,
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of
|
|
||||||
the following apply: <width> is not a multiple of four or equal to
|
|
||||||
TEXTURE_WIDTH; <height> is not a multiple of four or equal to
|
|
||||||
TEXTURE_HEIGHT; <xoffset> or <yoffset> is not a multiple of four.
|
|
||||||
|
|
||||||
|
|
||||||
The following restrictions from the ARB_texture_compression specification
|
|
||||||
do not apply to S3TC texture formats, since subimage modification is
|
|
||||||
straightforward as long as the subimage is properly aligned.
|
|
||||||
|
|
||||||
DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D,
|
|
||||||
DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or
|
|
||||||
DELETE: CopyTexSubImage3D if the internal format of the texture image is
|
|
||||||
DELETE: compressed and <xoffset>, <yoffset>, or <zoffset> does not equal
|
|
||||||
DELETE: -b, where b is value of TEXTURE_BORDER.
|
|
||||||
|
|
||||||
DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB,
|
|
||||||
DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the
|
|
||||||
DELETE: entire texture image is not being edited: if <xoffset>,
|
|
||||||
DELETE: <yoffset>, or <zoffset> is greater than -b, <xoffset> + <width> is
|
|
||||||
DELETE: less than w+b, <yoffset> + <height> is less than h+b, or <zoffset>
|
|
||||||
DELETE: + <depth> is less than d+b, where b is the value of
|
|
||||||
DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of
|
|
||||||
DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH.
|
|
||||||
|
|
||||||
See also errors in the GL_ARB_texture_compression specification.
|
|
||||||
|
|
||||||
New State
|
|
||||||
|
|
||||||
In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT
|
|
||||||
subscript for Z by 4 in the "Type" row.
|
|
||||||
|
|
||||||
New Implementation Dependent State
|
|
||||||
|
|
||||||
None
|
|
||||||
|
|
||||||
Appendix
|
|
||||||
|
|
||||||
S3TC Compressed Texture Image Formats
|
|
||||||
|
|
||||||
Compressed texture images stored using the S3TC compressed image formats
|
|
||||||
are represented as a collection of 4x4 texel blocks, where each block
|
|
||||||
contains 64 or 128 bits of texel data. The image is encoded as a normal
|
|
||||||
2D raster image in which each 4x4 block is treated as a single pixel. If
|
|
||||||
an S3TC image has a width or height that is not a multiple of four, the data corresponding
|
|
||||||
to texels outside the image are irrelevant and undefined.
|
|
||||||
|
|
||||||
When an S3TC image with a width of <w>, height of <h>, and block size of
|
|
||||||
<blocksize> (8 or 16 bytes) is decoded, the corresponding image size (in
|
|
||||||
bytes) is:
|
|
||||||
|
|
||||||
ceil(<w>/4) * ceil(<h>/4) * blocksize.
|
|
||||||
|
|
||||||
When decoding an S3TC image, the block containing the texel at offset
|
|
||||||
(<x>, <y>) begins at an offset (in bytes) relative to the base of the
|
|
||||||
image of:
|
|
||||||
|
|
||||||
blocksize * (ceil(<w>/4) * floor(<y>/4) + floor(<x>/4)).
|
|
||||||
|
|
||||||
The data corresponding to a specific texel (<x>, <y>) are extracted from a
|
|
||||||
4x4 texel block using a relative (x,y) value of
|
|
||||||
|
|
||||||
(<x> modulo 4, <y> modulo 4).
|
|
||||||
|
|
||||||
There are four distinct S3TC image formats:
|
|
||||||
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64
|
|
||||||
bits of RGB image data.
|
|
||||||
|
|
||||||
Each RGB image data block is encoded as a sequence of 8 bytes, called (in
|
|
||||||
order of increasing address):
|
|
||||||
|
|
||||||
c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3
|
|
||||||
|
|
||||||
The 8 bytes of the block are decoded into three quantities:
|
|
||||||
|
|
||||||
color0 = c0_lo + c0_hi * 256
|
|
||||||
color1 = c1_lo + c1_hi * 256
|
|
||||||
bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3))
|
|
||||||
|
|
||||||
color0 and color1 are 16-bit unsigned integers that are unpacked to
|
|
||||||
RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with
|
|
||||||
a <format> of RGB and a type of UNSIGNED_SHORT_5_6_5.
|
|
||||||
|
|
||||||
bits is a 32-bit unsigned integer, from which a two-bit control code
|
|
||||||
is extracted for a texel at location (x,y) in the block using:
|
|
||||||
|
|
||||||
code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0]
|
|
||||||
|
|
||||||
where bit 31 is the most significant and bit 0 is the least
|
|
||||||
significant bit.
|
|
||||||
|
|
||||||
The RGB color for a texel at location (x,y) in the block is given by:
|
|
||||||
|
|
||||||
RGB0, if color0 > color1 and code(x,y) == 0
|
|
||||||
RGB1, if color0 > color1 and code(x,y) == 1
|
|
||||||
(2*RGB0+RGB1)/3, if color0 > color1 and code(x,y) == 2
|
|
||||||
(RGB0+2*RGB1)/3, if color0 > color1 and code(x,y) == 3
|
|
||||||
|
|
||||||
RGB0, if color0 <= color1 and code(x,y) == 0
|
|
||||||
RGB1, if color0 <= color1 and code(x,y) == 1
|
|
||||||
(RGB0+RGB1)/2, if color0 <= color1 and code(x,y) == 2
|
|
||||||
BLACK, if color0 <= color1 and code(x,y) == 3
|
|
||||||
|
|
||||||
Arithmetic operations are done per component, and BLACK refers to an
|
|
||||||
RGB color where red, green, and blue are all zero.
|
|
||||||
|
|
||||||
Since this image has an RGB format, there is no alpha component and the
|
|
||||||
image is considered fully opaque.
|
|
||||||
|
|
||||||
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64
|
|
||||||
bits of RGB image data and minimal alpha information. The RGB components
|
|
||||||
of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT.
|
|
||||||
|
|
||||||
The alpha component for a texel at location (x,y) in the block is
|
|
||||||
given by:
|
|
||||||
|
|
||||||
0.0, if color0 <= color1 and code(x,y) == 3
|
|
||||||
1.0, otherwise
|
|
||||||
|
|
||||||
IMPORTANT: When encoding an RGBA image into a format using 1-bit
|
|
||||||
alpha, any texels with an alpha component less than 0.5 end up with an
|
|
||||||
alpha of 0.0 and any texels with an alpha component greater than or
|
|
||||||
equal to 0.5 end up with an alpha of 1.0. When encoding an RGBA image
|
|
||||||
into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red,
|
|
||||||
green, and blue components of any texels with a final alpha of 0.0
|
|
||||||
will automatically be zero (black). If this behavior is not desired
|
|
||||||
by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT.
|
|
||||||
This format will never be used when a generic compressed internal
|
|
||||||
format (Table 3.16.2) is specified, although the nearly identical
|
|
||||||
format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be.
|
|
||||||
|
|
||||||
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT3_EXT: Each 4x4 block of texels consists of 64
|
|
||||||
bits of uncompressed alpha image data followed by 64 bits of RGB image
|
|
||||||
data.
|
|
||||||
|
|
||||||
Each RGB image data block is encoded according to the
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
|
|
||||||
bits always use the non-transparent encodings. In other words, they are
|
|
||||||
treated as though color0 > color1, regardless of the actual values of
|
|
||||||
color0 and color1.
|
|
||||||
|
|
||||||
Each alpha image data block is encoded as a sequence of 8 bytes, called
|
|
||||||
(in order of increasing address):
|
|
||||||
|
|
||||||
a0, a1, a2, a3, a4, a5, a6, a7
|
|
||||||
|
|
||||||
The 8 bytes of the block are decoded into one 64-bit integer:
|
|
||||||
|
|
||||||
alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 +
|
|
||||||
256 * (a5 + 256 * (a6 + 256 * a7))))))
|
|
||||||
|
|
||||||
alpha is a 64-bit unsigned integer, from which a four-bit alpha value
|
|
||||||
is extracted for a texel at location (x,y) in the block using:
|
|
||||||
|
|
||||||
alpha(x,y) = alpha[4*(4*y+x)+3..4*(4*y+x)+0]
|
|
||||||
|
|
||||||
where bit 63 is the most significant and bit 0 is the least
|
|
||||||
significant bit.
|
|
||||||
|
|
||||||
The alpha component for a texel at location (x,y) in the block is
|
|
||||||
given by alpha(x,y) / 15.
|
|
||||||
|
|
||||||
|
|
||||||
COMPRESSED_RGBA_S3TC_DXT5_EXT: Each 4x4 block of texels consists of 64
|
|
||||||
bits of compressed alpha image data followed by 64 bits of RGB image data.
|
|
||||||
|
|
||||||
Each RGB image data block is encoded according to the
|
|
||||||
COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code
|
|
||||||
bits always use the non-transparent encodings. In other words, they are
|
|
||||||
treated as though color0 > color1, regardless of the actual values of
|
|
||||||
color0 and color1.
|
|
||||||
|
|
||||||
Each alpha image data block is encoded as a sequence of 8 bytes, called
|
|
||||||
(in order of increasing address):
|
|
||||||
|
|
||||||
alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5
|
|
||||||
|
|
||||||
The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha
|
|
||||||
components by multiplying by 1/255.
|
|
||||||
|
|
||||||
The 6 "bits" bytes of the block are decoded into one 48-bit integer:
|
|
||||||
|
|
||||||
bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 +
|
|
||||||
256 * (bits_4 + 256 * bits_5))))
|
|
||||||
|
|
||||||
bits is a 48-bit unsigned integer, from which a three-bit control code
|
|
||||||
is extracted for a texel at location (x,y) in the block using:
|
|
||||||
|
|
||||||
code(x,y) = bits[3*(4*y+x)+2..3*(4*y+x)+0]
|
|
||||||
|
|
||||||
where bit 47 is the most significant and bit 0 is the least
|
|
||||||
significant bit.
|
|
||||||
|
|
||||||
The alpha component for a texel at location (x,y) in the block is
|
|
||||||
given by:
|
|
||||||
|
|
||||||
alpha0, code(x,y) == 0
|
|
||||||
alpha1, code(x,y) == 1
|
|
||||||
|
|
||||||
(6*alpha0 + 1*alpha1)/7, alpha0 > alpha1 and code(x,y) == 2
|
|
||||||
(5*alpha0 + 2*alpha1)/7, alpha0 > alpha1 and code(x,y) == 3
|
|
||||||
(4*alpha0 + 3*alpha1)/7, alpha0 > alpha1 and code(x,y) == 4
|
|
||||||
(3*alpha0 + 4*alpha1)/7, alpha0 > alpha1 and code(x,y) == 5
|
|
||||||
(2*alpha0 + 5*alpha1)/7, alpha0 > alpha1 and code(x,y) == 6
|
|
||||||
(1*alpha0 + 6*alpha1)/7, alpha0 > alpha1 and code(x,y) == 7
|
|
||||||
|
|
||||||
(4*alpha0 + 1*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 2
|
|
||||||
(3*alpha0 + 2*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 3
|
|
||||||
(2*alpha0 + 3*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 4
|
|
||||||
(1*alpha0 + 4*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 5
|
|
||||||
0.0, alpha0 <= alpha1 and code(x,y) == 6
|
|
||||||
1.0, alpha0 <= alpha1 and code(x,y) == 7
|
|
||||||
|
|
||||||
|
|
||||||
Revision History
|
|
||||||
|
|
||||||
1.1, 11/16/01 pbrown: Updated contact info, clarified where texels
|
|
||||||
fall within a single block.
|
|
||||||
|
|
||||||
1.0, 07/07/00 prbrown1: Published final version agreed to by working
|
|
||||||
group members.
|
|
||||||
|
|
||||||
0.9, 06/24/00 prbrown1: Documented that block-aligned TexSubImage calls
|
|
||||||
do not modify existing texels outside the
|
|
||||||
modified blocks. Added caveat to allow for a
|
|
||||||
(0,0)-anchored TexSubImage operation of
|
|
||||||
arbitrary size.
|
|
||||||
|
|
||||||
0.7, 04/11/00 prbrown1: Added issues on DXT1, DXT3, and DXT5 encodings
|
|
||||||
where the MSDN documentation doesn't match what
|
|
||||||
is really done. Added enum values from the
|
|
||||||
extension registry.
|
|
||||||
|
|
||||||
0.4, 03/28/00 prbrown1: Updated to reflect final version of the
|
|
||||||
ARB_texture_compression extension. Allowed
|
|
||||||
block-aligned TexSubImage calls.
|
|
||||||
|
|
||||||
0.3, 03/07/00 prbrown1: Resolved issues pertaining to the format of RGB
|
|
||||||
blocks in the DXT3 and DXT5 formats (they don't
|
|
||||||
ever use the "transparent" encoding). Fixed
|
|
||||||
decoding of DXT1 blocks. Pointed out issue of
|
|
||||||
"transparent" texels in DXT1 encodings having
|
|
||||||
different behaviors for RGB and RGBA internal
|
|
||||||
formats.
|
|
||||||
|
|
||||||
0.2, 02/23/00 prbrown1: Minor revisions; added several issues.
|
|
||||||
|
|
||||||
0.11, 02/17/00 prbrown1: Slight modification to error semantics
|
|
||||||
(INVALID_ENUM instead of INVALID_OPERATION).
|
|
||||||
|
|
||||||
0.1, 02/15/00 prbrown1: Initial revision.
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue