Bullet Library v2.81

2026-04-24 22:05:40 +00:00 · 2013-07-04 20:50:16 +02:00 · 2013-07-04 20:50:16 +02:00 · 1eb94f4828
commit 1eb94f4828
parent 64fef8b2ad
462 changed files with 59613 additions and 8036 deletions
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt
@ -0,0 +1,13 @@
+
+# Add the Bullet source root to the header search path for the solver backends below.
+INCLUDE_DIRECTORIES(
+${BULLET_PHYSICS_SOURCE_DIR}/src
+)
+
+
+# The OpenCL backend is always configured.
+# ADD_SUBDIRECTORY is used instead of the deprecated SUBDIRS command.
+ADD_SUBDIRECTORY( OpenCL )
+
+# The DirectX 11 backend is only configured when USE_DX11 is enabled.
+IF( USE_DX11 )
+	ADD_SUBDIRECTORY( DX11 )
+ENDIF( USE_DX11 )
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt
@ -0,0 +1,83 @@
+
+# Add the Bullet source root to the header search path.
+INCLUDE_DIRECTORIES(
+${BULLET_PHYSICS_SOURCE_DIR}/src
+)
+
+# DirectX SDK location as exported in the environment.
+SET(DXSDK_DIR $ENV{DXSDK_DIR})
+# Cache entry for the SDK include directory. PATH is a valid cache entry type;
+# the previous DOCSTRING is not one and was implicitly treated as STRING.
+# NOTE(review): DIRECTX_SDK_BASE_DIR is not set in this file - presumably it is
+# provided by a parent CMakeLists; confirm (DXSDK_DIR above is not used here).
+SET(DX11_INCLUDE_PATH  "${DIRECTX_SDK_BASE_DIR}/Include" CACHE PATH "Microsoft directX SDK include path")
+
+
+# DirectX headers, the headers shared between solver backends and the vector math headers.
+INCLUDE_DIRECTORIES(
+${DX11_INCLUDE_PATH} "../Shared/"
+${VECTOR_MATH_INCLUDE}
+)
+
+# Translation units compiled into the DX11 soft body solver library.
+SET(BulletSoftBodyDX11Solvers_SRCS
+	btSoftBodySolver_DX11.cpp
+	btSoftBodySolver_DX11SIMDAware.cpp
+)
+
+# Headers attached to the library target; the same list is used as the
+# PUBLIC_HEADER property for the Apple framework build further down.
+SET(BulletSoftBodyDX11Solvers_HDRS
+	btSoftBodySolver_DX11.h
+	btSoftBodySolver_DX11SIMDAware.h
+	../Shared/btSoftBodySolverData.h
+	btSoftBodySolverVertexData_DX11.h
+	btSoftBodySolverTriangleData_DX11.h
+	btSoftBodySolverLinkData_DX11.h
+	btSoftBodySolverLinkData_DX11SIMDAware.h
+	btSoftBodySolverBuffer_DX11.h
+	btSoftBodySolverVertexBuffer_DX11.h
+
+)
+
+# HLSL compute shaders, listed by base name (no directory, no extension).
+# The loop below expands each name to HLSL/<name>.hlsl; the resulting list is
+# passed to ADD_LIBRARY together with the sources and headers.
+# NOTE(review): no stringify build rule is defined in this file; the .hlsl files
+# wrap their own contents in MSTRINGIFY( ... ), presumably so the sources can
+# include them directly as string literals - confirm.
+SET(BulletSoftBodyDX11Solvers_Shaders
+	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	ComputeBounds
+	SolvePositions
+	SolvePositionsSIMDBatched
+	SolveCollisionsAndUpdateVelocities
+	SolveCollisionsAndUpdateVelocitiesSIMDBatched
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+# Build the list of shader file paths relative to this directory.
+foreach(f ${BulletSoftBodyDX11Solvers_Shaders})
+    LIST(APPEND BulletSoftBodyDX11Solvers_HLSL "HLSL/${f}.hlsl")
+endforeach(f) 
+
+
+
+# Library target: sources plus the header and shader file lists.
+ADD_LIBRARY(BulletSoftBodySolvers_DX11  ${BulletSoftBodyDX11Solvers_SRCS} ${BulletSoftBodyDX11Solvers_HDRS} ${BulletSoftBodyDX11Solvers_HLSL})
+# Both the library version and the SO version follow BULLET_VERSION.
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES SOVERSION ${BULLET_VERSION})
+# Explicit link dependencies are only declared for shared library builds.
+IF (BUILD_SHARED_LIBS)
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_DX11 BulletSoftBody BulletDynamics)
+ENDIF (BUILD_SHARED_LIBS)
+
+
+# Install rules; skipped entirely when distributable MSVC project files are being generated.
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		# The INSTALL(TARGETS ...) rules are only emitted for CMake newer than 2.5.
+		# The comparison is numeric on "major.minor"; VERSION_GREATER is deliberately
+		# not used because the old CMake versions this guards against lack it.
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				# Framework builds install into the install prefix root.
+				INSTALL(TARGETS BulletSoftBodySolvers_DX11 DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			INSTALL(TARGETS BulletSoftBodySolvers_DX11 DESTINATION lib${LIB_SUFFIX})
+# Headers are not installed here: they are already installed by the BulletMultiThreaded library.
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		# Apple framework builds mark the target as a framework and publish the header list.
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES PUBLIC_HEADER "${BulletSoftBodyDX11Solvers_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ApplyForces.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ApplyForces.hlsl
@ -0,0 +1,95 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for ApplyForcesKernel.
+cbuffer ApplyForcesCB : register( b0 )
+{
+	// Number of vertices to process; threads indexed at or beyond this do nothing.
+	unsigned int numNodes;
+	// Time step multiplied into the cloth acceleration and the wind force.
+	float solverdt;
+	// Threshold on the squared relative wind speed; at or below it no wind force is applied.
+	float epsilon;
+	// Not read by the kernel; fills the buffer out to four 32-bit values.
+	int padding3;
+};
+
+
+// Per-vertex inputs, indexed by vertex.
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+StructuredBuffer<float4> g_vertexNormal : register( t1 );
+StructuredBuffer<float> g_vertexArea : register( t2 );
+StructuredBuffer<float> g_vertexInverseMass : register( t3 );
+// Per-cloth inputs, indexed by the cloth identifier of the vertex.
+// TODO: These could be combined into a lift/drag factor array along with medium density
+StructuredBuffer<float> g_clothLiftFactor : register( t4 );
+StructuredBuffer<float> g_clothDragFactor : register( t5 );
+StructuredBuffer<float4> g_clothWindVelocity : register( t6 );
+StructuredBuffer<float4> g_clothAcceleration : register( t7 );
+StructuredBuffer<float> g_clothMediumDensity : register( t8 );
+
+// Per-vertex state that the kernel both reads and writes.
+RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
+
+// Returns the axis a scaled by dot(v, a), which is the projection of v
+// onto a when a has unit length.
+float3 projectOnAxis( float3 v, float3 a )
+{
+	float amountAlongAxis = dot(v, a);
+	return (a*amountAlongAxis);
+}
+
+// One thread per vertex. Adds the per-cloth acceleration to the velocity of every
+// vertex with a positive inverse mass and accumulates wind lift and drag into the
+// vertex force accumulator.
+[numthreads(128, 1, 1)]
+void 
+ApplyForcesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const unsigned int vertexIndex = DTid.x;
+
+	// Threads past the end of the vertex range have nothing to do
+	if( !(vertexIndex < numNodes) )
+		return;
+
+	const int cloth = g_vertexClothIdentifier[vertexIndex];
+	const float inverseMass = g_vertexInverseMass[vertexIndex];
+
+	// Vertices whose inverse mass is not positive are left untouched
+	if( !(inverseMass > 0.0f) )
+		return;
+
+	// Per-vertex state
+	float3 velocity = g_vertexVelocity[vertexIndex].xyz;
+	const float3 vertexNormal = g_vertexNormal[vertexIndex].xyz;
+	const float vertexArea = g_vertexArea[vertexIndex];
+	const float3 accumulatedForce = g_vertexForceAccumulator[vertexIndex].xyz;
+
+	// Per-cloth parameters
+	const float3 acceleration = g_clothAcceleration[cloth].xyz;
+	const float3 windVelocity = g_clothWindVelocity[cloth].xyz;
+	const float lift = g_clothLiftFactor[cloth];
+	const float drag = g_clothDragFactor[cloth];
+	const float density = g_clothMediumDensity[cloth];
+
+	// The acceleration goes straight into the velocity instead of through a force
+	velocity += (acceleration*solverdt);
+	g_vertexVelocity[vertexIndex] = float4(velocity, 0.f);
+
+	const float3 relativeWind = velocity - windVelocity;
+	const float relativeSpeedSq = dot(relativeWind, relativeWind);
+
+	// No wind force unless the relative speed is above the threshold
+	if( !(relativeSpeedSq > epsilon) )
+		return;
+
+	// Flip the normal where needed so that it does not oppose the relative wind
+	const float facing = dot(vertexNormal, relativeWind) < 0 ? -1.f : 1.f;
+	const float3 windNormal = vertexNormal * facing;
+	const float normalSpeed = dot(windNormal, relativeWind);
+	if( !(normalSpeed > 0) )
+		return;
+
+	// Lift acts along the normal and drag along the relative wind direction
+	const float halfAreaTerm = vertexArea * normalSpeed * relativeSpeedSq / 2.f;
+	const float densityTerm = halfAreaTerm * density;
+	float3 windForce = float3(0.f, 0.f, 0.f);
+	windForce += windNormal * (-densityTerm * lift);
+	windForce += normalize(relativeWind)*(-densityTerm * drag);
+
+	const float dtInverseMass = solverdt * inverseMass;
+	const float3 velocityChange = windForce * dtInverseMass;
+
+	// Normal case: the wind force is simply added to the accumulator
+	const float3 summedForce = accumulatedForce + windForce;
+
+	// Case where the force would change the velocity by more than its current magnitude:
+	// m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim;
+	const float3 limitedForce = accumulatedForce - (projectOnAxis(velocity, normalize(windForce))/dtInverseMass);
+
+	const bool exceedsVelocity = dot(velocityChange, velocityChange) > dot(velocity, velocity);
+
+	g_vertexForceAccumulator[vertexIndex] = float4(exceedsVelocity ? limitedForce : summedForce, 0.0f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ComputeBounds.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ComputeBounds.hlsl
@ -0,0 +1,83 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for ComputeBoundsKernel.
+cbuffer ComputeBoundsCB : register( b0 )
+{
+	// Number of vertices to process; threads indexed at or beyond this contribute nothing.
+	int numNodes;
+	// Number of cloths whose bounds are tracked.
+	int numSoftBodies;
+	// Not read by the kernel; fill the buffer out to four 32-bit values.
+	int padding1;
+	int padding2;
+};
+
+// Cloth identifier and position of each vertex, indexed by vertex.
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+StructuredBuffer<float4> g_vertexPositions : register( t1 );
+
+// Per-cloth bounds, stored as floats re-encoded into unsigned integers so that
+// they can be combined with integer atomic min/max.
+RWStructuredBuffer<uint4> g_clothMinBounds : register( u0 );
+RWStructuredBuffer<uint4> g_clothMaxBounds : register( u1 );
+
+// Per-group partial bounds, one slot per cloth (at most 256 cloths).
+groupshared uint4 clothMinBounds[256];
+groupshared uint4 clothMaxBounds[256];
+
+// One thread per vertex. Each group reduces the positions of its vertices into
+// per-cloth min/max bounds held in groupshared memory and then merges those into
+// the global per-cloth bounds with atomics.
+// Positions are re-encoded as unsigned integers whose ordering matches the
+// ordering of the original floats so that integer atomic min/max can be used.
+[numthreads(128, 1, 1)]
+void 
+ComputeBoundsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const unsigned int UINT_MAX = 0xffffffff;
+	// Threads per group in X; must match the numthreads attribute above
+	const unsigned int GROUP_SIZE = 128;
+	// Length of the groupshared clothMinBounds and clothMaxBounds arrays
+	const unsigned int MAX_LOCAL_CLOTHS = 256;
+	// Number of groupshared slots in use, never more than the arrays can hold
+	const unsigned int activeCloths = min( uint(max(numSoftBodies, 0)), MAX_LOCAL_CLOTHS );
+
+	// Init min and max bounds arrays.
+	// Strided so that all slots are covered even when there are more cloths than threads.
+	for( unsigned int initSlot = GTid.x; initSlot < activeCloths; initSlot += GROUP_SIZE )
+	{
+		clothMinBounds[initSlot] = uint4(UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX);
+		clothMaxBounds[initSlot] = uint4(0,0,0,0);
+	}
+
+	AllMemoryBarrierWithGroupSync();
+
+	int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		// Negative identifiers are skipped; identifiers beyond the tracked
+		// range are skipped too so that they cannot index outside the arrays
+		if( clothIdentifier >= 0 && uint(clothIdentifier) < activeCloths )
+		{
+			float3 position = g_vertexPositions[nodeID].xyz;
+
+			// Reinterpret position as uint
+			uint3 positionUInt = uint3(asuint(position.x), asuint(position.y), asuint(position.z));
+		
+			// Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints
+			// (1+~s is the two's complement negation of the sign bit s: 0 for positives, all ones for negatives)
+			positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
+			positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
+			positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
+		
+			// Min/max with the LDS values
+			InterlockedMin(clothMinBounds[clothIdentifier].x, positionUInt.x);
+			InterlockedMin(clothMinBounds[clothIdentifier].y, positionUInt.y);
+			InterlockedMin(clothMinBounds[clothIdentifier].z, positionUInt.z);
+
+			InterlockedMax(clothMaxBounds[clothIdentifier].x, positionUInt.x);
+			InterlockedMax(clothMaxBounds[clothIdentifier].y, positionUInt.y);
+			InterlockedMax(clothMaxBounds[clothIdentifier].z, positionUInt.z);
+		}
+	}
+	
+	AllMemoryBarrierWithGroupSync();
+
+
+	// Use global atomics to update the global versions of the data.
+	// Strided in the same way as the initialisation so every slot is flushed.
+	for( unsigned int flushSlot = GTid.x; flushSlot < activeCloths; flushSlot += GROUP_SIZE )
+	{
+		InterlockedMin(g_clothMinBounds[flushSlot].x, clothMinBounds[flushSlot].x);
+		InterlockedMin(g_clothMinBounds[flushSlot].y, clothMinBounds[flushSlot].y);
+		InterlockedMin(g_clothMinBounds[flushSlot].z, clothMinBounds[flushSlot].z);
+
+		InterlockedMax(g_clothMaxBounds[flushSlot].x, clothMaxBounds[flushSlot].x);
+		InterlockedMax(g_clothMaxBounds[flushSlot].y, clothMaxBounds[flushSlot].y);
+		InterlockedMax(g_clothMaxBounds[flushSlot].z, clothMaxBounds[flushSlot].z);
+	}
+}
+
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/Integrate.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/Integrate.hlsl
@ -0,0 +1,41 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for IntegrateKernel.
+cbuffer IntegrateCB : register( b0 )
+{
+	// Number of vertices to process; threads indexed at or beyond this do nothing.
+	int numNodes;
+	// Time step used for both the velocity and the position update.
+	float solverdt;
+	// Not read by the kernel; fill the buffer out to four 32-bit values.
+	int padding1;
+	int padding2;
+};
+
+// Inverse mass of each vertex, indexed by vertex.
+StructuredBuffer<float> g_vertexInverseMasses : register( t0 );
+
+// Per-vertex state updated by the kernel.
+RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
+RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u2 );
+RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u3 );
+
+// One thread per vertex. Saves the current position as the previous position,
+// advances the velocity by the accumulated force and then the position by the
+// updated velocity, and clears the force accumulator.
+[numthreads(128, 1, 1)]
+void 
+IntegrateKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int vertex = DTid.x;
+	if( vertex >= numNodes )
+		return;
+
+	const float3 startPosition = g_vertexPositions[vertex].xyz;
+	const float3 startVelocity = g_vertexVelocity[vertex].xyz;
+	const float3 appliedForce = g_vertexForceAccumulator[vertex].xyz;
+	const float invMass = g_vertexInverseMasses[vertex];
+
+	// Velocity first; the position step uses the already updated velocity
+	const float3 endVelocity = startVelocity + appliedForce * invMass * solverdt;
+	const float3 endPosition = startPosition + endVelocity * solverdt;
+
+	g_vertexPreviousPositions[vertex] = float4(startPosition, 0.f);
+	g_vertexForceAccumulator[vertex] = float4(0.f, 0.f, 0.f, 0.0f);
+	g_vertexPositions[vertex] = float4(endPosition, 0.f);
+	g_vertexVelocity[vertex] = float4(endVelocity, 0.f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/OutputToVertexArray.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/OutputToVertexArray.hlsl
@ -0,0 +1,63 @@
+MSTRINGIFY(
+
+// Per-dispatch constants shared by both output kernels.
+cbuffer OutputToVertexArrayCB : register( b0 )
+{
+	// Index of the first source vertex in the solver buffers.
+	int startNode;
+	// Number of vertices to copy; threads indexed at or beyond this do nothing.
+	int numNodes;
+	// Offset and stride of the position within the destination buffer,
+	// both measured in float elements.
+	int positionOffset;
+	int positionStride;
+	
+	// Offset and stride of the normal within the destination buffer,
+	// both measured in float elements.
+	int normalOffset;	
+	int normalStride;
+	// Not read by the kernels; fill the buffer out to eight 32-bit values.
+	int padding1;
+	int padding2;
+};
+
+
+// Solver-side positions and normals, indexed by vertex.
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexNormals : register( t1 );
+
+// Destination vertex buffer, addressed as individual floats.
+RWBuffer<float> g_vertexBuffer : register( u0 );
+
+
+// One thread per vertex. Copies the xyz position and xyz normal of a solver
+// vertex into the destination vertex buffer at the configured offsets and strides.
+[numthreads(128, 1, 1)]
+void 
+OutputToVertexArrayWithNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int vertex = DTid.x;
+	if( vertex >= numNodes )
+		return;
+
+	// Source data starts at startNode; destination indices start at zero
+	const int sourceVertex = vertex + startNode;
+	const float4 sourcePosition = g_vertexPositions[sourceVertex];
+	const float4 sourceNormal = g_vertexNormals[sourceVertex];
+
+	// Strides and offsets are expressed in floats, not float4s
+	const int positionBase = vertex * positionStride + positionOffset;
+	g_vertexBuffer[positionBase] = sourcePosition.x;
+	g_vertexBuffer[positionBase+1] = sourcePosition.y;
+	g_vertexBuffer[positionBase+2] = sourcePosition.z;
+
+	const int normalBase = vertex * normalStride + normalOffset;
+	g_vertexBuffer[normalBase] = sourceNormal.x;
+	g_vertexBuffer[normalBase+1] = sourceNormal.y;
+	g_vertexBuffer[normalBase+2] = sourceNormal.z;
+}
+
+// One thread per vertex. Copies only the xyz position of a solver vertex into
+// the destination vertex buffer; normals are neither read nor written.
+[numthreads(128, 1, 1)]
+void 
+OutputToVertexArrayWithoutNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{			
+		// Source data starts at startNode; destination indices start at zero
+		float4 position = g_vertexPositions[nodeID + startNode];
+		
+		// Stride should account for the float->float4 conversion
+		int positionDestination = nodeID * positionStride + positionOffset;		
+		g_vertexBuffer[positionDestination] = position.x;
+		g_vertexBuffer[positionDestination+1] = position.y;
+		g_vertexBuffer[positionDestination+2] = position.z;		
+	}
+}
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/PrepareLinks.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/PrepareLinks.hlsl
@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for PrepareLinksKernel.
+cbuffer PrepareLinksCB : register( b0 )
+{
+	// Number of links to process; threads indexed at or beyond this do nothing.
+	int numLinks;
+	// Not read by the kernel; fill the buffer out to four 32-bit values.
+	int padding0;
+	int padding1;
+	int padding2;
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+// Per-link scale factor multiplied into the squared link length.
+StructuredBuffer<float> g_linksMassLSC : register( t1 );
+// Previous position of each vertex, indexed by vertex.
+StructuredBuffer<float4> g_nodesPreviousPosition : register( t2 );
+
+// Per-link outputs: reciprocal of (squared length * massLSC) and the vector between the two end points.
+RWStructuredBuffer<float> g_linksLengthRatio : register( u0 );
+RWStructuredBuffer<float4> g_linksCurrentLength : register( u1 );
+
+// One thread per link. Stores the vector between the previous positions of the
+// two link end points and the reciprocal of its squared length scaled by the
+// per-link massLSC value.
+[numthreads(128, 1, 1)]
+void 
+PrepareLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int link = DTid.x;
+	if( link >= numLinks )
+		return;
+
+	const int2 endPoints = g_linksVertexIndices[link];
+	const float4 startPoint = g_nodesPreviousPosition[endPoints.x];
+	const float4 endPoint = g_nodesPreviousPosition[endPoints.y];
+	const float linkMassLSC = g_linksMassLSC[link];
+
+	// Vector from the first end point to the second
+	const float4 linkVector = endPoint - startPoint;
+
+	// No guard against a zero denominator here
+	const float scaledLengthSquared = dot(linkVector, linkVector)*linkMassLSC;
+	const float lengthRatio = 1./scaledLengthSquared;
+
+	g_linksCurrentLength[link] = linkVector;
+	g_linksLengthRatio[link] = lengthRatio;
+}
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositions.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositions.hlsl
@ -0,0 +1,55 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for SolvePositionsFromLinksKernel.
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	// Index of the first link handled by this dispatch.
+	int startLink;
+	// Number of links handled by this dispatch; threads indexed at or beyond this do nothing.
+	int numLinks;
+	// Scale applied to the positional correction of every link.
+	float kst;
+	// Not read by the kernel.
+	float ti;
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+
+// Per-link constants, indexed by link.
+StructuredBuffer<float> g_linksMassLSC : register( t1 );
+StructuredBuffer<float> g_linksRestLengthSquared : register( t2 );
+// Inverse mass of each vertex, indexed by vertex.
+StructuredBuffer<float> g_verticesInverseMass : register( t3 );
+
+// Vertex positions, corrected in place.
+RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
+
+// One thread per link of the current batch. Moves the two end points of a link
+// along the link direction in proportion to their inverse masses so that the
+// squared link length approaches the squared rest length.
+// The position updates are plain read-modify-writes.
+// NOTE(review): presumably the host only batches links that share no vertex - confirm.
+[numthreads(128, 1, 1)]
+void 
+SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	// The range test uses the index within the batch while the data is
+	// addressed with the batch start added on
+	if( !(DTid.x < numLinks) )
+		return;
+	const int link = DTid.x + startLink;
+
+	const float linkMassLSC = g_linksMassLSC[link];
+	const float restSquared = g_linksRestLengthSquared[link];
+
+	// Links with a non-positive massLSC are not solved
+	if( !(linkMassLSC > 0.0f) )
+		return;
+
+	const int2 endPoints = g_linksVertexIndices[link];
+	const int firstVertex = endPoints.x;
+	const int secondVertex = endPoints.y;
+
+	const float3 firstPosition = g_vertexPositions[firstVertex].xyz;
+	const float3 secondPosition = g_vertexPositions[secondVertex].xyz;
+
+	const float firstInverseMass = g_verticesInverseMass[firstVertex];
+	const float secondInverseMass = g_verticesInverseMass[secondVertex];
+
+	const float3 separation = secondPosition - firstPosition;
+	const float separationSquared = dot(separation, separation);
+	const float correction = ((restSquared - separationSquared)/(linkMassLSC*(restSquared+separationSquared)))*kst;
+
+	// The two end points move in opposite directions along the link
+	g_vertexPositions[firstVertex] = float4(firstPosition - separation*(correction*firstInverseMass), 0.f);
+	g_vertexPositions[secondVertex] = float4(secondPosition + separation*(correction*secondInverseMass), 0.f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl
@ -0,0 +1,147 @@
+MSTRINGIFY(
+
+
+
+// Per-dispatch constants for the SIMD batched SolvePositionsFromLinksKernel.
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	// Index of the first logical wavefront handled by this dispatch.
+	int startWaveInBatch;
+	// Number of logical wavefronts handled by this dispatch.
+	int numWaves;
+	// Scale applied to the positional correction of every link.
+	float kst;		
+	// Not read by the kernel.
+	float ti;
+};
+
+
+// Number of batches per wavefront stored one element per logical wavefront
+// (x holds the batch count and y the number of vertices used by the wavefront)
+StructuredBuffer<int2> g_wavefrontBatchCountsVertexCounts : register( t0 );
+// Set of up to maxNumVertices vertex addresses per wavefront
+StructuredBuffer<int> g_vertexAddressesPerWavefront : register( t1 );
+
+// Inverse mass of each vertex, indexed by global vertex address
+StructuredBuffer<float> g_verticesInverseMass : register( t2 );
+
+// Per-link data laid out in terms of sub batches within wavefronts.
+// The link vertex indices are local to the wavefront, not global addresses.
+StructuredBuffer<int2> g_linksVertexIndices : register( t3 );
+StructuredBuffer<float> g_linksMassLSC : register( t4 );
+StructuredBuffer<float> g_linksRestLengthSquared : register( t5 );
+
+// Vertex positions, loaded into groupshared memory, solved there and written back
+RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
+
+// Data loaded on a per-wave basis
+// NOTE(review): WAVEFRONT_BLOCK_MULTIPLIER and MAX_NUM_VERTICES_PER_WAVE are not
+// defined in this file - presumably supplied as compile-time defines by the host; confirm.
+groupshared int2 wavefrontBatchCountsVertexCounts[WAVEFRONT_BLOCK_MULTIPLIER];
+groupshared float4 vertexPositionSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
+groupshared float vertexInverseMassSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
+
+// Storing the vertex addresses actually slowed things down a little
+//groupshared int vertexAddressSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
+
+
+// Solves link constraints one logical wavefront at a time. Each wavefront loads
+// the vertices it uses into groupshared memory, runs all of its link batches
+// against that local copy and finally writes the vertices back to global memory.
+// NOTE(review): BLOCK_SIZE, WAVEFRONT_SIZE, WAVEFRONT_BLOCK_MULTIPLIER,
+// MAX_NUM_VERTICES_PER_WAVE and MAX_BATCHES_PER_WAVE are not defined in this
+// file - presumably supplied as compile-time defines by the host; confirm.
+[numthreads(BLOCK_SIZE, 1, 1)]
+void 
+SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	// The mask only yields the lane index when WAVEFRONT_SIZE is a power of two
+	const int laneInWavefront = (DTid.x & (WAVEFRONT_SIZE-1));
+	const int wavefront = startWaveInBatch + (DTid.x / WAVEFRONT_SIZE);
+	const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER;
+	// Index of this wavefront within the thread group, used to address its slice of groupshared memory
+	const int localWavefront = wavefront - firstWavefrontInBlock;
+
+	int batchesWithinWavefront = 0;
+	int verticesUsedByWave = 0;
+	int cond = wavefront < (startWaveInBatch + numWaves);
+
+	// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier	
+	if( cond)
+	{
+
+		// Load the batch counts for the wavefronts
+
+		int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
+
+		batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
+		verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
+
+		// Load the vertices for the wavefronts
+		// (each lane loads every WAVEFRONT_SIZE-th vertex of the wavefront)
+		for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+		{
+			int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+			//vertexAddressSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = vertexAddress;
+			vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
+			vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
+		}
+		
+	}
+		// Ensure compiler does not re-order memory operations
+		//AllMemoryBarrier();
+	// The group-wide sync sits outside the conditional so that every thread of the group reaches it
+	AllMemoryBarrierWithGroupSync ();
+		
+	if( cond)
+	{
+		// Loop through the batches performing the solve on each in LDS
+		int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;	
+
+		//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
+		
+		// NOTE(review): unlike the commented-out for loop above, this do/while
+		// executes the body once even when batchesWithinWavefront is zero - confirm
+		// that every wavefront always has at least one batch.
+		int batch = 0;
+		do
+		{
+			int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
+			int locationOfValue = baseDataLocation + laneInWavefront;
+			
+			
+			// These loads should all be perfectly linear across the WF
+			int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
+			float massLSC = g_linksMassLSC[locationOfValue];
+			float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
+			
+
+			// LDS vertex addresses based on logical wavefront number in block and loaded index
+			int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
+			int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
+			
+			float3 position0 = vertexPositionSharedData[vertexAddress0].xyz;
+			float3 position1 = vertexPositionSharedData[vertexAddress1].xyz;
+
+			float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
+			float inverseMass1 = vertexInverseMassSharedData[vertexAddress1]; 
+
+			float3 del = position1 - position0;
+			float len = dot(del, del);
+			
+			// k stays zero for links with a non-positive massLSC, which leaves
+			// both positions unchanged while every lane still runs the same code path
+			float k = 0;
+			if( massLSC > 0.0f )
+			{		
+				k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
+			}
+			
+			position0 = position0 - del*(k*inverseMass0);
+			position1 = position1 + del*(k*inverseMass1);
+			
+			// Ensure compiler does not re-order memory operations
+			AllMemoryBarrier();				
+
+			vertexPositionSharedData[vertexAddress0] = float4(position0, 0.f);
+			vertexPositionSharedData[vertexAddress1] = float4(position1, 0.f);
+			
+			// Ensure compiler does not re-order memory operations
+			AllMemoryBarrier();
+				
+			
+			++batch;
+		} while( batch < batchesWithinWavefront );
+		
+		// Update the global memory vertices for the wavefronts
+		for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+		{
+			int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+			g_vertexPositions[vertexAddress] = vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+		}
+	}
+		
+		
+}
+
+
+
+
+);
+
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateConstants.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateConstants.hlsl
@ -0,0 +1,48 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for UpdateConstantsKernel.
+cbuffer UpdateConstantsCB : register( b0 )
+{
+	// Number of links to process; threads indexed at or beyond this do nothing.
+	int numLinks;
+	// Not read by the kernel; fill the buffer out to four 32-bit values.
+	int padding0;
+	int padding1;
+	int padding2;
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+// Position and inverse mass of each vertex, indexed by vertex.
+StructuredBuffer<float4> g_vertexPositions : register( t1 );
+StructuredBuffer<float> g_vertexInverseMasses : register( t2 );
+// Per-link stiffness coefficient, used as the divisor of the summed inverse masses.
+StructuredBuffer<float> g_linksMaterialLSC : register( t3 );
+
+// Per-link outputs derived from the current vertex positions and masses.
+RWStructuredBuffer<float> g_linksMassLSC : register( u0 );
+RWStructuredBuffer<float> g_linksRestLengthSquared : register( u1 );
+RWStructuredBuffer<float> g_linksRestLengths : register( u2 );
+
+// One thread per link. Recomputes the per-link constants from the current
+// vertex data: the rest length (distance between the two end points), its
+// square, and the summed inverse masses divided by the stiffness coefficient.
+[numthreads(128, 1, 1)]
+void 
+UpdateConstantsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int link = DTid.x;
+	if( link >= numLinks )
+		return;
+
+	const int2 endPoints = g_linksVertexIndices[link];
+	const float stiffness = g_linksMaterialLSC[ link ];
+
+	const float3 firstPosition = g_vertexPositions[endPoints.x].xyz;
+	const float3 secondPosition = g_vertexPositions[endPoints.y].xyz;
+	const float summedInverseMass = g_vertexInverseMasses[endPoints.x] + g_vertexInverseMasses[endPoints.y];
+
+	const float3 separation = firstPosition - secondPosition;
+	const float restLength = sqrt(dot(separation, separation));
+
+	g_linksRestLengths[link] = restLength;
+	g_linksMassLSC[link] = (summedInverseMass)/stiffness;
+	// The squared rest length is derived from the rounded length rather than from the dot product
+	g_linksRestLengthSquared[link] = restLength*restLength;
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNodes.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNodes.hlsl
@ -0,0 +1,49 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for updateVelocitiesFromPositionsWithVelocitiesKernel.
+cbuffer UpdateVelocitiesFromPositionsWithVelocitiesCB : register( b0 )
+{
+	// Number of vertices to process; threads indexed at or beyond this do nothing.
+	int numNodes;
+	// Factor multiplied into the position difference (presumably 1/dt - confirm against the host code).
+	float isolverdt;
+	// Not read by the kernel; fill the buffer out to four 32-bit values.
+	int padding1;
+	int padding2;
+};
+
+
+// Per-vertex inputs, indexed by vertex.
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+StructuredBuffer<int> g_vertexClothIndices : register( t2 );
+// Per-cloth inputs, indexed by the cloth index of the vertex.
+StructuredBuffer<float> g_clothVelocityCorrectionCoefficients : register( t3 );
+StructuredBuffer<float> g_clothDampingFactor : register( t4 );
+
+// Velocities are updated in place; forces are reset to zero.
+RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
+RWStructuredBuffer<float4> g_vertexForces : register( u1 );
+
+
+// One thread per vertex. Adds a correction derived from the position change
+// to the existing velocity, damps the result and clears the vertex force.
+[numthreads(128, 1, 1)]
+void 
+updateVelocitiesFromPositionsWithVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int vertex = DTid.x;
+	if( vertex >= numNodes )
+		return;
+
+	const float3 currentPosition = g_vertexPositions[vertex].xyz;
+	const float3 lastPosition = g_vertexPreviousPositions[vertex].xyz;
+	float3 newVelocity = g_vertexVelocities[vertex].xyz;
+
+	// Per-cloth parameters
+	const int cloth = g_vertexClothIndices[vertex];
+	const float correctionCoefficient = g_clothVelocityCorrectionCoefficients[cloth];
+	const float retainedFraction = (1.f - g_clothDampingFactor[cloth]);
+
+	// Velocity correction from the distance moved since the previous position
+	const float3 moved = currentPosition - lastPosition;
+	newVelocity += moved*correctionCoefficient*isolverdt;
+
+	// Damp the velocity
+	newVelocity *= retainedFraction;
+
+	g_vertexVelocities[vertex] = float4(newVelocity, 0.f);
+	g_vertexForces[vertex] = float4(0.f, 0.f, 0.f, 0.f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNormals.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNormals.hlsl
@ -0,0 +1,98 @@
+MSTRINGIFY(
+
+// Per-dispatch constants shared by the three normal/area kernels.
+cbuffer UpdateSoftBodiesCB : register( b0 )
+{
+	// Number of vertices handled by the reset and normalize kernels.
+	unsigned int numNodes;
+	// First face and number of faces handled by UpdateSoftBodiesKernel.
+	unsigned int startFace;
+	unsigned int numFaces;
+	// Not read by any of the kernels in this file.
+	float epsilon;
+};
+
+
+// Vertex indices of each triangle in x, y and z (w is not read)
+StructuredBuffer<int4> g_triangleVertexIndexSet : register( t0 );
+// Position of each vertex, indexed by vertex
+StructuredBuffer<float4> g_vertexPositions : register( t1 );
+// Number of triangles that use each vertex, indexed by vertex
+StructuredBuffer<int> g_vertexTriangleCount : register( t2 );
+
+// Per-vertex and per-triangle normals and areas written by the kernels
+RWStructuredBuffer<float4> g_vertexNormals : register( u0 );
+RWStructuredBuffer<float> g_vertexArea : register( u1 );
+RWStructuredBuffer<float4> g_triangleNormals : register( u2 );
+RWStructuredBuffer<float> g_triangleArea : register( u3 );
+
+
+// One thread per vertex. Clears the vertex normal and vertex area so that the
+// per-face kernel can accumulate into them.
+[numthreads(128, 1, 1)]
+void 
+ResetNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const unsigned int vertex = DTid.x;
+	if( vertex >= numNodes )
+		return;
+
+	g_vertexArea[vertex] = 0.0f;
+	g_vertexNormals[vertex] = float4(0.0f, 0.0f, 0.0f, 0.0f);
+}
+
+
+// One thread per face of the current batch. Computes the face normal and area
+// and accumulates both into the three vertices of the face.
+// The vertex accumulation is a plain read-modify-write, not an atomic.
+// NOTE(review): presumably the host dispatches faces in batches (startFace and
+// numFaces) that share no vertex so that these updates do not race - confirm.
+[numthreads(128, 1, 1)]
+void 
+UpdateSoftBodiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	// The range test uses the index within the batch; the data is addressed with startFace added on
+	int faceID = DTid.x + startFace;
+	if( DTid.x < numFaces )
+	{		
+		int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
+		int nodeIndex0 = triangleIndexSet.x;
+		int nodeIndex1 = triangleIndexSet.y;
+		int nodeIndex2 = triangleIndexSet.z;
+
+		// Current positions plus the normals and areas accumulated so far
+		float3 node0 = g_vertexPositions[nodeIndex0].xyz;
+		float3 node1 = g_vertexPositions[nodeIndex1].xyz;
+		float3 node2 = g_vertexPositions[nodeIndex2].xyz;
+		float3 nodeNormal0 = g_vertexNormals[nodeIndex0].xyz;
+		float3 nodeNormal1 = g_vertexNormals[nodeIndex1].xyz;
+		float3 nodeNormal2 = g_vertexNormals[nodeIndex2].xyz;
+		float vertexArea0 = g_vertexArea[nodeIndex0];
+		float vertexArea1 = g_vertexArea[nodeIndex1];
+		float vertexArea2 = g_vertexArea[nodeIndex2];
+		
+		// Two edges of the triangle sharing vertex 0
+		float3 vector0 = node1 - node0;
+		float3 vector1 = node2 - node0;
+		
+		// The cross product is left unnormalized, so its length weights the
+		// contribution of this face to the vertex normals. That length is
+		// stored as the area (it is twice the geometric triangle area).
+		float3 faceNormal = cross(vector0.xyz, vector1.xyz);
+		float triangleArea = length(faceNormal);
+
+		nodeNormal0 = nodeNormal0 + faceNormal;
+		nodeNormal1 = nodeNormal1 + faceNormal;
+		nodeNormal2 = nodeNormal2 + faceNormal;
+		vertexArea0 = vertexArea0 + triangleArea;
+		vertexArea1 = vertexArea1 + triangleArea;
+		vertexArea2 = vertexArea2 + triangleArea;
+		
+		// Only the per-triangle normal is normalized here; vertex normals are
+		// normalized later by NormalizeNormalsAndAreasKernel
+		g_triangleNormals[faceID] = float4(normalize(faceNormal), 0.f);
+		g_vertexNormals[nodeIndex0] = float4(nodeNormal0, 0.f);
+		g_vertexNormals[nodeIndex1] = float4(nodeNormal1, 0.f);
+		g_vertexNormals[nodeIndex2] = float4(nodeNormal2, 0.f);
+		g_triangleArea[faceID] = triangleArea;
+		g_vertexArea[nodeIndex0] = vertexArea0;
+		g_vertexArea[nodeIndex1] = vertexArea1;
+		g_vertexArea[nodeIndex2] = vertexArea2;
+	}
+}
+
+// One thread per vertex. Normalizes the accumulated vertex normal and turns the
+// accumulated vertex area into the mean over the triangles using the vertex.
+// Vertices with a zero-length normal keep that normal and vertices used by no
+// triangle get an area of zero instead of producing NaN or infinity.
+[numthreads(128, 1, 1)]
+void 
+NormalizeNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x < numNodes )
+	{
+		float4 normal = g_vertexNormals[DTid.x];
+		float area = g_vertexArea[DTid.x];
+		int numTriangles = g_vertexTriangleCount[DTid.x];
+		
+		float vectorLength = length(normal);
+		
+		// Normalizing a zero-length vector would yield NaN components
+		if( vectorLength > 0.0f )
+			normal = normalize(normal);
+		
+		g_vertexNormals[DTid.x] = normal;
+		// Avoid the division by zero for vertices that belong to no triangle
+		g_vertexArea[DTid.x] = (numTriangles > 0) ? area/float(numTriangles) : 0.0f;
+	}
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositions.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositions.hlsl
@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for updateVelocitiesFromPositionsWithoutVelocitiesKernel.
+cbuffer UpdateVelocitiesFromPositionsWithoutVelocitiesCB : register( b0 )
+{
+	// Number of vertices to process; threads indexed at or beyond this do nothing.
+	int numNodes;
+	// Factor multiplied into the position difference (presumably 1/dt - confirm against the host code).
+	float isolverdt;
+	// Not read by the kernel; fill the buffer out to four 32-bit values.
+	int padding1;
+	int padding2;
+};
+
+
+// Per-vertex inputs, indexed by vertex.
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+StructuredBuffer<int> g_vertexClothIndices : register( t2 );
+// Per-cloth damping factor, indexed by the cloth index of the vertex.
+StructuredBuffer<float> g_clothDampingFactor : register( t3 );
+
+// Velocities are overwritten; forces are reset to zero.
+RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
+RWStructuredBuffer<float4> g_vertexForces : register( u1 );
+
+
+// One thread per vertex. Replaces the velocity with the damped position change
+// scaled by isolverdt and clears the vertex force.
+// The previous velocity does not contribute to the result, so it is not loaded.
+[numthreads(128, 1, 1)]
+void 
+updateVelocitiesFromPositionsWithoutVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{	
+		float3 position = g_vertexPositions[nodeID].xyz;
+		float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
+		int clothIndex = g_vertexClothIndices[nodeID];
+		float dampingFactor = g_clothDampingFactor[clothIndex];
+		// Fraction of the velocity that survives damping
+		float velocityCoefficient = (1.f - dampingFactor);
+		
+		float3 difference = position - previousPosition;
+				
+		float3 velocity = difference*velocityCoefficient*isolverdt;		
+		
+		g_vertexVelocities[nodeID] = float4(velocity, 0.f);
+		g_vertexForces[nodeID] = float4(0.f, 0.f, 0.f, 0.f);								
+	}
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositionsFromVelocities.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositionsFromVelocities.hlsl
@ -0,0 +1,35 @@
+MSTRINGIFY(
+
+// Constants for UpdatePositionsFromVelocitiesKernel, bound to slot b0.
+cbuffer UpdatePositionsFromVelocitiesCB : register( b0 )
+{
+	// Number of nodes to process; threads at or beyond this index do nothing.
+	int numNodes;
+	// Multiplier applied to each velocity to obtain the position change
+	// (presumably the solver time step; confirm on the host side).
+	float solverSDT;
+	// Not read by this shader. NOTE(review): presumably pads the buffer to 16 bytes.
+	int padding1;
+	int padding2;
+};
+
+
+// Per-node velocities (read only).
+StructuredBuffer<float4> g_vertexVelocities : register( t0 );
+
+// Per-node positions. The kernel reads the previous position and writes the
+// advanced position to both buffers.
+RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u0 );
+RWStructuredBuffer<float4> g_vertexCurrentPosition : register( u1 );
+
+
+// Advances every node by velocity*solverSDT starting from its stored previous
+// position and stores the result as both the current and the previous position.
+// One thread per node; threads at or beyond numNodes exit immediately.
+[numthreads(128, 1, 1)]
+void 
+UpdatePositionsFromVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeIndex = DTid.x;
+	if( nodeIndex >= numNodes )
+		return;
+
+	float3 startPoint = g_vertexPreviousPositions[nodeIndex].xyz;
+	float3 nodeVelocity = g_vertexVelocities[nodeIndex].xyz;
+
+	// w is written as zero for both outputs.
+	float4 advanced = float4(startPoint + nodeVelocity*solverSDT, 0.f);
+
+	g_vertexCurrentPosition[nodeIndex] = advanced;
+	g_vertexPreviousPositions[nodeIndex] = advanced;
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/VSolveLinks.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/VSolveLinks.hlsl
@ -0,0 +1,55 @@
+MSTRINGIFY(
+
+// Constants for VSolveLinksKernel, bound to slot b0.
+cbuffer VSolveLinksCB : register( b0 )
+{
+	// Offset added to the thread index to obtain the link index.
+	int startLink;
+	// Number of links handled by this dispatch; threads at or beyond it do nothing.
+	int numLinks;
+	// Scalar factor applied to the velocity correction of every link.
+	float kst;
+	// Not read by this shader. NOTE(review): presumably pads the buffer to 16 bytes.
+	int padding;
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+
+// Per-link scalar that scales the velocity correction.
+StructuredBuffer<float> g_linksLengthRatio : register( t1 );
+// Per-link vector (only xyz is read) along which the correction is applied.
+StructuredBuffer<float4> g_linksCurrentLength : register( t2 );
+// Per-node inverse mass.
+StructuredBuffer<float> g_vertexInverseMass : register( t3 );
+
+// Per-node velocities, updated in place for both end nodes of each link.
+RWStructuredBuffer<float4> g_vertexVelocity : register( u0 );
+
+// Applies a velocity correction along each link to the two nodes it joins.
+// One thread per link in the range starting at startLink and spanning numLinks.
+// The correction is proportional to the relative node velocity projected on
+// the link vector, the link length ratio, kst and the inverse mass of the node.
+[numthreads(128, 1, 1)]
+void 
+VSolveLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x >= numLinks )
+		return;
+
+	int link = DTid.x + startLink;
+
+	int2 endNodes = g_linksVertexIndices[link];
+	int nodeA = endNodes.x;
+	int nodeB = endNodes.y;
+
+	float lengthRatio = g_linksLengthRatio[link];
+	float3 linkVector = g_linksCurrentLength[link].xyz;
+
+	// Both velocities are loaded before either one is written back.
+	float3 velocityA = g_vertexVelocity[nodeA].xyz;
+	float3 velocityB = g_vertexVelocity[nodeB].xyz;
+
+	float inverseMassA = g_vertexInverseMass[nodeA];
+	float inverseMassB = g_vertexInverseMass[nodeB];
+
+	float impulse = -dot(linkVector, velocityA - velocityB)*lengthRatio*kst;
+
+	// The two nodes are pushed in opposite directions along the link vector.
+	velocityA += linkVector*(impulse*inverseMassA);
+	velocityB -= linkVector*(impulse*inverseMassB);
+
+	g_vertexVelocity[nodeA] = float4(velocityA, 0.f);
+	g_vertexVelocity[nodeB] = float4(velocityB, 0.f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocities.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocities.hlsl
@ -0,0 +1,170 @@
+MSTRINGIFY(
+
+// Constants for SolveCollisionsAndUpdateVelocitiesKernel, bound to slot b0.
+// NOTE(review): the buffer name refers to link solving although its fields
+// are consumed by the collision kernel in this file.
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	// Number of nodes to process; threads at or beyond this index do nothing.
+	unsigned int numNodes;
+	// Scale applied to the position delta when deriving a velocity
+	// (presumably the inverse of the solver time step; confirm on the host side).
+	float isolverdt;
+	// Not read by this shader. NOTE(review): presumably pads the buffer to 16 bytes.
+	int padding0;
+	int padding1;
+};
+
+// Range of entries in g_collisionObjectDetails that belong to one cloth.
+// The kernel visits indices from firstObject up to but excluding endObject.
+struct CollisionObjectIndices
+{
+	int firstObject;
+	int endObject;
+};
+
+// Description of one collider as read from g_collisionObjectDetails.
+// NOTE(review): the field order and trailing padding presumably mirror a
+// host-side structure; confirm before reordering anything.
+struct CollisionShapeDescription
+{
+	// Transform applied to shape-local points; its translation is read from _m03_m13_m23.
+	float4x4 shapeTransform;
+	// Collider velocities; only xyz is read by the kernel.
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	// Not read by the kernel in this file.
+	int softBodyIdentifier;
+	// Compared against CAPSULE_SHAPE_PROXYTYPE; other values are skipped.
+	int collisionShapeType;
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	// The capsule end points are placed at plus and minus halfHeight on the local y axis.
+	float halfHeight;
+		
+	// Added to radius to obtain the minimum allowed distance from the capsule axis.
+	float margin;
+	// Multiplied with the per-cloth friction when computing the friction force.
+	float friction;
+
+	int padding0;
+	int padding1;
+	
+};
+
+// From btBroadphaseProxy.h
+static const int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Read-only inputs. Per-node buffers are indexed by node ID; per-cloth buffers
+// are indexed by the cloth identifier stored for the node.
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+StructuredBuffer<float> g_perClothFriction : register( t2 );
+StructuredBuffer<float> g_clothDampingFactor : register( t3 );
+StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
+StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
+
+// Per-node outputs. Positions are also read as the starting point of the solve.
+RWStructuredBuffer<float4> g_vertexForces : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
+RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
+
+// Resolves collisions between each cloth node and the capsule colliders
+// assigned to its cloth, then writes the node velocity, the friction force
+// and the corrected position. One thread per node; threads at or beyond
+// numNodes do nothing. Colliders that are not capsules are skipped.
+[numthreads(128, 1, 1)]
+void 
+SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	float3 forceOnVertex = float3(0.f, 0.f, 0.f);
+	if( DTid.x < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
+		float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
+		float clothFriction = g_perClothFriction[clothIdentifier];
+		float dampingFactor = g_clothDampingFactor[clothIdentifier];
+		float velocityCoefficient = (1.f - dampingFactor);		
+		CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+
+		// Velocity implied by the position change. This is the result when the
+		// node touches nothing, including the case where the cloth has colliders
+		// but none of them is a capsule. It is recomputed below whenever a
+		// collision moves the position, so it always matches the current position.
+		float3 velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+
+		// The loop body does not run when the cloth has no colliders.
+		for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
+		{
+			CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
+			float colliderFriction = shapeDescription.friction;
+
+			if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+			{
+				// Colliding with a capsule
+
+				float capsuleHalfHeight = shapeDescription.halfHeight;
+				float capsuleRadius = shapeDescription.radius;
+				float capsuleMargin = shapeDescription.margin;
+				float4x4 worldTransform = shapeDescription.shapeTransform;
+
+				// End points of the capsule axis in shape space and in world space
+				float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f); 
+				float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
+				float4 worldC1 = mul(worldTransform, c1);
+				float4 worldC2 = mul(worldTransform, c2);
+				float3 segment = (worldC2 - worldC1).xyz;
+
+				// compute distance of tangent to vertex along line segment in capsule
+				// NOTE(review): a capsule with zero half height gives a zero length
+				// segment and this division is then undefined.
+				float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
+
+				float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
+				float distanceFromLine = length(position - closestPoint);
+				float distanceFromC1 = length(worldC1 - position);
+				float distanceFromC2 = length(worldC2 - position);
+
+				// Final distance from collision, point to push from, direction to push in
+				// for impulse force
+				float dist;
+				float3 normalVector;
+				if( distanceAlongSegment < 0 )
+				{
+					// Beyond the first end point: measure from that end
+					dist = distanceFromC1;
+					normalVector = normalize(position - worldC1).xyz;
+				} else if( distanceAlongSegment > 1.f ) {
+					// Beyond the second end point: measure from that end
+					dist = distanceFromC2;
+					normalVector = normalize(position - worldC2).xyz;	
+				} else {
+					// Alongside the axis: measure from the closest point on it
+					dist = distanceFromLine;
+					normalVector = normalize(position - closestPoint).xyz;
+				}
+
+				float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
+				float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
+				float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
+
+				float minDistance = capsuleRadius + capsuleMargin;
+
+				// Check for a collision
+				if( dist < minDistance )
+				{
+					// Project back to surface along normal
+					position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
+					velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+					float3 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+					// Two tangent directions perpendicular to the contact normal
+					float3 p1 = normalize(cross(normalVector, segment));
+					float3 p2 = normalize(cross(p1, normalVector));
+					// Full friction is sum of velocities in each direction of plane
+					float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
+
+					// Real friction is peak friction corrected by friction coefficients
+					frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+					float approachSpeed = dot(relativeVelocity, normalVector);
+
+					// Friction is only applied while the node is not separating
+					if( approachSpeed <= 0.0 )
+						forceOnVertex -= frictionVector;
+				}
+
+			}
+		}
+
+		g_vertexVelocities[nodeID] = float4(velocity, 0.f);	
+
+		// Update external force
+		g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
+
+		g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
+	}
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl
@ -0,0 +1,191 @@
+MSTRINGIFY(
+
+// Constants for SolveCollisionsAndUpdateVelocitiesKernel, bound to slot b0.
+// NOTE(review): the buffer name refers to link solving although its fields
+// belong to the collision kernel in this file.
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	// NOTE(review): the kernel in this file never compares against numNodes;
+	// it appears to rely on the node buffers being padded to whole groups.
+	unsigned int numNodes;
+	// Scale applied to the position delta when deriving a velocity
+	// (presumably the inverse of the solver time step; confirm on the host side).
+	float isolverdt;
+	// Not read by this shader. NOTE(review): presumably pads the buffer to 16 bytes.
+	int padding0;
+	int padding1;
+};
+
+// Range of entries in g_collisionObjectDetails that belong to one cloth.
+// The kernel uses endObject - firstObject as the number of colliders.
+struct CollisionObjectIndices
+{
+	int firstObject;
+	int endObject;
+};
+
+// Description of one collider as read from g_collisionObjectDetails.
+// NOTE(review): the field order and trailing padding presumably mirror a
+// host-side structure; confirm before reordering anything.
+struct CollisionShapeDescription
+{
+	// Transform applied to shape-local points; its translation is read from _m03_m13_m23.
+	float4x4 shapeTransform;
+	// Collider velocities; only xyz is read by the kernel.
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	// Not read by the kernel in this file.
+	int softBodyIdentifier;
+	// Compared against CAPSULE_SHAPE_PROXYTYPE; other values are skipped.
+	int collisionShapeType;
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	// The capsule end points are placed at plus and minus halfHeight on the local y axis.
+	float halfHeight;
+		
+	// Added to radius to obtain the minimum allowed distance from the capsule axis.
+	float margin;
+	// Multiplied with the per-cloth friction when computing the friction force.
+	float friction;
+
+	int padding0;
+	int padding1;
+	
+};
+
+// From btBroadphaseProxy.h
+static const int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Read-only inputs. Per-node buffers are indexed by node ID; per-cloth buffers
+// are indexed by the cloth identifier stored for the node.
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+StructuredBuffer<float> g_perClothFriction : register( t2 );
+StructuredBuffer<float> g_clothDampingFactor : register( t3 );
+StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
+StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
+
+// Per-node outputs. Positions are also read as the starting point of the solve.
+RWStructuredBuffer<float4> g_vertexForces : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
+RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
+
+// A buffer of local collision shapes
+// TODO: Iterate to support more than 16
+static const int MAX_LOCAL_COLLISION_SHAPES = 16;
+groupshared CollisionShapeDescription localCollisionShapes[MAX_LOCAL_COLLISION_SHAPES];
+
+// Resolves collisions between each cloth node and the capsule colliders
+// assigned to its cloth, then writes the node velocity, the friction force
+// and the corrected position. The colliders of the cloth are first staged in
+// group-shared memory by the leading threads of the group. Colliders that
+// are not capsules are skipped. There is no range check on the node index:
+// the existing comment below states the vertices are padded per group.
+[numthreads(128, 1, 1)]
+void 
+SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	float3 forceOnVertex = float3(0.f, 0.f, 0.f);
+
+	int clothIdentifier = g_vertexClothIdentifier[nodeID];
+	float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
+	float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
+	float clothFriction = g_perClothFriction[clothIdentifier];
+	float dampingFactor = g_clothDampingFactor[clothIdentifier];
+	float velocityCoefficient = (1.f - dampingFactor);		
+	CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+
+	// Colliders beyond the capacity of the group-shared cache are ignored so
+	// that neither the staging write nor the loop below runs past the array.
+	int numObjects = min( collisionObjectIndices.endObject - collisionObjectIndices.firstObject, MAX_LOCAL_COLLISION_SHAPES );
+
+	// First load all of the collision objects into LDS
+	// The sign test keeps a negative count from passing the unsigned comparison.
+	if( numObjects > 0 && GTid.x < (uint)numObjects )
+	{
+		localCollisionShapes[GTid.x] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + GTid.x ];
+	}
+
+	// Safe as the vertices are padded so that not more than one soft body is in a group
+	AllMemoryBarrierWithGroupSync();
+
+	// Velocity implied by the position change. This is the result when the
+	// node touches nothing, including the case where the cloth has colliders
+	// but none of them is a capsule. It is recomputed below whenever a
+	// collision moves the position, so it always matches the current position.
+	float3 velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+
+	// The loop body does not run when the cloth has no colliders.
+	for( int collision = 0; collision < numObjects; ++collision )
+	{
+		CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
+		float colliderFriction = shapeDescription.friction;
+
+		if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+		{
+			// Colliding with a capsule
+
+			float capsuleHalfHeight = shapeDescription.halfHeight;
+			float capsuleRadius = shapeDescription.radius;
+			float capsuleMargin = shapeDescription.margin;
+
+			float4x4 worldTransform = shapeDescription.shapeTransform;
+
+			// End points of the capsule axis in shape space and in world space
+			float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f); 
+			float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
+			float4 worldC1 = mul(worldTransform, c1);
+			float4 worldC2 = mul(worldTransform, c2);
+			float3 segment = (worldC2 - worldC1).xyz;
+
+			// compute distance of tangent to vertex along line segment in capsule
+			// NOTE(review): a capsule with zero half height gives a zero length
+			// segment and this division is then undefined.
+			float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
+
+			float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
+			float distanceFromLine = length(position - closestPoint);
+			float distanceFromC1 = length(worldC1 - position);
+			float distanceFromC2 = length(worldC2 - position);
+
+			// Final distance from collision, point to push from, direction to push in
+			// for impulse force
+			float dist;
+			float3 normalVector;
+			if( distanceAlongSegment < 0 )
+			{
+				// Beyond the first end point: measure from that end
+				dist = distanceFromC1;
+				normalVector = normalize(position - worldC1).xyz;
+			} else if( distanceAlongSegment > 1.f ) {
+				// Beyond the second end point: measure from that end
+				dist = distanceFromC2;
+				normalVector = normalize(position - worldC2).xyz;	
+			} else {
+				// Alongside the axis: measure from the closest point on it
+				dist = distanceFromLine;
+				normalVector = normalize(position - closestPoint).xyz;
+			}
+
+			float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
+			float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
+			float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
+
+			float minDistance = capsuleRadius + capsuleMargin;
+
+			// Check for a collision
+			if( dist < minDistance )
+			{
+				// Project back to surface along normal
+				position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
+				velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+				float3 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+				// Two tangent directions perpendicular to the contact normal
+				float3 p1 = normalize(cross(normalVector, segment));
+				float3 p2 = normalize(cross(p1, normalVector));
+				// Full friction is sum of velocities in each direction of plane
+				float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
+
+				// Real friction is peak friction corrected by friction coefficients
+				frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+				float approachSpeed = dot(relativeVelocity, normalVector);
+
+				// Friction is only applied while the node is not separating
+				if( approachSpeed <= 0.0 )
+					forceOnVertex -= frictionVector;
+			}
+
+		}
+	}
+
+	g_vertexVelocities[nodeID] = float4(velocity, 0.f);	
+
+	// Update external force
+	g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
+
+	g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverBuffer_DX11.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverBuffer_DX11.h
@ -0,0 +1,323 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
+#define BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
+
+// DX11 support
+#include <windows.h>
+#include <crtdbg.h>
+#include <d3d11.h>
+#include <d3dx11.h>
+#include <d3dcompiler.h>
+
+#ifndef SAFE_RELEASE
+#define SAFE_RELEASE(p)      { if(p) { (p)->Release(); (p)=NULL; } }
+#endif
+
+/**
+ * DX11 Buffer that tracks a host buffer on use to ensure size-correctness.
+ *
+ * Wraps a structured GPU buffer together with its shader resource view and,
+ * unless the buffer is read-only on the GPU, an unordered access view and a
+ * staging buffer used to read results back into the host array.
+ * The host array is referenced, not owned. The D3D objects held by this class
+ * are released in the destructor.
+ * NOTE(review): the class is copyable but a copy would release the same D3D
+ * objects twice; instances should not be copied.
+ */
+template <typename ElementType> class btDX11Buffer
+{
+protected:
+	ID3D11Device*				m_d3dDevice;
+	ID3D11DeviceContext*		m_d3dDeviceContext;
+
+	ID3D11Buffer*               m_Buffer;
+	ID3D11ShaderResourceView*   m_SRV;
+	ID3D11UnorderedAccessView*  m_UAV;
+	btAlignedObjectArray< ElementType >*	m_CPUBuffer;
+
+	// TODO: Separate this from the main class
+	// as read back buffers can be shared between buffers
+	ID3D11Buffer*               m_readBackBuffer;
+
+	// Number of host elements the GPU resources were last created for.
+	int m_gpuSize;
+	// True while the GPU copy is considered current.
+	bool m_onGPU;
+
+	// When set, no UAV or staging buffer is created and nothing is read back.
+	bool m_readOnlyOnGPU;
+	
+	/**
+	 * Create the GPU buffer (or adopt preexistingBuffer) and the views that go with it.
+	 * Resources are sized from the current host array, with a minimum of one element.
+	 * Existing resources are not released here; callers do that beforehand.
+	 * @param preexistingBuffer Buffer to wrap instead of creating a new one, or 0.
+	 * @return true on success, false as soon as any D3D creation call fails.
+	 */
+	bool createBuffer( ID3D11Buffer *preexistingBuffer = 0)
+	{
+		HRESULT hr = S_OK;
+
+		// At a minimum the buffer must exist, so never ask for zero elements
+		unsigned int numElements = m_CPUBuffer->size();
+		if( numElements == 0 )
+			numElements = 1;
+
+		// Create all CS buffers
+		if( preexistingBuffer )
+		{
+			m_Buffer = preexistingBuffer;
+		} else {
+			D3D11_BUFFER_DESC buffer_desc;
+			ZeroMemory(&buffer_desc, sizeof(buffer_desc));		
+			buffer_desc.Usage = D3D11_USAGE_DEFAULT;
+			if( m_readOnlyOnGPU )
+				buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+			else
+				buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
+			buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
+			buffer_desc.ByteWidth = numElements * sizeof(ElementType);
+			buffer_desc.StructureByteStride = sizeof(ElementType);
+			hr = m_d3dDevice->CreateBuffer(&buffer_desc, NULL, &m_Buffer);
+			if( FAILED( hr ) )
+				return false;
+		} 
+
+		// Create SRV: needed whether or not the buffer is writable on the GPU
+		D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
+		ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
+		srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
+		srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
+		// NumElements shares storage with ElementWidth in D3D11_BUFFER_SRV
+		srvbuffer_desc.Buffer.NumElements = numElements;
+		hr = m_d3dDevice->CreateShaderResourceView(m_Buffer, &srvbuffer_desc, &m_SRV);
+		if( FAILED( hr ) )
+			return false;
+
+		if( !m_readOnlyOnGPU )
+		{
+			// Create UAV
+			D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc;
+			ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc));
+			uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
+			uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
+			uavbuffer_desc.Buffer.NumElements = numElements;
+			hr = m_d3dDevice->CreateUnorderedAccessView(m_Buffer, &uavbuffer_desc, &m_UAV);
+			if( FAILED( hr ) )
+				return false;
+
+			// Create read back buffer, sized like the main buffer so that it
+			// is never requested with a zero byte width
+			D3D11_BUFFER_DESC readback_buffer_desc;
+			ZeroMemory(&readback_buffer_desc, sizeof(readback_buffer_desc));
+
+			readback_buffer_desc.ByteWidth = numElements * sizeof(ElementType);
+			readback_buffer_desc.Usage = D3D11_USAGE_STAGING;
+			readback_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+			readback_buffer_desc.StructureByteStride = sizeof(ElementType);
+			hr = m_d3dDevice->CreateBuffer(&readback_buffer_desc, NULL, &m_readBackBuffer);
+			if( FAILED( hr ) )
+				return false;
+		}
+
+		m_gpuSize = m_CPUBuffer->size();
+		return true;
+	}
+
+	/**
+	 * Copy the GPU buffer contents into the host array through the staging buffer.
+	 * Callers must already have checked that the data is on the GPU, that the
+	 * buffer is not read-only there and that the host array is not empty.
+	 * @return false if the staging buffer could not be mapped, true otherwise.
+	 */
+	bool readBackToCPU()
+	{
+		// Never copy more than the GPU resources were created for, even if
+		// the host array has grown since the last upload
+		int numElements = m_CPUBuffer->size();
+		if( numElements > m_gpuSize )
+			numElements = m_gpuSize;
+		if( numElements <= 0 )
+			return true;
+
+		D3D11_BOX sourceRegion;	
+		sourceRegion.left = 0;
+		sourceRegion.front = 0;
+		sourceRegion.top = 0;
+		sourceRegion.bottom = 1;
+		sourceRegion.back = 1;
+		sourceRegion.right = numElements*sizeof(ElementType);
+
+		m_d3dDeviceContext->CopySubresourceRegion(
+			m_readBackBuffer,
+			0,
+			0,
+			0,
+			0 ,
+			m_Buffer,
+			0,
+			&sourceRegion
+			);
+
+		D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; 
+		HRESULT hr = m_d3dDeviceContext->Map(m_readBackBuffer, 0, D3D11_MAP_READ, 0, &MappedResource);
+		// A failed map leaves no valid pointer to copy from
+		if( FAILED( hr ) || !MappedResource.pData )
+			return false;
+
+		memcpy(&((*m_CPUBuffer)[0]), MappedResource.pData, (numElements * sizeof(ElementType) ));		
+		m_d3dDeviceContext->Unmap(m_readBackBuffer, 0);
+		return true;
+	}
+
+
+
+public:
+	/**
+	 * Store the device, context and host array pointers. No GPU resources are
+	 * created until moveToGPU is called.
+	 * @param readOnly If true the buffer is only ever read by the GPU.
+	 */
+	btDX11Buffer( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext, btAlignedObjectArray< ElementType > *CPUBuffer, bool readOnly )
+	{
+		m_d3dDevice = d3dDevice;
+		m_d3dDeviceContext = d3dDeviceContext;
+		m_Buffer = 0;
+		m_SRV = 0;
+		m_UAV = 0;
+		m_readBackBuffer = 0;
+
+		m_CPUBuffer = CPUBuffer;
+
+		m_gpuSize = 0;
+		m_onGPU = false;
+
+		m_readOnlyOnGPU = readOnly;
+	}
+
+	/** Release every D3D object held by this buffer. */
+	virtual ~btDX11Buffer()
+	{
+		SAFE_RELEASE(m_Buffer);
+		SAFE_RELEASE(m_SRV);
+		SAFE_RELEASE(m_UAV);
+		SAFE_RELEASE(m_readBackBuffer);
+	}
+
+	/** Reference to the shader resource view pointer (null until created). */
+	ID3D11ShaderResourceView* &getSRV()
+	{
+		return m_SRV;
+	}
+
+	/** Reference to the unordered access view pointer (null for read-only buffers). */
+	ID3D11UnorderedAccessView* &getUAV()
+	{
+		return m_UAV;
+	}
+
+	/** Reference to the underlying buffer pointer (null until created). */
+	ID3D11Buffer* &getBuffer()
+	{
+		return m_Buffer;
+	}
+
+	/**
+	 * Move the data to the GPU if it is not there already.
+	 * @return false only if the GPU resources had to be created and that failed.
+	 */
+	bool moveToGPU()
+	{
+		// Reallocate if GPU size is too small
+		if( (m_CPUBuffer->size() > m_gpuSize ) )
+			m_onGPU = false;
+		if( !m_onGPU && m_CPUBuffer->size() > 0 )
+		{
+			// If the buffer doesn't exist or the CPU-side buffer has changed size,
+			// drop the old resources and create new ones of the right size
+			if( !m_Buffer || (m_CPUBuffer->size() != m_gpuSize) )
+			{
+				SAFE_RELEASE(m_Buffer);
+				SAFE_RELEASE(m_SRV);
+				SAFE_RELEASE(m_UAV);
+				SAFE_RELEASE(m_readBackBuffer);
+				if( !createBuffer() )
+				{
+					// A bare string literal is always true and would never trigger
+					btAssert(0 && "Buffer creation failed.");
+					return false;
+				}
+			}
+
+			if( m_gpuSize > 0 )
+			{
+				D3D11_BOX destRegion;
+				destRegion.left = 0;
+				destRegion.front = 0;
+				destRegion.top = 0;
+				destRegion.bottom = 1;
+				destRegion.back = 1;
+				destRegion.right = (m_CPUBuffer->size())*sizeof(ElementType);
+				m_d3dDeviceContext->UpdateSubresource(m_Buffer, 0, &destRegion, &((*m_CPUBuffer)[0]), 0, 0);
+
+				m_onGPU = true;
+			}
+
+		}
+
+		return true;
+	}
+
+	/**
+	 * Move the data back from the GPU if it is on there and isn't read only.
+	 * On success the buffer is marked as no longer being on the GPU.
+	 * @return false if the read back failed, true otherwise (including when
+	 *         there was nothing to do).
+	 */
+	bool moveFromGPU()
+	{
+		if( m_CPUBuffer->size() > 0 && m_onGPU && !m_readOnlyOnGPU )
+		{
+			if( !readBackToCPU() )
+				return false;
+
+			m_onGPU = false;
+		}
+
+		return true;
+	}
+
+
+	/**
+	 * Copy the data back from the GPU without changing its state to be CPU-side.
+	 * Useful if we just want to view it on the host for visualization.
+	 * @return false if the read back failed, true otherwise (including when
+	 *         there was nothing to do).
+	 */
+	bool copyFromGPU()
+	{
+		if( m_CPUBuffer->size() > 0 && m_onGPU && !m_readOnlyOnGPU )
+			return readBackToCPU();
+
+		return true;
+	}
+
+	/**
+	 * Call if data has changed on the CPU.
+	 * Can then trigger a move to the GPU as necessary.
+	 */
+	virtual void changedOnCPU()
+	{
+		m_onGPU = false;
+	}
+}; // class btDX11Buffer
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h
@ -0,0 +1,103 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+
+/**
+ * DX11 flavour of the soft body link data: extends btSoftBodyLinkData with one
+ * btDX11Buffer per link attribute plus batching information.
+ * NOTE(review): the btDX11Buffer members presumably wrap the host arrays
+ * declared in btSoftBodyLinkData; confirm in the constructor definition.
+ */
+class btSoftBodyLinkDataDX11 : public btSoftBodyLinkData
+{
+public:
+	// NOTE(review): presumably tracks whether the link data currently lives on the GPU.
+	bool				m_onGPU;
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+
+	// One GPU buffer wrapper per link attribute.
+	btDX11Buffer<LinkNodePair>				m_dx11Links;
+	btDX11Buffer<float>											m_dx11LinkStrength;
+	btDX11Buffer<float>											m_dx11LinksMassLSC;
+	btDX11Buffer<float>											m_dx11LinksRestLengthSquared;
+	btDX11Buffer<Vectormath::Aos::Vector3>						m_dx11LinksCLength;
+	btDX11Buffer<float>											m_dx11LinksLengthRatio;
+	btDX11Buffer<float>											m_dx11LinksRestLength;
+	btDX11Buffer<float>											m_dx11LinksMaterialLinearStiffnessCoefficient;
+
+	/** Start index and length of one computation batch; both default to zero. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+
+
+	//ID3D11Buffer*               readBackBuffer;
+	
+	/** Defined out of line; takes the device and context used for the GPU buffers. */
+	btSoftBodyLinkDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyLinkDataDX11();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex );
+
+	// Accelerator residency queries and transfers; the definitions are not in this header.
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h
@ -0,0 +1,173 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+#ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
+#define BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+
+class btSoftBodyLinkDataDX11SIMDAware : public btSoftBodyLinkData	// DX11 link data whose arrays are grouped per wavefront
+{
+public:
+	bool				m_onGPU;
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+	const int m_wavefrontSize;
+	const int m_linksPerWorkItem;
+	const int m_maxLinksPerWavefront;
+	int m_maxBatchesWithinWave;
+	int m_maxVerticesWithinWave;
+	int m_numWavefronts;
+
+	int m_maxVertex;
+
+	struct NumBatchesVerticesPair	// element type of m_numBatchesAndVerticesWithinWaves and of its DX11 buffer
+	{
+		int numBatches;
+		int numVertices;
+	};
+
+	// Array storing number of links in each wavefront
+	btAlignedObjectArray<int>									m_linksPerWavefront;
+	btAlignedObjectArray<NumBatchesVerticesPair>				m_numBatchesAndVerticesWithinWaves;
+	btDX11Buffer< NumBatchesVerticesPair >						m_dx11NumBatchesAndVerticesWithinWaves;
+
+	// All arrays here will contain batches of m_maxLinksPerWavefront links
+	// ordered by wavefront.
+	// with either global vertex pairs or local vertex pairs
+	btAlignedObjectArray< int >									m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
+	btDX11Buffer<int>											m_dx11WavefrontVerticesGlobalAddresses;
+	btAlignedObjectArray< LinkNodePair >						m_linkVerticesLocalAddresses; // Vertex pair for the link
+	btDX11Buffer<LinkNodePair>									m_dx11LinkVerticesLocalAddresses;
+	btDX11Buffer<float>											m_dx11LinkStrength;
+	btDX11Buffer<float>											m_dx11LinksMassLSC;
+	btDX11Buffer<float>											m_dx11LinksRestLengthSquared;
+	btDX11Buffer<float>											m_dx11LinksRestLength;
+	btDX11Buffer<float>											m_dx11LinksMaterialLinearStiffnessCoefficient;
+
+	struct BatchPair	// start/length pair, element type of m_wavefrontBatchStartLengths
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_wavefrontBatchStartLengths;
+
+
+	//ID3D11Buffer*               readBackBuffer;
+	
+	btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyLinkDataDX11SIMDAware();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex );
+
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+	// The accessors below only read members, so they are const-qualified and usable through a const reference or pointer.
+	int getMaxVerticesPerWavefront() const	// returns m_maxVerticesWithinWave
+	{
+		return m_maxVerticesWithinWave;
+	}
+
+	int getWavefrontSize() const	// returns m_wavefrontSize
+	{
+		return m_wavefrontSize;
+	}
+
+	int getLinksPerWorkItem() const	// returns m_linksPerWorkItem
+	{
+		return m_linksPerWorkItem;
+	}
+
+	int getMaxLinksPerWavefront() const	// returns m_maxLinksPerWavefront
+	{
+		return m_maxLinksPerWavefront;
+	}
+
+	int getMaxBatchesPerWavefront() const	// returns m_maxBatchesWithinWave
+	{
+		return m_maxBatchesWithinWave;
+	}
+
+	int getNumWavefronts() const	// returns m_numWavefronts
+	{
+		return m_numWavefronts;
+	}
+
+	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront ) const	// copy of entry 'wavefront'; the index is not range-checked here
+	{
+		return m_numBatchesAndVerticesWithinWaves[wavefront];
+	}
+
+	int getVertexGlobalAddresses( int vertexIndex ) const	// entry 'vertexIndex' of m_wavefrontVerticesGlobalAddresses; not range-checked here
+	{
+		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
+	}
+
+	/**
+	 * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
+	 */
+	LinkNodePair getVertexPairLocalAddresses( int linkIndex ) const
+	{
+		return m_linkVerticesLocalAddresses[linkIndex];
+	}
+
+};
+
+
+#endif // #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h
@ -0,0 +1,96 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
+#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+class btSoftBodyTriangleDataDX11 : public btSoftBodyTriangleData	// DX11 buffers and batching data for soft-body triangles
+{
+public:
+	bool				m_onGPU;	// presumably what onAccelerator() reports - implementation not in this header
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+	// One DX11 buffer per triangle attribute (vertex indices, area, normal).
+	btDX11Buffer<btSoftBodyTriangleData::TriangleNodeSet>							m_dx11VertexIndices;
+	btDX11Buffer<float>									m_dx11Area;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11Normal;
+
+	struct BatchPair	// start/length pair, element type of m_batchStartLengths
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+
+	/**
+	 * Triangle addressing information for each cloth.
+	 * Presumably allows triangle locations to be computed independently of data batching (wording adapted from the link data class; confirm in the .cpp).
+	 */
+	btAlignedObjectArray< int >							m_triangleAddresses;
+
+	/**
+	 * Start and length values for computation batches over triangle data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+
+	//ID3D11Buffer*               readBackBuffer;
+
+public:
+	btSoftBodyTriangleDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyTriangleDataDX11();
+
+
+	/** Allocate space for numTriangles triangles. NOTE(review): the old text spoke of links (copy-paste); exact arrays touched are in the .cpp - confirm. */
+	virtual void createTriangles( int numTriangles );
+	
+	/** Store the described triangle at triangleIndex; presumably requires a prior createTriangles call, as the link variant requires createLinks - confirm. */
+	virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
+
+	virtual bool onAccelerator();
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+	/**
+	 * Generate (and later update) the batching for the entire triangle set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexBuffer_DX11.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexBuffer_DX11.h
@ -0,0 +1,107 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H 
+
+
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+
+#include <windows.h>
+#include <crtdbg.h>
+#include <d3d11.h>
+#include <d3dx11.h>
+#include <d3dcompiler.h>
+
+class btDX11VertexBufferDescriptor : public btVertexBufferDescriptor
+{
+protected:
+	/** Device context supplied at construction and handed back by getContext(). */
+	ID3D11DeviceContext* m_context;
+	/** DX11 buffer supplied at construction and handed back by getbtDX11Buffer(). */
+	ID3D11Buffer* m_vertexBuffer;
+	/** UAV supplied at construction and handed back by getDX11UAV(). */
+	ID3D11UnorderedAccessView*  m_vertexBufferUAV;
+
+
+public:
+	/**
+	 * Describe a target that carries vertex positions only.
+	 * buffer: DX11 buffer to place the vertex data in; UAV: its UAV representation laid out in floats.
+	 * vertexOffset: offset in floats to the first vertex.
+	 * vertexStride: stride in floats between vertices.
+	 */
+	btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride ) :
+		m_context( context ), m_vertexBuffer( buffer ), m_vertexBufferUAV( UAV )
+	{
+		// The position fields are not declared in this class, so they are assigned rather than listed above.
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+	}
+
+	/**
+	 * Describe a target that carries vertex positions and normals.
+	 * buffer: DX11 buffer to place the vertex data in; UAV: its UAV representation laid out in floats.
+	 * vertexOffset: offset in floats to the first vertex.
+	 * vertexStride: stride in floats between vertices.
+	 * normalOffset: offset in floats to the first normal.
+	 * normalStride: stride in floats between normals.
+	 */
+	btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
+		m_context( context ), m_vertexBuffer( buffer ), m_vertexBufferUAV( UAV )
+	{
+		// Position layout.
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+
+		// Normal layout.
+		m_normalOffset = normalOffset;
+		m_normalStride = normalStride;
+		m_hasNormals = true;
+	}
+
+	virtual ~btDX11VertexBufferDescriptor()
+	{
+		// Nothing is released here.
+	}
+
+	/**
+	 * Report this descriptor as a DX11_BUFFER.
+	 */
+	virtual BufferTypes getBufferType() const
+	{
+		return DX11_BUFFER;
+	}
+
+	virtual ID3D11DeviceContext* getContext() const	// the context given to the constructor
+	{
+		return m_context;
+	}
+
+	virtual ID3D11Buffer* getbtDX11Buffer() const	// the buffer given to the constructor
+	{
+		return m_vertexBuffer;
+	}
+
+	virtual ID3D11UnorderedAccessView* getDX11UAV() const	// the UAV given to the constructor
+	{
+		return m_vertexBufferUAV;
+	}		
+};
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h
@ -0,0 +1,63 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+#define BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+
+class btSoftBodyLinkData;
+// 'class btSoftBodyLinkData::LinkDescription;' was dropped: a qualified name cannot be forward-declared in standard C++, and nothing in this header uses it.
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+class btSoftBodyVertexDataDX11 : public btSoftBodyVertexData	// DX11 buffers for the per-vertex soft-body data
+{
+protected:
+	bool				m_onGPU;	// presumably what onAccelerator() reports - implementation not in this header
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+public:
+	btDX11Buffer<int>										m_dx11ClothIdentifier;
+	btDX11Buffer<Vectormath::Aos::Point3>					m_dx11VertexPosition;
+	btDX11Buffer<Vectormath::Aos::Point3>					m_dx11VertexPreviousPosition;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexVelocity;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexForceAccumulator;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexNormal;
+	btDX11Buffer<float>									m_dx11VertexInverseMass;
+	btDX11Buffer<float>									m_dx11VertexArea;
+	btDX11Buffer<int>										m_dx11VertexTriangleCount;
+
+	// The buffers above are public; the device pointers and m_onGPU are protected.
+	//ID3D11Buffer*               readBackBuffer;
+
+public:
+	btSoftBodyVertexDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+	virtual ~btSoftBodyVertexDataDX11();
+
+	virtual bool onAccelerator();
+	virtual bool moveToAccelerator();
+	// NOTE(review): the effect of bCopy / bCopyMinimum is defined in the .cpp; only their defaults (false / true) are visible here.
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
+};
+
+
+#endif // #ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h
@ -0,0 +1,691 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+
+
+#include "vectormath/vmInclude.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "btSoftBodySolverLinkData_DX11.h"
+#include "btSoftBodySolverVertexData_DX11.h"
+#include "btSoftBodySolverTriangleData_DX11.h"
+
+
+
+class DXFunctions	// Holds a D3D11 device, its context and a shader-compile function pointer.
+{
+public:
+	// Signature of the compile routine stored in m_dx11CompileFromMemory.
+	typedef HRESULT (WINAPI * CompileFromMemoryFunc)(LPCSTR,SIZE_T,LPCSTR,const D3D10_SHADER_MACRO*,LPD3D10INCLUDE,LPCSTR,LPCSTR,UINT,UINT,ID3DX11ThreadPump*,ID3D10Blob**,ID3D10Blob**,HRESULT*);
+
+	ID3D11Device *		 m_dx11Device;
+	ID3D11DeviceContext* m_dx11Context;
+	CompileFromMemoryFunc m_dx11CompileFromMemory;
+
+	/** Remember the three arguments; nothing else is done here. */
+	DXFunctions(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, CompileFromMemoryFunc dx11CompileFromMemory)
+	{
+		m_dx11Device = dx11Device;
+		m_dx11Context = dx11Context;
+		m_dx11CompileFromMemory = dx11CompileFromMemory;
+	}
+
+	/**
+	 * A compute shader pointer paired with a constant buffer pointer.
+	 * Both pointers start out null.
+	 */
+	class KernelDesc
+	{
+	public:
+		ID3D11ComputeShader* kernel;
+		ID3D11Buffer* constBuffer;
+
+		KernelDesc() :
+			kernel( 0 ),
+			constBuffer( 0 )
+		{
+		}
+
+		virtual ~KernelDesc()
+		{
+			// TODO: the kernel should probably be destroyed here, but that needs care
+			// because KernelDesc objects may be copied
+		}
+	}; 
+
+	/**
+	 * Compile the compute shader kernel shaderName from shaderString and return its KernelDesc (defined in the .cpp).
+	 */
+	KernelDesc compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros = 0 );
+
+};
+
+class btDX11SoftBodySolver : public btSoftBodySolver
+{
+protected:
+	/**
+	 * Entry in the collision shape array.
+	 * Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
+	 */
+	struct CollisionShapeDescription
+	{
+		Vectormath::Aos::Transform3 shapeTransform;
+		Vectormath::Aos::Vector3 linearVelocity;
+		Vectormath::Aos::Vector3 angularVelocity;
+
+		int softBodyIdentifier;
+		int collisionShapeType;
+		float radius;	// radius and halfHeight: both needed for capsule
+		float halfHeight;
+		float margin;
+		float friction;
+		/** Zero every scalar field so no entry carries indeterminate values; the Vectormath members keep their own default construction. */
+		CollisionShapeDescription()
+		{
+			softBodyIdentifier = 0;
+			collisionShapeType = 0;
+			radius = 0;
+			halfHeight = 0;
+			margin = 0;
+			friction = 0;
+		}
+	};
+
+	struct UIntVector3	// three unsigned components followed by one padding word that is always set to zero
+	{
+		UIntVector3() :
+			x( 0 ),
+			y( 0 ),
+			z( 0 ),
+			_padding( 0 )
+		{
+		}
+		
+		UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ ) :
+			x( x_ ),
+			y( y_ ),
+			z( z_ ),
+			_padding( 0 )
+		{
+		}
+			
+		unsigned int x;
+		unsigned int y;
+		unsigned int z;
+		unsigned int _padding;
+	};
+
+
+
+public:
+	/**
+	 * SoftBody class to maintain information about a soft body instance
+	 * within a solver.
+	 * This data addresses the main solver arrays.
+	 */
+	class btAcceleratedSoftBodyInterface
+	{
+	protected:
+		/** Current number of vertices that are part of this cloth */
+		int m_numVertices;
+		/** Maximum number of vertices allocated to be part of this cloth */
+		int m_maxVertices;
+		/** Current number of triangles that are part of this cloth */
+		int m_numTriangles;
+		/** Maximum number of triangles allocated to be part of this cloth */
+		int m_maxTriangles;
+		/** Index of first vertex in the world allocated to this cloth */
+		int m_firstVertex;
+		/** Index of first triangle in the world allocated to this cloth */
+		int m_firstTriangle;
+		/** Index of first link in the world allocated to this cloth */
+		int m_firstLink;
+		/** Maximum number of links allocated to this cloth */
+		int m_maxLinks;
+		/** Current number of links allocated to this cloth */
+		int m_numLinks;
+
+		/** The actual soft body this data represents */
+		btSoftBody *m_softBody;
+
+		/** Bind to softBody and start with every count and index at zero. */
+	public:
+		btAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
+		  m_softBody( softBody )
+		{
+			m_numVertices = 0;
+			m_maxVertices = 0;
+			m_numTriangles = 0;
+			m_maxTriangles = 0;
+			m_firstVertex = 0;
+			m_firstTriangle = 0;
+			m_firstLink = 0;
+			m_maxLinks = 0;
+			m_numLinks = 0;
+		}
+		int getNumVertices() const
+		{
+			return m_numVertices;
+		}
+
+		int getNumTriangles() const
+		{
+			return m_numTriangles;
+		}
+
+		int getMaxVertices() const
+		{
+			return m_maxVertices;
+		}
+
+		int getMaxTriangles() const
+		{
+			return m_maxTriangles;
+		}
+
+		int getFirstVertex() const
+		{
+			return m_firstVertex;
+		}
+
+		int getFirstTriangle() const
+		{
+			return m_firstTriangle;
+		}
+
+
+		/**
+		 * Update the bounds in the btSoftBody object
+		 */
+		void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
+
+		
+		// TODO: All of these set functions will have to do checks and
+		// update the world because restructuring of the arrays will be necessary
+		// Reasonable use of "friend"?
+		void setNumVertices( int numVertices )
+		{
+			m_numVertices = numVertices;
+		}	
+	
+		void setNumTriangles( int numTriangles )
+		{
+			m_numTriangles = numTriangles;
+		}
+
+		void setMaxVertices( int maxVertices )
+		{
+			m_maxVertices = maxVertices;
+		}
+
+		void setMaxTriangles( int maxTriangles )
+		{
+			m_maxTriangles = maxTriangles;
+		}
+
+		void setFirstVertex( int firstVertex )
+		{
+			m_firstVertex = firstVertex;
+		}
+
+		void setFirstTriangle( int firstTriangle )
+		{
+			m_firstTriangle = firstTriangle;
+		}
+
+		void setMaxLinks( int maxLinks )
+		{
+			m_maxLinks = maxLinks;
+		}
+
+		void setNumLinks( int numLinks )
+		{
+			m_numLinks = numLinks;
+		}
+
+		void setFirstLink( int firstLink )
+		{
+			m_firstLink = firstLink;
+		}
+		// The remaining getters are const-qualified like the vertex/triangle getters, so they work on a const interface.
+		int getMaxLinks() const
+		{
+			return m_maxLinks;
+		}
+
+		int getNumLinks() const
+		{
+			return m_numLinks;
+		}
+
+		int getFirstLink() const
+		{
+			return m_firstLink;
+		}
+
+		btSoftBody* getSoftBody() const	// the pointer given to the constructor
+		{
+			return m_softBody;
+		}
+
+	};
+
+	
+	struct CollisionObjectIndices	// pair of collision object indices; NOTE(review): whether endObject is inclusive is not visible here
+	{
+		CollisionObjectIndices( int f, int e ) :
+			firstObject( f ),
+			endObject( e )
+		{
+		}
+
+		int firstObject;
+		int endObject;
+	};
+
+
+
+
+
+	struct PrepareLinksCB	// presumably the constant-buffer layout of prepareLinksKernel - confirm in the .cpp
+	{		
+		int numLinks;
+		int padding0;	// padding0..2 bring the struct to four int-sized fields
+		int padding1;
+		int padding2;
+	};
+
+	struct SolvePositionsFromLinksKernelCB	// fields mirror the parameters of solveLinksForPosition(startLink, numLinks, kst, ti)
+	{		
+		int startLink;
+		int numLinks;
+		float kst;
+		float ti;
+	};
+
+	struct IntegrateCB	// presumably the constant-buffer layout of integrateKernel - confirm in the .cpp
+	{
+		int numNodes;
+		float solverdt;
+		int padding1;	// padding1..2 bring the struct to four int/float-sized fields
+		int padding2;
+	};
+
+	struct UpdatePositionsFromVelocitiesCB	// presumably the constant-buffer layout of updatePositionsFromVelocitiesKernel - confirm in the .cpp
+	{
+		int numNodes;
+		float solverSDT;
+		int padding1;	// padding1..2 bring the struct to four int/float-sized fields
+		int padding2;
+	};
+
+	struct UpdateVelocitiesFromPositionsWithoutVelocitiesCB	// presumably for updateVelocitiesFromPositionsWithoutVelocitiesKernel - confirm in the .cpp
+	{
+		int numNodes;
+		float isolverdt;
+		int padding1;	// padding1..2 bring the struct to four int/float-sized fields
+		int padding2;
+	};
+
+	struct UpdateVelocitiesFromPositionsWithVelocitiesCB	// presumably for updateVelocitiesFromPositionsWithVelocitiesKernel - confirm in the .cpp
+	{
+		int numNodes;
+		float isolverdt;
+		int padding1;	// padding1..2 bring the struct to four int/float-sized fields
+		int padding2;
+	};
+
+	struct UpdateSoftBodiesCB	// presumably the constant-buffer layout of updateSoftBodiesKernel - confirm in the .cpp
+	{
+		int numNodes;
+		int startFace;
+		int numFaces;
+		float epsilon;
+	};
+
+
+	struct ApplyForcesCB	// presumably the constant-buffer layout of applyForcesKernel - confirm in the .cpp
+	{
+		unsigned int numNodes;
+		float solverdt;
+		float epsilon;
+		int padding3;	// fourth field; brings the struct to four int/float-sized fields
+	};
+
+	struct AddVelocityCB	// presumably the constant-buffer layout of addVelocityKernel - confirm in the .cpp
+	{
+		int startNode;
+		int lastNode;
+		float velocityX;	// velocity is passed as three separate floats
+		float velocityY;
+		float velocityZ;
+		int padding1;	// padding1..3 bring the struct to eight int/float-sized fields
+		int padding2;
+		int padding3;
+	};
+
+	struct VSolveLinksCB	// fields mirror the parameters of solveLinksForVelocity(startLink, numLinks, kst)
+	{
+		int startLink;
+		int numLinks;
+		float kst;
+		int padding;	// fourth field; brings the struct to four int/float-sized fields
+	};
+
+	struct ComputeBoundsCB	// presumably the constant-buffer layout of computeBoundsKernel - confirm in the .cpp
+	{
+		int numNodes;
+		int numSoftBodies;
+		int padding1;	// padding1..2 bring the struct to four int-sized fields
+		int padding2;
+	};
+
+	struct SolveCollisionsAndUpdateVelocitiesCB	// presumably for solveCollisionsAndUpdateVelocitiesKernel - confirm in the .cpp
+	{
+		unsigned int numNodes;
+		float isolverdt;
+		int padding0;	// padding0..1 bring the struct to four int/float-sized fields
+		int padding1;
+	};
+
+	
+
+
+protected:
+	ID3D11Device *		 m_dx11Device;
+	ID3D11DeviceContext* m_dx11Context;
+	
+	DXFunctions dxFunctions;
+public:
+	/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
+	btSoftBodyLinkDataDX11 m_linkData;
+	btSoftBodyVertexDataDX11 m_vertexData;
+	btSoftBodyTriangleDataDX11 m_triangleData;
+
+protected:
+
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+
+	bool m_shadersInitialized;
+
+	/** 
+	 * Cloths owned by this solver.
+	 * Only our cloths are in this array.
+	 */
+	btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet;
+
+	/** Acceleration value to be applied to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothAcceleration;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11PerClothAcceleration;
+
+	/** Wind velocity to be applied normal to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothWindVelocity;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11PerClothWindVelocity;
+
+	/** Velocity damping factor */
+	btAlignedObjectArray< float >						m_perClothDampingFactor;
+	btDX11Buffer<float>									m_dx11PerClothDampingFactor;
+
+	/** Velocity correction coefficient */
+	btAlignedObjectArray< float >						m_perClothVelocityCorrectionCoefficient;
+	btDX11Buffer<float>									m_dx11PerClothVelocityCorrectionCoefficient;
+
+	/** Lift parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothLiftFactor;
+	btDX11Buffer<float>									m_dx11PerClothLiftFactor;
+	
+	/** Drag parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothDragFactor;
+	btDX11Buffer<float>									m_dx11PerClothDragFactor;
+
+	/** Density of the medium in which each cloth sits */
+	btAlignedObjectArray< float >						m_perClothMediumDensity;
+	btDX11Buffer<float>									m_dx11PerClothMediumDensity;
+
+	
+	/** 
+	 * Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
+	 */
+	btAlignedObjectArray< CollisionObjectIndices >		m_perClothCollisionObjects;
+	btDX11Buffer<CollisionObjectIndices>				m_dx11PerClothCollisionObjects;
+
+	/** 
+	 * Collision shapes being passed across to the cloths in this solver.
+	 */
+	btAlignedObjectArray< CollisionShapeDescription >	m_collisionObjectDetails;
+	btDX11Buffer< CollisionShapeDescription >			m_dx11CollisionObjectDetails;
+
+	/** 
+	 * Minimum bounds for each cloth.
+	 * Updated by GPU and returned for use by broad phase.
+	 * These are int vectors as a reminder that they store the int representation of a float, not a float.
+	 * Bit 31 is inverted - i.e. floats are stored with int-sortable values.
+	 */
+	btAlignedObjectArray< UIntVector3 >	m_perClothMinBounds;
+	btDX11Buffer< UIntVector3 >			m_dx11PerClothMinBounds;
+
+	/** 
+	 * Maximum bounds for each cloth.
+	 * Updated by GPU and returned for use by broad phase.
+	 * These are int vectors as a reminder that they store the int representation of a float, not a float.
+	 * Bit 31 is inverted - i.e. floats are stored with int-sortable values.
+	 */
+	btAlignedObjectArray< UIntVector3 >	m_perClothMaxBounds;
+	btDX11Buffer< UIntVector3 >			m_dx11PerClothMaxBounds;
+
+	
+	/** 
+	 * Friction coefficient for each cloth
+	 */
+	btAlignedObjectArray< float >	m_perClothFriction;
+	btDX11Buffer< float >			m_dx11PerClothFriction;
+
+	DXFunctions::KernelDesc		prepareLinksKernel;
+	DXFunctions::KernelDesc		solvePositionsFromLinksKernel;
+	DXFunctions::KernelDesc		vSolveLinksKernel;
+	DXFunctions::KernelDesc		integrateKernel;
+	DXFunctions::KernelDesc		addVelocityKernel;
+	DXFunctions::KernelDesc		updatePositionsFromVelocitiesKernel;
+	DXFunctions::KernelDesc		updateVelocitiesFromPositionsWithoutVelocitiesKernel;
+	DXFunctions::KernelDesc		updateVelocitiesFromPositionsWithVelocitiesKernel;
+	DXFunctions::KernelDesc		solveCollisionsAndUpdateVelocitiesKernel;
+	DXFunctions::KernelDesc		resetNormalsAndAreasKernel;
+	DXFunctions::KernelDesc		normalizeNormalsAndAreasKernel;
+	DXFunctions::KernelDesc		computeBoundsKernel;
+	DXFunctions::KernelDesc		updateSoftBodiesKernel;
+
+	DXFunctions::KernelDesc		applyForcesKernel;
+
+	bool	m_enableUpdateBounds;
+
+	/**
+	 * Integrate motion on the solver.
+	 */
+	virtual void integrate( float solverdt );
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+
+	virtual bool buildShaders();
+
+	void resetNormalsAndAreas( int numVertices );
+
+	void normalizeNormalsAndAreas( int numVertices );
+
+	void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
+
+	void prepareCollisionConstraints();
+
+	Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
+
+	void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
+
+	virtual void applyForces( float solverdt );
+	
+	virtual void updateConstants( float timeStep );
+	int findSoftBodyIndex( const btSoftBody* const softBody );
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	virtual void prepareLinks();
+
+	void updatePositionsFromVelocities( float solverdt );
+	void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+	void solveLinksForVelocity( int startLink, int numLinks, float kst );
+	
+	void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
+	void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
+	void computeBounds( );
+	void solveCollisionsAndUpdateVelocities( float isolverdt );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+
+	void updateBounds();
+
+	
+	void releaseKernels();
+
+public:
+	btDX11SoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
+
+	virtual ~btDX11SoftBodySolver();
+	
+	
+	virtual SolverTypes getSolverType() const	// identifies this solver as DX_SOLVER
+	{
+		return DX_SOLVER;
+	}
+
+	void	setEnableUpdateBounds(bool enableBounds)	// stores the flag in m_enableUpdateBounds; where it is consumed is not visible in this header
+	{
+		m_enableUpdateBounds = enableBounds;
+	}
+	bool getEnableUpdateBounds() const	// current value of m_enableUpdateBounds
+	{
+		return  m_enableUpdateBounds;
+	}
+
+
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual btSoftBodyVertexData &getVertexData();
+
+	virtual btSoftBodyTriangleData &getTriangleData();
+
+
+
+	
+
+	btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
+	const btAcceleratedSoftBodyInterface * const findSoftBodyInterface( const btSoftBody* const softBody ) const;
+
+	virtual bool checkInitialized();
+
+	virtual void updateSoftBodies( );
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void copyBackToSoftBodies(bool bMove = true);
+
+	virtual void solveConstraints( float solverdt );
+
+	virtual void predictMotion( float solverdt );
+
+	
+	virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );
+
+	virtual void processCollision( btSoftBody*, btSoftBody* );
+
+};
+
+
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the DX to CPU version.
+ */
+class btSoftBodySolverOutputDXtoCPU : public btSoftBodySolverOutput	// DX-to-CPU output path; holds no state of its own
+{
+protected:
+	// (no protected members)
+public:
+	btSoftBodySolverOutputDXtoCPU()
+	{
+	}
+
+	/** Output current computed vertex data for softBody into vertexBuffer. NOTE(review): the old text said "all cloths", but the signature takes one soft body - confirm in the .cpp. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the DX to DX version and subclasses DX to CPU so that it works for that too.
+ */
+class btSoftBodySolverOutputDXtoDX : public btSoftBodySolverOutputDXtoCPU
+{
+protected:
+	struct OutputToVertexArrayCB	// eight int fields; presumably the constant-buffer layout of the two kernels below - confirm in the .cpp
+	{
+		int startNode;
+		int numNodes;
+		int positionOffset;
+		int positionStride;
+		
+		int normalOffset;	
+		int normalStride;
+		int padding1;
+		int padding2;
+	};
+	
+	DXFunctions dxFunctions;
+	DXFunctions::KernelDesc outputToVertexArrayWithNormalsKernel;
+	DXFunctions::KernelDesc outputToVertexArrayWithoutNormalsKernel;
+
+	
+	bool m_shadersInitialized;	// false until set elsewhere; the constructor only clears it
+
+	bool checkInitialized();
+	bool buildShaders();
+	void releaseKernels();
+
+public:
+	btSoftBodySolverOutputDXtoDX(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory) :
+	  dxFunctions( dx11Device, dx11Context, dx11CompileFromMemory ),
+	  m_shadersInitialized( false )
+	{
+	}
+
+	~btSoftBodySolverOutputDXtoDX()
+	{
+		releaseKernels();
+	}
+
+	/** Write the current computed vertex data of softBody into vertexBuffer (defined in the .cpp). */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+#endif // #ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+
+
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h
@ -0,0 +1,81 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "vectormath/vmInclude.h"
+#include "btSoftBodySolver_DX11.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
+#include "btSoftBodySolverVertexData_DX11.h"
+#include "btSoftBodySolverTriangleData_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+#define BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+
+/**
+ * DX11 soft body solver variant that replaces the link data, shader build, optimize and
+ * constraint solve steps of btDX11SoftBodySolver and reports itself as DX_SIMD_SOLVER.
+ */
+class btDX11SIMDAwareSoftBodySolver : public btDX11SoftBodySolver
+{
+protected:
+	/**
+	 * Parameter block for the solve-positions-from-links kernel.
+	 * NOTE(review): presumably mirrors the shader-side constant buffer layout - keep field order in sync with the shader.
+	 */
+	struct SolvePositionsFromLinksKernelCB
+	{		
+		int startWave;
+		int numWaves;
+		float kst;
+		float ti;
+	};
+
+
+	/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
+	// NOTE(review): if the base class also declares m_linkData this member hides it - confirm against btSoftBodySolver_DX11.h.
+	btSoftBodyLinkDataDX11SIMDAware m_linkData;
+		
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+
+	
+	/** Overrides the base class shader build. */
+	virtual bool buildShaders();
+
+	/** Refreshes solver constants for the given time step; see the implementation file for details. */
+	void updateConstants( float timeStep );
+
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	
+
+	// NOTE(review): the parameter block above speaks of waves (startWave/numWaves) while these
+	// parameters are named startLink/numLinks - confirm which unit the implementation expects.
+	void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+
+
+
+public:
+	/** dx11CompileFromMemory defaults to D3DX11CompileFromMemory. */
+	btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
+
+	virtual ~btDX11SIMDAwareSoftBodySolver();
+
+	/** Access to the link data as its base type. */
+	virtual btSoftBodyLinkData &getLinkData();
+
+	/** Prepare solver data for the given soft bodies; forceUpdate defaults to false. */
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	/** Run the constraint solve for a step of solverdt. */
+	virtual void solveConstraints( float solverdt );
+	
+	/** Identifies this solver as the SIMD-aware DX11 variant. */
+	virtual SolverTypes getSolverType() const
+	{
+		return DX_SIMD_SOLVER;
+	}
+	
+};
+
+#endif // #ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/premake4.lua
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/premake4.lua
@ -0,0 +1,23 @@
+	
+-- Generate the DX11 soft body solver project only when findDirectX11() returns a truthy value.
+hasDX11 = findDirectX11()
+
+if hasDX11 then
+	project("BulletSoftBodyDX11Solvers")
+
+	-- Helper provided by the including premake script; called once the project is active.
+	initDirectX11()
+
+	kind("StaticLib")
+	targetdir("../../../../lib")
+
+	includedirs({
+		".",
+		"../../..",
+	})
+
+	-- Every source and header found below this directory.
+	files({
+		"**.cpp",
+		"**.h",
+	})
+end
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt
@ -0,0 +1,62 @@
+
+# Library target for the OpenCL soft body solvers, compiled with the AMD OpenCL defines and includes.
+include_directories(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+	${AMD_OPENCL_INCLUDES}
+)
+
+add_definitions(-DUSE_AMD_OPENCL)
+add_definitions(-DCL_PLATFORM_AMD)
+
+# Solver implementations live in the parent OpenCL directory.
+set(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+	../btSoftBodySolver_OpenCLSIMDAware.cpp
+	../btSoftBodySolverOutputCLtoGL.cpp
+)
+
+# Headers are attached to the target and reused as PUBLIC_HEADER for framework builds below.
+set(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../btSoftBodySolver_OpenCLSIMDAware.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCLSIMDAware.h
+	../btSoftBodySolverBuffer_OpenCL.h
+	../btSoftBodySolverVertexBuffer_OpenGL.h
+	../btSoftBodySolverOutputCLtoGL.h
+)
+
+add_library(BulletSoftBodySolvers_OpenCL_AMD
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+)
+
+set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION})
+set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES SOVERSION ${BULLET_VERSION})
+
+# Link dependencies are only declared for shared library builds.
+if(BUILD_SHARED_LIBS)
+	target_link_libraries(BulletSoftBodySolvers_OpenCL_AMD BulletSoftBody)
+endif()
+
+# Installation rules; skipped when generating distributable MSVC project files.
+if(INSTALL_LIBS)
+	if(NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				install(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION .)
+			else()
+				install(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION lib${LIB_SUFFIX})
+				# headers are already installed by BulletMultiThreaded library
+			endif()
+		endif()
+
+		# Framework builds on Apple publish the header list with the target.
+		if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES FRAMEWORK true)
+			set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+		endif()
+	endif()
+endif()
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/premake4.lua
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/premake4.lua
@ -0,0 +1,27 @@
+	
+-- Generate the AMD OpenCL soft body solver project only when findOpenCL_AMD() returns a truthy value.
+hasCL = findOpenCL_AMD()
+
+if hasCL then
+	project("BulletSoftBodySolvers_OpenCL_AMD")
+
+	defines({ "USE_AMD_OPENCL", "CL_PLATFORM_AMD" })
+
+	-- Helper provided by the including premake script; called once the project is active.
+	initOpenCL_AMD()
+
+	kind("StaticLib")
+	targetdir("../../../../../lib")
+
+	includedirs({
+		".",
+		"../../../..",
+		"../../../../../Glut",
+	})
+
+	-- Solver sources live in the parent OpenCL directory.
+	files({
+		"../btSoftBodySolver_OpenCL.cpp",
+		"../btSoftBodySolver_OpenCLSIMDAware.cpp",
+		"../btSoftBodySolverOutputCLtoGL.cpp",
+	})
+end
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt
@ -0,0 +1,77 @@
+
+# Library target for the OpenCL soft body solvers on Apple platforms.
+INCLUDE_DIRECTORIES(
+${BULLET_PHYSICS_SOURCE_DIR}/src
+)
+
+
+
+
+# Solver implementations live in the parent OpenCL directory.
+SET(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+	../btSoftBodySolver_OpenCLSIMDAware.cpp
+)
+
+# Headers attached to the target and published as PUBLIC_HEADER for framework builds.
+# The SIMD-aware headers are listed because btSoftBodySolver_OpenCLSIMDAware.cpp is compiled
+# into this library; without them a framework build would ship the class without its headers.
+SET(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../btSoftBodySolver_OpenCLSIMDAware.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCLSIMDAware.h
+	../btSoftBodySolverBuffer_OpenCL.h
+)
+
+# OpenCL kernel sources.
+# Each name is expanded to ../OpenCLC10/<name>.cl and attached to the library target below.
+SET(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+    LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach(f) 
+
+
+
+ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Apple
+	${BulletSoftBodyOpenCLSolvers_SRCS} 
+	${BulletSoftBodyOpenCLSolvers_HDRS} 
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES SOVERSION ${BULLET_VERSION})
+# Link dependencies are only declared for shared library builds; on Apple the OpenCL framework is added too.
+IF (BUILD_SHARED_LIBS)
+	IF (APPLE AND (BUILD_SHARED_LIBS OR FRAMEWORK) )
+		SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES LINK_FLAGS "-framework OpenCL")
+	ENDIF (APPLE AND (BUILD_SHARED_LIBS OR FRAMEWORK) )
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Apple BulletSoftBody)
+ENDIF (BUILD_SHARED_LIBS)
+
+
+# Installation rules; skipped when generating distributable MSVC project files.
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple  DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple  DESTINATION lib${LIB_SUFFIX})
+#headers are already installed by BulletMultiThreaded library
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		# Framework builds publish the header list with the target.
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt
@ -0,0 +1,17 @@
+# The MiniCL build is always part of the tree.
+subdirs(MiniCL)
+
+# Vendor specific builds are opt-in through the matching BUILD_*_OPENCL_DEMOS switch.
+if(BUILD_INTEL_OPENCL_DEMOS)
+	subdirs(Intel)
+endif()
+
+if(BUILD_AMD_OPENCL_DEMOS)
+	subdirs(AMD)
+endif()
+
+if(BUILD_NVIDIA_OPENCL_DEMOS)
+	subdirs(NVidia)
+endif()
+
+# The Apple build requires both the platform and a located OpenCL library.
+if(APPLE AND OPENCL_LIBRARY)
+	subdirs(Apple)
+endif()
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt
@ -0,0 +1,82 @@
+
+# Library target for the OpenCL soft body solvers, compiled with the Intel OpenCL defines and includes.
+include_directories(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+	${INTEL_OPENCL_INCLUDES}
+)
+
+add_definitions(-DUSE_INTEL_OPENCL)
+add_definitions(-DCL_PLATFORM_INTEL)
+
+# Solver implementations live in the parent OpenCL directory.
+set(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+	../btSoftBodySolver_OpenCLSIMDAware.cpp
+	../btSoftBodySolverOutputCLtoGL.cpp
+)
+
+# Headers are attached to the target and reused as PUBLIC_HEADER for framework builds below.
+set(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../btSoftBodySolver_OpenCLSIMDAware.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCLSIMDAware.h
+	../btSoftBodySolverBuffer_OpenCL.h
+	../btSoftBodySolverVertexBuffer_OpenGL.h
+	../btSoftBodySolverOutputCLtoGL.h
+)
+
+# OpenCL kernel sources.
+# Each name is expanded to ../OpenCLC10/<name>.cl and attached to the library target below.
+set(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+	list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach()
+
+add_library(BulletSoftBodySolvers_OpenCL_Intel
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES VERSION ${BULLET_VERSION})
+set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES SOVERSION ${BULLET_VERSION})
+
+# Link dependencies are only declared for shared library builds.
+if(BUILD_SHARED_LIBS)
+	target_link_libraries(BulletSoftBodySolvers_OpenCL_Intel BulletSoftBody)
+endif()
+
+# Installation rules; skipped when generating distributable MSVC project files.
+if(INSTALL_LIBS)
+	if(NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				install(TARGETS BulletSoftBodySolvers_OpenCL_Intel DESTINATION .)
+			else()
+				install(TARGETS BulletSoftBodySolvers_OpenCL_Intel DESTINATION lib${LIB_SUFFIX})
+				# headers are already installed by BulletMultiThreaded library
+			endif()
+		endif()
+
+		# Framework builds on Apple publish the header list with the target.
+		if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES FRAMEWORK true)
+			set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+		endif()
+	endif()
+endif()
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/premake4.lua
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/premake4.lua
@ -0,0 +1,27 @@
+	
+-- Generate the Intel OpenCL soft body solver project only when findOpenCL_Intel() returns a truthy value.
+hasCL = findOpenCL_Intel()
+
+if hasCL then
+	project("BulletSoftBodySolvers_OpenCL_Intel")
+
+	defines({ "USE_INTEL_OPENCL", "CL_PLATFORM_INTEL" })
+
+	-- Helper provided by the including premake script; called once the project is active.
+	initOpenCL_Intel()
+
+	kind("StaticLib")
+	targetdir("../../../../../lib")
+
+	includedirs({
+		".",
+		"../../../..",
+		"../../../../../Glut",
+	})
+
+	-- Solver sources live in the parent OpenCL directory.
+	files({
+		"../btSoftBodySolver_OpenCL.cpp",
+		"../btSoftBodySolver_OpenCLSIMDAware.cpp",
+		"../btSoftBodySolverOutputCLtoGL.cpp",
+	})
+end
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt
@ -0,0 +1,75 @@
+
+# Library target for the OpenCL soft body solver compiled against MiniCL (USE_MINICL).
+include_directories(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+)
+
+add_definitions(-DUSE_MINICL)
+
+# Only the basic OpenCL solver is compiled for this target.
+set(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+)
+
+# Headers are attached to the target and reused as PUBLIC_HEADER for framework builds below.
+set(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverBuffer_OpenCL.h
+)
+
+# OpenCL kernel sources.
+# Each name is expanded to ../OpenCLC10/<name>.cl and attached to the library target below.
+set(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+	list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach()
+
+add_library(BulletSoftBodySolvers_OpenCL_Mini
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES VERSION ${BULLET_VERSION})
+set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES SOVERSION ${BULLET_VERSION})
+
+# Link dependencies are only declared for shared library builds.
+if(BUILD_SHARED_LIBS)
+	target_link_libraries(BulletSoftBodySolvers_OpenCL_Mini MiniCL BulletMultiThreaded BulletSoftBody)
+endif()
+
+# Installation rules; skipped when generating distributable MSVC project files.
+if(INSTALL_LIBS)
+	if(NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				install(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION .)
+			else()
+				install(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION lib${LIB_SUFFIX})
+				# headers are already installed by BulletMultiThreaded library
+			endif()
+		endif()
+
+		# Framework builds on Apple publish the header list with the target.
+		if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES FRAMEWORK true)
+			set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+		endif()
+	endif()
+endif()
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp
@ -0,0 +1,249 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+// MiniCL definitions; presumably the source of MINICL_REGISTER, float4 and the __kernel /
+// __global keywords used below - confirm against cl_MiniCL_Defs.h.
+#include <MiniCL/cl_MiniCL_Defs.h>
+
+// MSTRINGIFY is defined as the identity here, so the .cl files below (whose contents are wrapped
+// in MSTRINGIFY( ... )) are compiled into this translation unit as ordinary source code.
+#define MSTRINGIFY(A) A
+#include "../OpenCLC10/ApplyForces.cl"
+#include "../OpenCLC10/Integrate.cl"
+#include "../OpenCLC10/PrepareLinks.cl"
+#include "../OpenCLC10/SolvePositions.cl"
+#include "../OpenCLC10/UpdateNodes.cl"
+#include "../OpenCLC10/UpdateNormals.cl"
+#include "../OpenCLC10/UpdatePositions.cl"
+#include "../OpenCLC10/UpdatePositionsFromVelocities.cl"
+#include "../OpenCLC10/VSolveLinks.cl"
+#include "../OpenCLC10/UpdateFixedVertexPositions.cl"
+// The collision kernel is not pulled in from its .cl file; a hand-written version of
+// SolveCollisionsAndUpdateVelocitiesKernel is defined further down in this file instead.
+//#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
+
+
+// Register the kernel entry points from the included .cl files with MiniCL
+// (presumably so they can be looked up by name at runtime - confirm against MINICL_REGISTER).
+MINICL_REGISTER(PrepareLinksKernel)
+MINICL_REGISTER(VSolveLinksKernel)
+MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel)
+MINICL_REGISTER(SolvePositionsFromLinksKernel)
+MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel)
+MINICL_REGISTER(updateVelocitiesFromPositionsWithoutVelocitiesKernel)
+MINICL_REGISTER(IntegrateKernel)
+MINICL_REGISTER(ApplyForcesKernel)
+MINICL_REGISTER(ResetNormalsAndAreasKernel)
+MINICL_REGISTER(NormalizeNormalsAndAreasKernel)
+MINICL_REGISTER(UpdateSoftBodiesKernel)
+MINICL_REGISTER(UpdateFixedVertexPositions)
+
+// Three-component dot product: the w components of both operands are ignored.
+float mydot3a(float4 lhs, float4 rhs)
+{
+	return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
+}
+
+
+// Per-cloth range of collision objects. The collision kernel below iterates
+// firstObject <= index < endObject, so endObject is one past the last object.
+// NOTE(review): presumably filled in by the host side solver - keep the layout in sync with it.
+typedef struct 
+{
+	int firstObject;
+	int endObject;
+} CollisionObjectIndices;
+
+// Description of one collision object as read by the collision kernel below.
+// NOTE(review): presumably filled in by the host side solver - keep the field order in sync with it.
+typedef struct 
+{
+	float4 shapeTransform[4]; // column major 4x4 matrix
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	// Not read by the collision kernel in this file
+	int softBodyIdentifier;
+	// Compared against CAPSULE_SHAPE_PROXYTYPE; other values are skipped by the kernel
+	int collisionShapeType;
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	float halfHeight;
+	// 0, 1 or 2 selects x, y or z as the capsule centerline axis
+	int upAxis;
+		
+	// Added to radius to form the minimum allowed distance from the capsule centerline
+	float margin;
+	// Multiplied with the per-cloth friction when the friction force is computed
+	float friction;
+
+	int padding0;
+	
+} CollisionShapeDescription;
+
+// From btBroadphaseProxy.h
+// Value of collisionShapeType that marks a capsule; the only shape type handled by the kernel below.
+// NOTE(review): hard-coded copy - must stay equal to the enum value in btBroadphaseProxy.h.
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Transform a vector by a 4x4 matrix given as four columns.
+// Component i of the result is the dot product of matrix row i with the vector.
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+	// Build each row by taking the matching component from every column
+	float4 rowX = float4(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+	float4 rowY = float4(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+	float4 rowZ = float4(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+	float4 rowW = float4(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+
+	return float4(dot(rowX, vector), dot(rowY, vector), dot(rowZ, vector), dot(rowW, vector));
+}
+
+// Per-vertex capsule collision response and velocity update.
+//
+// For each vertex with a valid (non-negative) cloth identifier this kernel:
+//   1. derives a damped velocity from (position - previousPosition) * isolverdt,
+//   2. for every capsule in the collision object range of its cloth, pushes a penetrating
+//      vertex back toward the capsule surface and accumulates a friction force,
+//   3. writes the velocity, the friction force (overwriting g_vertexForces) and the position.
+// Shapes whose collisionShapeType is not CAPSULE_SHAPE_PROXYTYPE are ignored.
+//
+// numNodes  - number of vertices to process; work items at or beyond it do nothing
+// isolverdt - scale applied to the position delta (presumably 1 / solver time step - confirm with caller)
+// The g_vertex* arrays are indexed by node id, the g_perCloth* / g_cloth* arrays by cloth id,
+// and g_collisionObjectDetails by the indices stored in g_perClothCollisionObjectIndices.
+__kernel void 
+SolveCollisionsAndUpdateVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,
+	__global int *g_vertexClothIdentifier,
+	__global float4 *g_vertexPreviousPositions,
+	__global float * g_perClothFriction,
+	__global float * g_clothDampingFactor,
+	__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+	__global CollisionShapeDescription * g_collisionObjectDetails,
+	__global float4 * g_vertexForces,
+	__global float4 *g_vertexVelocities,
+	__global float4 *g_vertexPositions GUID_ARG)
+{
+	int nodeID = get_global_id(0);
+	float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
+	
+	if( get_global_id(0) < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		
+		// Abort if this is not a valid cloth
+		if( clothIdentifier < 0 )
+			return;
+
+
+		// Positions are promoted to homogeneous points (w = 1)
+		float4 position (g_vertexPositions[nodeID].xyz, 1.f);
+		float4 previousPosition (g_vertexPreviousPositions[nodeID].xyz, 1.f);
+			
+		float clothFriction = g_perClothFriction[clothIdentifier];
+		float dampingFactor = g_clothDampingFactor[clothIdentifier];
+		float velocityCoefficient = (1.f - dampingFactor);		
+		float4 difference = position - previousPosition;
+		float4 velocity = difference*velocityCoefficient*isolverdt;
+		
+		CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+	
+		int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+		
+		if( numObjects > 0 )
+		{
+			// We have some possible collisions to deal with
+			for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
+			{
+				CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
+				float colliderFriction = shapeDescription.friction;
+
+				if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+				{
+					// Colliding with a capsule
+
+					float capsuleHalfHeight = shapeDescription.halfHeight;
+					float capsuleRadius = shapeDescription.radius;
+					float capsuleMargin = shapeDescription.margin;
+					int capsuleupAxis = shapeDescription.upAxis;
+
+					// Four columns of worldTransform matrix
+					float4 worldTransform[4];
+					worldTransform[0] = shapeDescription.shapeTransform[0];
+					worldTransform[1] = shapeDescription.shapeTransform[1];
+					worldTransform[2] = shapeDescription.shapeTransform[2];
+					worldTransform[3] = shapeDescription.shapeTransform[3];
+
+					// Correctly define capsule centerline vector 
+					// c1 and c2 are the two centerline end points in capsule local space
+					float4 c1 (0.f, 0.f, 0.f, 1.f); 
+					float4 c2 (0.f, 0.f, 0.f, 1.f);
+					c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+					c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+					c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+					c2.x = -c1.x;
+					c2.y = -c1.y;
+					c2.z = -c1.z;
+
+
+					float4 worldC1 = matrixVectorMul(worldTransform, c1);
+					float4 worldC2 = matrixVectorMul(worldTransform, c2);
+					float4 segment = (worldC2 - worldC1);
+
+					// compute distance of tangent to vertex along line segment in capsule
+					// (0 at worldC1, 1 at worldC2)
+					float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
+
+					float4 closestPoint = (worldC1 + (segment * distanceAlongSegment));
+					float distanceFromLine = length(position - closestPoint);
+					float distanceFromC1 = length(worldC1 - position);
+					float distanceFromC2 = length(worldC2 - position);
+					
+					// Final distance from collision, point to push from, direction to push in
+					// for impulse force
+					float dist;
+					float4 normalVector;
+					if( distanceAlongSegment < 0 )
+					{
+						// Beyond the C1 end: measure against the end point
+						dist = distanceFromC1;
+						normalVector = float4(normalize(position - worldC1).xyz, 0.f);
+					} else if( distanceAlongSegment > 1.f ) {
+						// Beyond the C2 end: measure against the end point
+						dist = distanceFromC2;
+						normalVector = float4(normalize(position - worldC2).xyz, 0.f);	
+					} else {
+						// Alongside the cylinder part: measure against the centerline
+						dist = distanceFromLine;
+						normalVector = float4(normalize(position - closestPoint).xyz, 0.f);
+					}
+						
+					float4 colliderLinearVelocity = shapeDescription.linearVelocity;
+					float4 colliderAngularVelocity = shapeDescription.angularVelocity;
+					// The collider origin is the translation column of the column-major transform,
+					// i.e. worldTransform[3].xyz - the same column matrixVectorMul scales by vector.w.
+					// (Reading worldTransform[0..2].w instead would pick up the bottom row of the matrix.)
+					float4 colliderOrigin = float4(worldTransform[3].x, worldTransform[3].y, worldTransform[3].z, 0.f);
+					float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - colliderOrigin);
+
+					float minDistance = capsuleRadius + capsuleMargin;
+					
+					// In case of no collision, this is the value of velocity
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+					
+					
+					// Check for a collision
+					if( dist < minDistance )
+					{
+						// Project back to surface along normal
+						// (only 90 percent of the penetration depth is corrected)
+						position = position + float4(normalVector*(minDistance - dist)*0.9f);
+						velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+						float4 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+						// p1 and p2 span the plane perpendicular to the contact normal
+						float4 p1 = normalize(cross(normalVector, segment));
+						float4 p2 = normalize(cross(p1, normalVector));
+						// Full friction is sum of velocities in each direction of plane
+						float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
+
+						// Real friction is peak friction corrected by friction coefficients
+						frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+						float approachSpeed = dot(relativeVelocity, normalVector);
+
+						// Friction is only applied while the vertex is not separating from the surface
+						if( approachSpeed <= 0.0f )
+							forceOnVertex -= frictionVector;
+					}
+				}
+			}
+		}
+
+		g_vertexVelocities[nodeID] = float4(velocity.xyz, 0.f);	
+
+		// Update external force
+		g_vertexForces[nodeID] = float4(forceOnVertex.xyz, 0.f);
+
+		g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
+	}
+}
+
+
+// Register the hand-written collision kernel defined above, alongside the kernels registered
+// near the top of this file.
+MINICL_REGISTER(SolveCollisionsAndUpdateVelocitiesKernel);
+
+
+
+
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt
@ -0,0 +1,81 @@
+
+# Library target for the OpenCL soft body solvers, compiled with the NVIDIA OpenCL defines and includes.
+ADD_DEFINITIONS(-DUSE_NVIDIA_OPENCL)
+ADD_DEFINITIONS(-DCL_PLATFORM_NVIDIA)
+
+INCLUDE_DIRECTORIES(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+	${NVIDIA_OPENCL_INCLUDES}
+)
+
+
+
+# Solver implementations live in the parent OpenCL directory.
+SET(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+	../btSoftBodySolver_OpenCLSIMDAware.cpp
+	../btSoftBodySolverOutputCLtoGL.cpp
+)
+
+# Headers attached to the target and published as PUBLIC_HEADER for framework builds.
+# btSoftBodySolver_OpenCLSIMDAware.h is listed because its .cpp is compiled into this library.
+SET(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../btSoftBodySolver_OpenCLSIMDAware.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCLSIMDAware.h
+	../btSoftBodySolverBuffer_OpenCL.h
+	../btSoftBodySolverVertexBuffer_OpenGL.h
+	../btSoftBodySolverOutputCLtoGL.h
+)
+
+# OpenCL kernel sources.
+# Each name is expanded to ../OpenCLC10/<name>.cl and attached to the library target below.
+SET(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+    LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach(f) 
+
+
+
+ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_NVidia
+	${BulletSoftBodyOpenCLSolvers_SRCS} 
+	${BulletSoftBodyOpenCLSolvers_HDRS} 
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES SOVERSION ${BULLET_VERSION})
+# Link dependencies are only declared for shared library builds.
+IF (BUILD_SHARED_LIBS)
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_NVidia BulletSoftBody BulletDynamics)
+ENDIF (BUILD_SHARED_LIBS)
+
+
+# Installation rules; skipped when generating distributable MSVC project files.
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION lib${LIB_SUFFIX})
+#headers are already installed by BulletMultiThreaded library
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		# Framework builds on Apple publish the header list with the target.
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/premake4.lua
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/premake4.lua
@ -0,0 +1,27 @@
+	
+-- Generate the NVIDIA OpenCL soft body solver project only when findOpenCL_NVIDIA() returns a truthy value.
+hasCL = findOpenCL_NVIDIA()
+
+if hasCL then
+	project("BulletSoftBodySolvers_OpenCL_NVIDIA")
+
+	defines({ "USE_NVIDIA_OPENCL", "CL_PLATFORM_NVIDIA" })
+
+	-- Helper provided by the including premake script; called once the project is active.
+	initOpenCL_NVIDIA()
+
+	kind("StaticLib")
+	targetdir("../../../../../lib")
+
+	includedirs({
+		".",
+		"../../../..",
+		"../../../../../Glut",
+	})
+
+	-- Solver sources live in the parent OpenCL directory.
+	files({
+		"../btSoftBodySolver_OpenCL.cpp",
+		"../btSoftBodySolver_OpenCLSIMDAware.cpp",
+		"../btSoftBodySolverOutputCLtoGL.cpp",
+	})
+end
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl
@ -0,0 +1,102 @@
+MSTRINGIFY(
+
+
+// Three-component dot product: the w components of both operands are ignored.
+float adot3(float4 lhs, float4 rhs)
+{
+	return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
+}
+
+// Length of the xyz part of a; w is zeroed on a local copy before measuring.
+float alength3(float4 a)
+{
+	float4 xyzOnly = a;
+	xyzOnly.w = 0;
+	return length(xyzOnly);
+}
+
+// Normalize the xyz part of a; w is zeroed on a local copy first so it does not affect the result.
+float4 anormalize3(float4 a)
+{
+	float4 xyzOnly = a;
+	xyzOnly.w = 0;
+	return normalize(xyzOnly);
+}
+
+// Scale axis a by the three-component dot product of v and a.
+// This is the projection of v onto a when a has unit length.
+float4 projectOnAxis( float4 v, float4 a )
+{
+	float amountAlongAxis = adot3(v, a);
+	return a*amountAlongAxis;
+}
+
+// Applies the per-cloth acceleration to each vertex velocity and accumulates aerodynamic
+// drag and lift into the per-vertex force accumulator.
+// Vertices whose inverse mass is not positive are left untouched.
+//
+// numNodes - number of vertices; work items at or beyond it do nothing
+// solverdt - time step used to scale the cloth acceleration
+// epsilon  - threshold on the squared relative speed below which no aerodynamic force is added
+// The g_vertex* arrays are indexed by node id and the g_cloth* arrays by cloth id.
+__kernel void 
+ApplyForcesKernel(
+	const uint numNodes,
+	const float solverdt,
+	const float epsilon,
+	__global int * g_vertexClothIdentifier,
+	__global float4 * g_vertexNormal,
+	__global float * g_vertexArea,
+	__global float * g_vertexInverseMass,
+	__global float * g_clothLiftFactor,
+	__global float * g_clothDragFactor,
+	__global float4 * g_clothWindVelocity,
+	__global float4 * g_clothAcceleration,
+	__global float * g_clothMediumDensity,
+	__global float4 * g_vertexForceAccumulator,
+	__global float4 * g_vertexVelocity GUID_ARG)
+{
+	unsigned int nodeID = get_global_id(0);
+	if( nodeID < numNodes )
+	{		
+		// NOTE(review): clothId is used as an index below without a check for negative values - 
+		// confirm that vertices with an invalid cloth id always have a non-positive inverse mass.
+		int clothId  = g_vertexClothIdentifier[nodeID];
+		float nodeIM = g_vertexInverseMass[nodeID];
+		
+		if( nodeIM > 0.0f )
+		{
+			float4 nodeV  = g_vertexVelocity[nodeID];
+			float4 normal = g_vertexNormal[nodeID];
+			float area    = g_vertexArea[nodeID];
+			float4 nodeF  = g_vertexForceAccumulator[nodeID];
+			
+			// Read per-cloth values
+			float4 clothAcceleration = g_clothAcceleration[clothId];
+			float4 clothWindVelocity = g_clothWindVelocity[clothId];
+			float liftFactor = g_clothLiftFactor[clothId];
+			float dragFactor = g_clothDragFactor[clothId];
+			float mediumDensity = g_clothMediumDensity[clothId];
+		
+			// Apply the acceleration to the cloth rather than do this via a force
+			nodeV += (clothAcceleration*solverdt);
+
+			// The velocity is stored before the aerodynamic terms are evaluated
+			g_vertexVelocity[nodeID] = nodeV;
+
+			// Aerodynamics
+			// NOTE(review): rel_v_len ignores w while rel_v2 is a four-component dot product;
+			// the two only agree when rel_v.w is zero - confirm the w convention of the inputs.
+			float4 rel_v = nodeV - clothWindVelocity;
+			float rel_v_len = alength3(rel_v);
+			float rel_v2 = dot(rel_v, rel_v);
+			
+			if( rel_v2 > epsilon )
+			{
+				float4 rel_v_nrm = anormalize3(rel_v);
+				float4 nrm = normal;
+									
+				// Flip the normal so that it does not oppose the relative velocity
+				nrm = nrm * (dot(nrm, rel_v) < 0 ? -1.f : 1.f);
+
+				float4 fDrag = (float4)(0.f, 0.f, 0.f, 0.f);
+				float4 fLift = (float4)(0.f, 0.f, 0.f, 0.f);
+
+				float n_dot_v = dot(nrm, rel_v_nrm);
+
+				// drag force
+				if ( dragFactor > 0.f )
+					fDrag = 0.5f * dragFactor * mediumDensity * rel_v2 * area * n_dot_v * (-1.0f) * rel_v_nrm;
+
+				// lift force
+				// Check angle of attack
+				// cos(10 degrees) = 0.98480
+				if ( 0 < n_dot_v && n_dot_v < 0.98480f)
+					fLift = 0.5f * liftFactor * mediumDensity * rel_v_len * area * sqrt(1.0f-n_dot_v*n_dot_v) * (cross(cross(nrm, rel_v_nrm), rel_v_nrm));
+				
+				// The accumulator is only written back when an aerodynamic force was evaluated
+				nodeF += fDrag + fLift;
+					g_vertexForceAccumulator[nodeID] = nodeF;	
+			}
+		}
+	}
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ComputeBounds.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ComputeBounds.cl
@ -0,0 +1,82 @@
+MSTRINGIFY(
+#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
+#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
+
+// Accumulates one bounding box per cloth over the positions of its vertices.
+// Float coordinates are remapped to unsigned keys that sort in the same order
+// as the floats so that integer atomic min/max can be used. Every work-item
+// first reduces into the __local arrays; afterwards the local results are
+// merged into the global arrays with global atomics.
+__kernel void
+ComputeBoundsKernel( 
+	const int numNodes,
+	const int numSoftBodies,
+	__global int * g_vertexClothIdentifier,
+	__global float4 * g_vertexPositions,
+	/* The bounds are laid out as four uints per cloth but are declared as */
+	/* flat uint arrays so that the atomics below can address one component. */
+	/* The original author notes that this makes the code little-endian-only. */
+	volatile __global uint * g_clothMinBounds,
+	volatile __global uint * g_clothMaxBounds,
+	volatile __local uint * clothMinBounds,
+	volatile __local uint * clothMaxBounds)
+{
+	const size_t localId = get_local_id(0);
+	const size_t slot = localId*4;
+
+	// The first numSoftBodies work-items each reset one slot of four uints
+	if( localId < numSoftBodies )
+	{
+		for( int component = 0; component < 4; ++component )
+		{
+			clothMinBounds[slot+component] = UINT_MAX;
+			clothMaxBounds[slot+component] = 0;
+		}
+	}
+
+	barrier(CLK_LOCAL_MEM_FENCE);
+
+	const int nodeID = get_global_id(0);
+
+	// Work-items beyond the vertex range are treated like an invalid cloth
+	int clothIdentifier = -1;
+	if( nodeID < numNodes )
+		clothIdentifier = g_vertexClothIdentifier[nodeID];
+
+	if( clothIdentifier >= 0 )
+	{
+		const float4 position = g_vertexPositions[nodeID];
+
+		/* Reinterpret the coordinates as raw bit patterns */
+		uint keyX = as_uint(position.x);
+		uint keyY = as_uint(position.y);
+		uint keyZ = as_uint(position.z);
+
+		/* Flip only the sign bit of non-negative values and every bit of */
+		/* negative values so the keys compare correctly as unsigned ints */
+		keyX ^= ((1 + ~(keyX >> 31)) | 0x80000000);
+		keyY ^= ((1 + ~(keyY >> 31)) | 0x80000000);
+		keyZ ^= ((1 + ~(keyZ >> 31)) | 0x80000000);
+
+		// Min/max against the local values of this cloth; the fourth
+		// component of each slot is never updated
+		const int clothSlot = clothIdentifier*4;
+		atom_min(&(clothMinBounds[clothSlot]), keyX);
+		atom_min(&(clothMinBounds[clothSlot+1]), keyY);
+		atom_min(&(clothMinBounds[clothSlot+2]), keyZ);
+
+		atom_max(&(clothMaxBounds[clothSlot]), keyX);
+		atom_max(&(clothMaxBounds[clothSlot+1]), keyY);
+		atom_max(&(clothMaxBounds[clothSlot+2]), keyZ);
+	}
+
+	barrier(CLK_LOCAL_MEM_FENCE);
+
+	/* Merge the local results into the global arrays using global atomics */
+	if( localId < numSoftBodies )
+	{
+		atom_min(&(g_clothMinBounds[slot]), clothMinBounds[slot]);
+		atom_min(&(g_clothMinBounds[slot+1]), clothMinBounds[slot+1]);
+		atom_min(&(g_clothMinBounds[slot+2]), clothMinBounds[slot+2]);
+
+		atom_max(&(g_clothMaxBounds[slot]), clothMaxBounds[slot]);
+		atom_max(&(g_clothMaxBounds[slot+1]), clothMaxBounds[slot+1]);
+		atom_max(&(g_clothMaxBounds[slot+2]), clothMaxBounds[slot+2]);
+	}
+}
+
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl
@ -0,0 +1,35 @@
+MSTRINGIFY(
+
+// Node indices for each link
+
+
+
+// Advances each vertex by one solver step: the accumulated force is applied
+// to the velocity, the velocity is applied to the position, the old position
+// is saved and the force accumulator is cleared.
+__kernel void
+IntegrateKernel( 
+	const int numNodes,
+	const float solverdt,
+	__global float * g_vertexInverseMasses,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexVelocity,
+	__global float4 * g_vertexPreviousPositions,
+	__global float4 * g_vertexForceAccumulator GUID_ARG)
+{
+	const int nodeID = get_global_id(0);
+	if( nodeID >= numNodes )
+		return;
+
+	const float4 oldPosition = g_vertexPositions[nodeID];
+	const float4 oldVelocity = g_vertexVelocity[nodeID];
+	const float4 accumulatedForce = g_vertexForceAccumulator[nodeID];
+	const float inverseMass = g_vertexInverseMasses[nodeID];
+
+	// new velocity = old velocity + force * inverse mass * dt
+	const float4 newVelocity = oldVelocity + accumulatedForce * inverseMass * solverdt;
+	// new position = old position + new velocity * dt
+	const float4 newPosition = oldPosition + newVelocity * solverdt;
+
+	g_vertexPreviousPositions[nodeID] = oldPosition;
+	g_vertexForceAccumulator[nodeID]  = (float4)(0.f, 0.f, 0.f, 0.0f);
+	g_vertexPositions[nodeID]         = newPosition;
+	g_vertexVelocity[nodeID]          = newVelocity;
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/OutputToVertexArray.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/OutputToVertexArray.cl
@ -0,0 +1,46 @@
+MSTRINGIFY(
+
+// Copies the xyz parts of positions and normals of numNodes vertices, starting
+// at startNode, into an interleaved float buffer. Offsets and strides are
+// measured in floats, because g_vertexBuffer is indexed as a float array.
+__kernel void 
+OutputToVertexArrayWithNormalsKernel( 
+	const int startNode, const int numNodes, __global float *g_vertexBuffer,
+	const int positionOffset, const int positionStride, const __global float4* g_vertexPositions, 
+	const int normalOffset, const int normalStride, const __global float4* g_vertexNormals  )
+{
+	const int nodeID = get_global_id(0);
+	if( nodeID >= numNodes )
+		return;
+
+	const int sourceIndex = nodeID + startNode;
+	const float4 position = g_vertexPositions[sourceIndex];
+	const float4 normal = g_vertexNormals[sourceIndex];
+
+	// Destination of the first position float of this vertex
+	const int positionBase = nodeID * positionStride + positionOffset;
+	g_vertexBuffer[positionBase]   = position.x;
+	g_vertexBuffer[positionBase+1] = position.y;
+	g_vertexBuffer[positionBase+2] = position.z;
+
+	// Destination of the first normal float of this vertex
+	const int normalBase = nodeID * normalStride + normalOffset;
+	g_vertexBuffer[normalBase]   = normal.x;
+	g_vertexBuffer[normalBase+1] = normal.y;
+	g_vertexBuffer[normalBase+2] = normal.z;
+}
+
+// Copies the xyz parts of the positions of numNodes vertices, starting at
+// startNode, into a float buffer. Offset and stride are measured in floats,
+// because g_vertexBuffer is indexed as a float array.
+__kernel void 
+OutputToVertexArrayWithoutNormalsKernel(
+	const int startNode, const int numNodes, __global float *g_vertexBuffer,
+	const int positionOffset, const int positionStride, const __global float4* g_vertexPositions )
+{
+	const int nodeID = get_global_id(0);
+	if( nodeID >= numNodes )
+		return;
+
+	const float4 position = g_vertexPositions[nodeID + startNode];
+
+	// Destination of the first position float of this vertex
+	const int positionBase = nodeID * positionStride + positionOffset;
+	g_vertexBuffer[positionBase]   = position.x;
+	g_vertexBuffer[positionBase+1] = position.y;
+	g_vertexBuffer[positionBase+2] = position.z;
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl
@ -0,0 +1,38 @@
+MSTRINGIFY(
+
+
+
+// For every link, stores the vector between the previous positions of its two
+// end vertices (w forced to 0) and the reciprocal of that vector's squared
+// length multiplied by the link's massLSC value.
+__kernel void 
+PrepareLinksKernel( 
+	const int numLinks,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksMassLSC,
+	__global float4 * g_nodesPreviousPosition,
+	__global float * g_linksLengthRatio,
+	__global float4 * g_linksCurrentLength GUID_ARG)
+{
+	const int linkID = get_global_id(0);
+	if( linkID >= numLinks )
+		return;
+
+	const int2 endpoints = g_linksVertexIndices[linkID];
+	const float4 previousStart = g_nodesPreviousPosition[endpoints.x];
+	const float4 previousEnd = g_nodesPreviousPosition[endpoints.y];
+	const float massLSC = g_linksMassLSC[linkID];
+
+	// Vector from the first vertex to the second; w is cleared so that it
+	// does not contribute to the dot product below
+	float4 linkVector = previousEnd - previousStart;
+	linkVector.w = 0.f;
+
+	const float scaledLengthSquared = dot(linkVector, linkVector)*massLSC;
+
+	g_linksCurrentLength[linkID] = linkVector;
+	g_linksLengthRatio[linkID]   = 1.0f/scaledLengthSquared;
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl
@ -0,0 +1,204 @@
+MSTRINGIFY(
+
+
+
+// Dot product over the x, y and z lanes only; the w lanes do not contribute.
+float mydot3a(float4 a, float4 b)
+{
+	const float4 products = a*b;
+	return products.x + products.y + products.z;
+}
+
+
+// Range of collision objects that belongs to one cloth. The kernel in this
+// file iterates from firstObject up to, but not including, endObject.
+typedef struct 
+{
+	int firstObject;
+	int endObject;
+} CollisionObjectIndices;
+
+// Description of one collision object as read by the collision kernel.
+// NOTE(review): the field order presumably has to match the structure that
+// the host code uploads - confirm before reordering or resizing anything.
+typedef struct 
+{
+	float4 shapeTransform[4]; // column major 4x4 matrix
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType;
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	float halfHeight;
+	int upAxis;
+		
+	float margin;
+	float friction;
+
+	int padding0;
+	
+} CollisionShapeDescription;
+
+// Shape type identifier of a capsule, the only shape type that the collision
+// kernel in this file handles.
+// From btBroadphaseProxy.h
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Multiply column-major matrix against vector
+// Product of a 4x4 matrix, given as four columns, with a 4-component vector.
+// Each output lane is the dot product of one gathered matrix row with the vector.
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+	const float4 r0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+	const float4 r1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+	const float4 r2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+	const float4 r3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+
+	return (float4)(dot(r0, vector), dot(r1, vector), dot(r2, vector), dot(r3, vector));
+}
+
+// Resolves collisions between cloth vertices and capsule colliders and derives
+// each vertex velocity from the position change over the step.
+//
+// For every vertex that belongs to a valid cloth the kernel
+//  - computes velocity = (position - previousPosition) * (1 - damping) * isolverdt,
+//  - visits the collision objects assigned to the cloth; for each capsule that
+//    the vertex penetrates, pushes the vertex out along the contact normal,
+//    recomputes the velocity and accumulates a friction force,
+//  - writes back velocity, friction force and position with w set to 0.
+//
+// isolverdt is used as a multiplier on position differences; presumably it is
+// the inverse of the solver time step - confirm against the host code.
+__kernel void 
+SolveCollisionsAndUpdateVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,
+	__global int *g_vertexClothIdentifier,
+	__global float4 *g_vertexPreviousPositions,
+	__global float * g_perClothFriction,
+	__global float * g_clothDampingFactor,
+	__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+	__global CollisionShapeDescription * g_collisionObjectDetails,
+	__global float4 * g_vertexForces,
+	__global float4 *g_vertexVelocities,
+	__global float4 *g_vertexPositions GUID_ARG)
+{
+	int nodeID = get_global_id(0);
+	float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
+	
+	if( get_global_id(0) < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		
+		// Abort if this is not a valid cloth
+		if( clothIdentifier < 0 )
+			return;
+
+		// Positions carry w = 1 so that they can be fed through the 4x4
+		// transform and so that differences of two points have w = 0
+		float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
+		float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
+			
+		float clothFriction = g_perClothFriction[clothIdentifier];
+		float dampingFactor = g_clothDampingFactor[clothIdentifier];
+		float velocityCoefficient = (1.f - dampingFactor);		
+
+		// Velocity in the absence of any collision
+		float4 velocity = (position - previousPosition)*velocityCoefficient*isolverdt;
+		
+		CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+	
+		// The loop body is skipped entirely when the cloth has no collision objects
+		for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
+		{
+			CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
+			float colliderFriction = shapeDescription.friction;
+
+			// Only capsules are supported; every other shape type is ignored
+			if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+			{
+				float capsuleHalfHeight = shapeDescription.halfHeight;
+				float capsuleRadius = shapeDescription.radius;
+				float capsuleMargin = shapeDescription.margin;
+				int capsuleupAxis = shapeDescription.upAxis;
+
+				// A zero-length centre line would make the projection below
+				// divide zero by zero; use a tiny positive length instead
+				if( capsuleHalfHeight <= 0 )
+					capsuleHalfHeight = 0.0001f;
+
+				// Four columns of worldTransform matrix
+				float4 worldTransform[4];
+				worldTransform[0] = shapeDescription.shapeTransform[0];
+				worldTransform[1] = shapeDescription.shapeTransform[1];
+				worldTransform[2] = shapeDescription.shapeTransform[2];
+				worldTransform[3] = shapeDescription.shapeTransform[3];
+
+				// End points of the capsule centre line in capsule space:
+				// plus and minus half the height along the up axis
+				float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f); 
+				float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
+				c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+				c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+				c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+				c2.x = -c1.x;
+				c2.y = -c1.y;
+				c2.z = -c1.z;
+
+				float4 worldC1 = matrixVectorMul(worldTransform, c1);
+				float4 worldC2 = matrixVectorMul(worldTransform, c2);
+				float4 segment = (worldC2 - worldC1);
+
+				// Parameter of the point on the centre line closest to the
+				// vertex: 0 at worldC1 and 1 at worldC2
+				float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
+
+				float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment));
+				float distanceFromLine = length(position - closestPoint);
+				float distanceFromC1 = length(worldC1 - position);
+				float distanceFromC2 = length(worldC2 - position);
+				
+				// Final distance from collision, point to push from, direction to push in
+				// for impulse force
+				float dist;
+				float4 normalVector;
+				if( distanceAlongSegment < 0 )
+				{
+					// Beyond the first end cap
+					dist = distanceFromC1;
+					normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);
+				} else if( distanceAlongSegment > 1.f ) {
+					// Beyond the second end cap
+					dist = distanceFromC2;
+					normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);	
+				} else {
+					// Beside the cylindrical part
+					dist = distanceFromLine;
+					normalVector = (float4)(normalize(position - closestPoint).xyz, 0.f);
+				}
+
+				// The translation of a column-major transform is its fourth
+				// column. The w components of the first three columns belong
+				// to the bottom row of the matrix and are not the origin.
+				float4 colliderOrigin = (float4)(worldTransform[3].xyz, 0.f);
+					
+				float4 colliderLinearVelocity = shapeDescription.linearVelocity;
+				float4 colliderAngularVelocity = shapeDescription.angularVelocity;
+				float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - colliderOrigin);
+
+				float minDistance = capsuleRadius + capsuleMargin;
+				
+				// Check for a collision
+				if( dist < minDistance )
+				{
+					// Project back to surface along normal; only 90% of the
+					// penetration depth is corrected
+					position = position + (float4)((minDistance - dist)*normalVector*0.9f);
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+					float4 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+					// Two directions spanning the plane perpendicular to the normal
+					float4 p1 = normalize(cross(normalVector, segment));
+					float4 p2 = normalize(cross(p1, normalVector));
+					// Full friction is sum of velocities in each direction of plane
+					float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
+
+					// Real friction is peak friction corrected by friction coefficients
+					frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+					float approachSpeed = dot(relativeVelocity, normalVector);
+
+					// Friction is applied only while the vertex is not
+					// separating from the surface along the normal
+					if( approachSpeed <= 0.0f )
+						forceOnVertex -= frictionVector;
+				}
+			}
+		}
+
+		g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);	
+
+		// Update external force
+		g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
+
+		g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
+	}
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl
@ -0,0 +1,242 @@
+MSTRINGIFY(
+
+//#pragma OPENCL EXTENSION cl_amd_printf:enable\n
+
+// Dot product restricted to the x, y and z lanes of the two operands.
+float mydot3a(float4 a, float4 b)
+{
+	const float4 lanewise = a*b;
+	return lanewise.x + lanewise.y + lanewise.z;
+}
+
+// Length of the xyz part of a; the w lane is replaced by zero beforehand.
+float mylength3(float4 a)
+{
+	return length((float4)(a.xyz, 0.f));
+}
+
+// Normalizes the xyz part of a; the w lane is replaced by zero beforehand.
+float4 mynormalize3(float4 a)
+{
+	return normalize((float4)(a.xyz, 0.f));
+}
+
+// Range of collision objects that belongs to one cloth. The kernel in this
+// file uses endObject - firstObject as the number of objects and copies the
+// objects starting at firstObject into local memory.
+typedef struct 
+{
+	int firstObject;
+	int endObject;
+} CollisionObjectIndices;
+
+// Description of one collision object as read by the collision kernel.
+// NOTE(review): the field order presumably has to match the structure that
+// the host code uploads - confirm before reordering or resizing anything.
+typedef struct 
+{
+	float4 shapeTransform[4]; // column major 4x4 matrix
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType;
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	float halfHeight;
+	int upAxis;
+		
+	float margin;
+	float friction;
+
+	int padding0;
+	
+} CollisionShapeDescription;
+
+// Shape type identifier of a capsule, the only shape type that the collision
+// kernel in this file handles.
+// From btBroadphaseProxy.h
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Multiply column-major matrix against vector
+// Multiplies a 4x4 matrix stored as four columns with a 4-component vector.
+// The rows are gathered from the columns and each one is dotted with the vector.
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+	const float4 firstRow  = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+	const float4 secondRow = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+	const float4 thirdRow  = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+	const float4 fourthRow = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+
+	return (float4)(dot(firstRow, vector), dot(secondRow, vector), dot(thirdRow, vector), dot(fourthRow, vector));
+}
+
+// Variant of the collision kernel that first stages the collision objects of
+// the cloth in local memory and then resolves capsule collisions from there.
+//
+// For every vertex that belongs to a valid cloth the kernel
+//  - computes velocity = (position - previousPosition) * (1 - damping) * isolverdt,
+//  - copies the collision objects of the cloth into localCollisionShapes,
+//    one object per work-item,
+//  - for each capsule that the vertex penetrates, moves the vertex onto the
+//    capsule surface, applies friction as a position correction and
+//    recomputes the velocity,
+//  - writes back velocity, a zero force and the position with w set to 0.
+//
+// numNodes is not used: no range check is made on the global id.
+// isolverdt is used as a multiplier on position differences; presumably it is
+// the inverse of the solver time step - confirm against the host code.
+__kernel void 
+SolveCollisionsAndUpdateVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,
+	__global int *g_vertexClothIdentifier,
+	__global float4 *g_vertexPreviousPositions,
+	__global float * g_perClothFriction,
+	__global float * g_clothDampingFactor,
+	__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+	__global CollisionShapeDescription * g_collisionObjectDetails,
+	__global float4 * g_vertexForces,
+	__global float4 *g_vertexVelocities,
+	__global float4 *g_vertexPositions,
+	__local CollisionShapeDescription *localCollisionShapes,
+	__global float * g_vertexInverseMasses)
+{
+	int nodeID = get_global_id(0);
+	float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
+
+	int clothIdentifier = g_vertexClothIdentifier[nodeID];
+
+	// Abort if this is not a valid cloth
+	// NOTE(review): this return happens before the barrier below. That is only
+	// well defined if all work-items of a work-group take the same path here;
+	// confirm that the host never mixes valid and invalid vertices in a group.
+	if( clothIdentifier < 0 )
+		return;
+	
+
+	float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
+	float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 0.f);
+			
+	float clothFriction = g_perClothFriction[clothIdentifier];
+	float dampingFactor = g_clothDampingFactor[clothIdentifier];
+	float velocityCoefficient = (1.f - dampingFactor);		
+	float4 difference = position - previousPosition;
+
+	// Velocity in the absence of any collision
+	float4 velocity = difference*velocityCoefficient*isolverdt;			
+	float inverseMass = g_vertexInverseMasses[nodeID];
+		
+	CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+	
+	int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+		
+	// Load the collision objects into LDS, one object per work-item.
+	// Objects with an index at or above the work-group size are not loaded.
+	if( numObjects > 0 )
+	{
+		if( get_local_id(0) < numObjects )
+		{
+			localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
+		}
+	}
+
+	// Safe as the vertices are padded so that not more than one soft body is in a group
+	barrier(CLK_LOCAL_MEM_FENCE);
+
+	// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
+	if( numObjects > 0 )
+	{
+		// We have some possible collisions to deal with
+		for( int collision = 0; collision < numObjects; ++collision )
+		{
+			CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
+			float colliderFriction = localCollisionShapes[collision].friction;
+		
+			// Only capsules are supported; every other shape type is ignored
+			if( localCollisionShapes[collision].collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+			{
+				float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
+				float capsuleRadius = localCollisionShapes[collision].radius;
+				float capsuleMargin = localCollisionShapes[collision].margin;
+				int capsuleupAxis = localCollisionShapes[collision].upAxis;
+
+				// Keep the centre line from collapsing to a point so that it
+				// can still be normalized below
+				if ( capsuleHalfHeight <= 0 )
+						capsuleHalfHeight = 0.0001f;
+
+				// Four columns of worldTransform matrix
+				float4 worldTransform[4];
+				worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
+				worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
+				worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
+				worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
+
+				// End points of the capsule centre line in capsule space:
+				// plus and minus half the height along the up axis
+				float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f); 
+				float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
+				c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+				c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+				c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+				c2.x = -c1.x;
+				c2.y = -c1.y;
+				c2.z = -c1.z;
+
+				float4 worldC1 = matrixVectorMul(worldTransform, c1);
+				float4 worldC2 = matrixVectorMul(worldTransform, c2);
+				float4 segment = (float4)((worldC2 - worldC1).xyz, 0.f);
+
+				// Signed distance of the projection of the vertex onto the
+				// centre line, measured from worldC1
+				float4 segmentNormalized = mynormalize3(segment);
+				float distanceAlongSegment =mydot3a( (position - worldC1), segmentNormalized );
+
+				float4 closestPointOnSegment = (worldC1 + (float4)(segmentNormalized * distanceAlongSegment));
+				float distanceFromLine = mylength3(position - closestPointOnSegment);
+				float distanceFromC1 = mylength3(worldC1 - position);
+				float distanceFromC2 = mylength3(worldC2 - position);
+	
+				// Final distance from collision, point to push from, direction to push in
+				// for impulse force
+				float dist;
+				float4 normalVector;
+
+				// The normals are normalized over xyz only, matching the
+				// three-component distances above. The transformed end points
+				// may carry a non-zero w, which a four-component normalize
+				// would fold into the length and shorten the normal.
+				if( distanceAlongSegment < 0 )
+				{
+					// Beyond the first end cap
+					dist = distanceFromC1;
+					normalVector = mynormalize3(position - worldC1);
+				} else if( distanceAlongSegment > length(segment) ) {
+					// Beyond the second end cap
+					dist = distanceFromC2;
+					normalVector = mynormalize3(position - worldC2);
+				} else {
+					// Beside the cylindrical part
+					dist = distanceFromLine;
+					normalVector = mynormalize3(position - closestPointOnSegment);
+				}
+						
+				float minDistance = capsuleRadius + capsuleMargin;
+				float4 closestPointOnSurface = (float4)((position + (minDistance - dist) * normalVector).xyz, 0.f);
+
+				// The translation of a column-major transform is its fourth
+				// column. The w components of the first three columns belong
+				// to the bottom row of the matrix and are not the origin.
+				float4 colliderOrigin = (float4)(worldTransform[3].xyz, 0.f);
+										
+				float4 colliderLinearVelocity = shapeDescription.linearVelocity;
+				float4 colliderAngularVelocity = shapeDescription.angularVelocity;
+				float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, closestPointOnSurface - colliderOrigin);
+					
+				// Check for a collision
+				if( dist < minDistance )
+				{
+					// Project back to surface along normal
+					position = closestPointOnSurface;
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+					float4 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+					// Two directions spanning the plane perpendicular to the normal
+					float4 p1 = mynormalize3(cross(normalVector, segment));
+					float4 p2 = mynormalize3(cross(p1, normalVector));
+					
+					float4 tangentialVel = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
+					float frictionCoef = (colliderFriction * clothFriction);
+					if (frictionCoef>1.f)
+						frictionCoef = 1.f;
+						
+					// Friction is applied when the relative velocity along the
+					// normal is at least -0.001
+					float projVel = mydot3a(relativeVelocity,normalVector);
+					if ( projVel >= -0.001f)
+					{
+						// Vertices with zero inverse mass are not moved by friction
+						if ( inverseMass > 0 )
+						{
+							position += (-tangentialVel * frictionCoef) / (isolverdt);
+						}
+					}						
+					
+					// Recompute the velocity from the final position, which
+					// may include the friction correction
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+
+				}
+			}
+		}
+	}
+	
+	g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);	
+
+	// Update external force; this kernel never accumulates one, so it is zero
+	g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
+
+	g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl
@ -0,0 +1,57 @@
+
+
+
+MSTRINGIFY(
+
+
+// Dot product over the x, y and z lanes only; the w lanes do not contribute.
+float mydot3(float4 a, float4 b)
+{
+	const float4 products = a*b;
+	return products.x + products.y + products.z;
+}
+
+
+// Applies one position correction to the two end vertices of each link in the
+// range starting at startLink. Links whose massLSC value is not positive are
+// left untouched. The parameter ti is not used.
+__kernel void 
+SolvePositionsFromLinksKernel( 
+	const int startLink,
+	const int numLinks,
+	const float kst,
+	const float ti,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksMassLSC,
+	__global float * g_linksRestLengthSquared,
+	__global float * g_verticesInverseMass,
+	__global float4 * g_vertexPositions GUID_ARG)
+	
+{
+	const int linkID = get_global_id(0) + startLink;
+	if( get_global_id(0) >= numLinks )
+		return;
+
+	const float massLSC = g_linksMassLSC[linkID];
+	if( !(massLSC > 0.0f) )
+		return;
+
+	const float restLengthSquared = g_linksRestLengthSquared[linkID];
+	const int2 endpoints = g_linksVertexIndices[linkID];
+
+	const float4 startPosition = g_vertexPositions[endpoints.x];
+	const float4 endPosition = g_vertexPositions[endpoints.y];
+	const float startInverseMass = g_verticesInverseMass[endpoints.x];
+	const float endInverseMass = g_verticesInverseMass[endpoints.y];
+
+	// Vector along the link and its squared length over xyz
+	const float4 del = endPosition - startPosition;
+	const float lengthSquared = mydot3(del, del);
+
+	// Correction factor: positive when the link is shorter than its rest length
+	const float k = ((restLengthSquared - lengthSquared)/(massLSC*(restLengthSquared+lengthSquared)))*kst;
+
+	// Move the two ends in opposite directions, each weighted by its inverse mass
+	g_vertexPositions[endpoints.x] = startPosition - del*(k*startInverseMass);
+	g_vertexPositions[endpoints.y] = endPosition + del*(k*endInverseMass);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl
@ -0,0 +1,130 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+MSTRINGIFY(
+
+// Dot product restricted to the x, y and z lanes of the two operands.
+float mydot3(float4 a, float4 b)
+{
+	const float4 lanewise = a*b;
+	return lanewise.x + lanewise.y + lanewise.z;
+}
+
+// Link solver that works on vertices staged in local memory.
+//
+// Work-items are grouped into wavefronts of WAVEFRONT_SIZE lanes. Each
+// wavefront copies the vertices it uses into local memory, runs through its
+// batches of links applying the position correction in local memory, and
+// finally writes the vertices back to global memory with w set to 0.
+//
+// WAVEFRONT_SIZE, WAVEFRONT_BLOCK_MULTIPLIER, MAX_NUM_VERTICES_PER_WAVE and
+// MAX_BATCHES_PER_WAVE are not defined in this file; presumably they are
+// supplied as build options by the host - confirm.
+// The parameter ti is not used.
+//
+// NOTE(review): the barriers below sit inside a conditional and inside a loop
+// whose trip count is read per wavefront. OpenCL requires every work-item of a
+// work-group to reach a barrier; confirm that this holds for all wavefronts
+// that share a work-group.
+__kernel __attribute__((reqd_work_group_size(WAVEFRONT_BLOCK_MULTIPLIER*WAVEFRONT_SIZE, 1, 1)))
+void 
+SolvePositionsFromLinksKernel( 
+	const int startWaveInBatch,
+	const int numWaves,
+	const float kst,
+	const float ti,
+	__global int2 *g_wavefrontBatchCountsVertexCounts,
+	__global int *g_vertexAddressesPerWavefront,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksMassLSC,
+	__global float * g_linksRestLengthSquared,
+	__global float * g_verticesInverseMass,
+	__global float4 * g_vertexPositions,
+	__local int2 *wavefrontBatchCountsVertexCounts,
+	__local float4 *vertexPositionSharedData,
+	__local float *vertexInverseMassSharedData)
+{
+	// Lane index within the wavefront; the mask assumes WAVEFRONT_SIZE is a
+	// power of two - TODO confirm
+	const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
+	// Index of this wavefront in the per-wavefront global arrays
+	const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
+	const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
+	// Index of this wavefront within its work-group; selects its slice of local memory
+	const int localWavefront = wavefront - firstWavefrontInBlock;
+
+	// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier	
+	if( wavefront < (startWaveInBatch + numWaves) )
+	{	
+		// Load the batch counts for the wavefronts
+		
+		// x holds the number of batches and y the number of vertices of this wavefront
+		int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
+		int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
+		int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
+
+		// Load the vertices for the wavefronts
+		// Each lane copies every WAVEFRONT_SIZE-th vertex, starting at its own lane index
+		for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+		{
+			int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+			vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
+			vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
+		}
+		
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		// Loop through the batches performing the solve on each in LDS
+		int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;	
+
+		//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
+		
+		// The do/while form runs the body at least once, even when
+		// batchesWithinWavefront is zero
+		int batch = 0;
+		do
+		{
+			int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
+			int locationOfValue = baseDataLocation + laneInWavefront;
+			
+			
+			// These loads should all be perfectly linear across the WF
+			int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
+			float massLSC = g_linksMassLSC[locationOfValue];
+			float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
+			
+			// LDS vertex addresses based on logical wavefront number in block and loaded index
+			int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
+			int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
+			
+			float4 position0 = vertexPositionSharedData[vertexAddress0];
+			float4 position1 = vertexPositionSharedData[vertexAddress1];
+
+			float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
+			float inverseMass1 = vertexInverseMassSharedData[vertexAddress1]; 
+
+			float4 del = position1 - position0;
+			float len = mydot3(del, del);
+			
+			// Links whose massLSC is not positive keep k at 0, so the
+			// positions written below are unchanged for them
+			float k = 0;
+			if( massLSC > 0.0f )
+			{		
+				k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
+			}
+			
+			position0 = position0 - del*(k*inverseMass0);
+			position1 = position1 + del*(k*inverseMass1);
+			
+			// Ensure compiler does not re-order memory operations
+			barrier(CLK_LOCAL_MEM_FENCE);
+
+			vertexPositionSharedData[vertexAddress0] = position0;
+			vertexPositionSharedData[vertexAddress1] = position1;
+			
+			// Ensure compiler does not re-order memory operations
+			barrier(CLK_LOCAL_MEM_FENCE);
+				
+			
+			++batch;
+		} while( batch < batchesWithinWavefront );
+
+		// Update the global memory vertices for the wavefronts
+		// Only xyz is written back; w is set to 0
+		for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+		{
+			int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+			g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
+		}		
+		
+	}
+
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl
@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+/*#define float3 float4
+
+float dot3(float3 a, float3 b)
+{
+   return a.x*b.x + a.y*b.y + a.z*b.z;
+}*/
+
+// Recomputes the per-link constants from the current vertex positions:
+// the rest length, its square, and the sum of the end vertices' inverse
+// masses divided by the link's material stiffness coefficient.
+__kernel void 
+UpdateConstantsKernel( 
+	const int numLinks,
+	__global int2 * g_linksVertexIndices,
+	__global float4 * g_vertexPositions,
+	__global float * g_vertexInverseMasses,
+	__global float * g_linksMaterialLSC,
+	__global float * g_linksMassLSC,
+	__global float * g_linksRestLengthSquared,
+	__global float * g_linksRestLengths)
+{
+	const int linkID = get_global_id(0);
+	if( linkID >= numLinks )
+		return;
+
+	const int2 endpoints = g_linksVertexIndices[linkID];
+	const float stiffness = g_linksMaterialLSC[ linkID ];
+
+	// Distance between the two end vertices over xyz
+	const float3 separation = g_vertexPositions[endpoints.x].xyz - g_vertexPositions[endpoints.y].xyz;
+	const float restLength = sqrt(dot(separation, separation));
+
+	const float inverseMassSum = g_vertexInverseMasses[endpoints.x] + g_vertexInverseMasses[endpoints.y];
+
+	g_linksRestLengths[linkID] = restLength;
+	g_linksMassLSC[linkID] = inverseMassSum/stiffness;
+	// The square is rebuilt from the rounded length, not taken from the dot product
+	g_linksRestLengthSquared[linkID] = restLength*restLength;
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateFixedVertexPositions.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateFixedVertexPositions.cl
@ -0,0 +1,25 @@
+MSTRINGIFY(
+	
+/*
+ * Snap anchored vertices onto their anchor positions.
+ * One work-item handles one vertex; work-items with an id of numNodes or more do nothing.
+ *
+ * g_anchorIndex     - per-vertex index into g_anchorPositions; a negative value leaves the vertex untouched
+ * g_vertexPositions - overwritten with the anchor position for every vertex whose anchor index is >= 0
+ * g_anchorPositions - source positions, indexed by anchor index
+ */
+__kernel void 
+UpdateFixedVertexPositions(
+	const uint numNodes,
+	__global int * g_anchorIndex,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_anchorPositions GUID_ARG)
+{
+	unsigned int nodeID = get_global_id(0);
+
+	if( nodeID < numNodes )
+	{		
+		int anchorIndex  = g_anchorIndex[nodeID];
+
+		// The previous vertex position is never needed, so it is not fetched
+		if ( anchorIndex >= 0 )
+		{
+			g_vertexPositions[nodeID] = g_anchorPositions[anchorIndex];
+		}
+	}		
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl
@ -0,0 +1,39 @@
+MSTRINGIFY(
+
+
+/*
+ * Correct each vertex velocity by the displacement since the previous position and then damp it.
+ * One work-item handles one vertex; work-items with an id of numNodes or more do nothing.
+ *
+ *   velocity = (velocity + (position - previousPosition) * correctionCoefficient * isolverdt) * (1 - damping)
+ *
+ * The correction coefficient and damping factor are looked up through the cloth index of the vertex.
+ * The force accumulator of the vertex is cleared as the last step.
+ */
+__kernel void 
+updateVelocitiesFromPositionsWithVelocitiesKernel( 
+	int numNodes,
+	float isolverdt,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexPreviousPositions,
+	__global int * g_vertexClothIndices,
+	__global float *g_clothVelocityCorrectionCoefficients,
+	__global float * g_clothDampingFactor,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexForces GUID_ARG)
+{
+	const int nodeID = get_global_id(0);
+	if( nodeID >= numNodes )
+		return;
+
+	// Per-cloth parameters
+	const int cloth = g_vertexClothIndices[nodeID];
+	const float correction = g_clothVelocityCorrectionCoefficients[cloth];
+	const float retainedFraction = (1.f - g_clothDampingFactor[cloth]);
+
+	const float4 displacement = g_vertexPositions[nodeID] - g_vertexPreviousPositions[nodeID];
+
+	float4 updatedVelocity = g_vertexVelocities[nodeID];
+	updatedVelocity += displacement*correction*isolverdt;
+
+	// Apply damping after the correction
+	updatedVelocity *= retainedFraction;
+
+	g_vertexVelocities[nodeID] = updatedVelocity;
+	g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl
@ -0,0 +1,102 @@
+MSTRINGIFY(
+
+/* Length of the x/y/z part of a; the w component is ignored. */
+float length3(float4 a)
+{
+	float4 xyzOnly = a;
+	xyzOnly.w = 0;
+	return length(xyzOnly);
+}
+
+/* Normalise a after forcing its w component to zero so that only x/y/z contribute. */
+float4 normalize3(float4 a)
+{
+	float4 xyzOnly = a;
+	xyzOnly.w = 0;
+	return normalize(xyzOnly);
+}
+
+/*
+ * Zero the per-vertex normal and area accumulators.
+ * One work-item handles one vertex; work-items with an id of numNodes or more do nothing.
+ */
+__kernel void 
+ResetNormalsAndAreasKernel(
+	const unsigned int numNodes,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea GUID_ARG)
+{
+	const size_t nodeID = get_global_id(0);
+	if( nodeID >= numNodes )
+		return;
+
+	g_vertexArea[nodeID]    = 0.0f;
+	g_vertexNormals[nodeID] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+}
+
+
+/*
+ * Compute one triangle normal and area and add them to the accumulators of its three corner vertices.
+ * Work-item i handles face (startFace + i); work-items with i of numFaces or more do nothing.
+ *
+ * The value stored as area is the length of the cross product of two triangle edges.
+ * The stored triangle normal is that cross product normalised with normalize3.
+ *
+ * NOTE(review): the vertex accumulators are updated with plain read-modify-write,
+ * so faces processed in the same dispatch presumably must not share vertices - confirm against the host-side batching.
+ */
+__kernel void 
+UpdateSoftBodiesKernel(
+	const unsigned int startFace,
+	const unsigned int numFaces,
+	__global int4 * g_triangleVertexIndexSet,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea,
+	__global float4 * g_triangleNormals,
+	__global float * g_triangleArea GUID_ARG)
+{
+	const int faceID = get_global_id(0) + startFace;
+	if( get_global_id(0) >= numFaces )
+		return;
+
+	const int4 corners = g_triangleVertexIndexSet[ faceID ];
+
+	// Fetch every input before the first store so that repeated corner indices see the old values
+	const float4 cornerPosition0 = g_vertexPositions[corners.x];
+	const float4 cornerPosition1 = g_vertexPositions[corners.y];
+	const float4 cornerPosition2 = g_vertexPositions[corners.z];
+	const float4 normalSum0 = g_vertexNormals[corners.x];
+	const float4 normalSum1 = g_vertexNormals[corners.y];
+	const float4 normalSum2 = g_vertexNormals[corners.z];
+	const float areaSum0 = g_vertexArea[corners.x];
+	const float areaSum1 = g_vertexArea[corners.y];
+	const float areaSum2 = g_vertexArea[corners.z];
+
+	// Two edges leaving corner 0 span the triangle
+	const float4 edge01 = cornerPosition1 - cornerPosition0;
+	const float4 edge02 = cornerPosition2 - cornerPosition0;
+
+	const float4 faceNormal = cross(edge01, edge02);
+	const float triangleArea = length(faceNormal);
+
+	g_triangleNormals[faceID] = normalize3(faceNormal);
+	g_vertexNormals[corners.x] = normalSum0 + faceNormal;
+	g_vertexNormals[corners.y] = normalSum1 + faceNormal;
+	g_vertexNormals[corners.z] = normalSum2 + faceNormal;
+	g_triangleArea[faceID] = triangleArea;
+	g_vertexArea[corners.x] = areaSum0 + triangleArea;
+	g_vertexArea[corners.y] = areaSum1 + triangleArea;
+	g_vertexArea[corners.z] = areaSum2 + triangleArea;
+}
+
+/*
+ * Finalise the per-vertex accumulators: normalise the summed normal and
+ * divide the summed area by the number of triangles that use the vertex.
+ * One work-item handles one vertex; work-items with an id of numNodes or more do nothing.
+ *
+ * NOTE(review): a vertex whose triangle count is zero divides by zero here - confirm
+ * that the host never uploads such vertices.
+ */
+__kernel void 
+NormalizeNormalsAndAreasKernel( 
+	const unsigned int numNodes,
+	__global int * g_vertexTriangleCount,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea GUID_ARG)
+{
+	if( get_global_id(0) < numNodes )
+	{
+		float4 normal = g_vertexNormals[get_global_id(0)];
+		float area = g_vertexArea[get_global_id(0)];
+		int numTriangles = g_vertexTriangleCount[get_global_id(0)];
+		
+		// The length of the summed normal is not needed separately: normalize3 does the scaling
+		g_vertexNormals[get_global_id(0)] = normalize3(normal);
+		g_vertexArea[get_global_id(0)] = area/(float)(numTriangles);
+	}
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl
@ -0,0 +1,34 @@
+MSTRINGIFY(
+
+/*
+ * Derive each vertex velocity purely from the displacement since the previous position.
+ * One work-item handles one vertex; work-items with an id of numNodes or more do nothing.
+ *
+ *   velocity = (position - previousPosition) * (1 - damping) * isolverdt
+ *
+ * The damping factor is looked up through the cloth index of the vertex.
+ * The old velocity does not enter the formula, so it is not read.
+ * The force accumulator of the vertex is cleared as the last step.
+ */
+__kernel void 
+updateVelocitiesFromPositionsWithoutVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexPreviousPositions,
+	__global int * g_vertexClothIndices,
+	__global float * g_clothDampingFactor,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexForces GUID_ARG)
+
+{
+	int nodeID = get_global_id(0);
+	if( nodeID < numNodes )
+	{	
+		float4 position = g_vertexPositions[nodeID];
+		float4 previousPosition = g_vertexPreviousPositions[nodeID];
+		int clothIndex = g_vertexClothIndices[nodeID];
+		float dampingFactor = g_clothDampingFactor[clothIndex];
+		float velocityCoefficient = (1.f - dampingFactor);
+		
+		float4 difference = position - previousPosition;
+				
+		g_vertexVelocities[nodeID] = difference*velocityCoefficient*isolverdt;
+		g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);								
+	}
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl
@ -0,0 +1,28 @@
+
+MSTRINGIFY(
+
+
+
+
+/*
+ * Advance every vertex by its velocity: newPosition = previousPosition + velocity * solverSDT.
+ * The result is stored as both the current and the previous position.
+ * One work-item handles one vertex; work-items with an id of numNodes or more do nothing.
+ */
+__kernel void 
+UpdatePositionsFromVelocitiesKernel( 
+	const int numNodes,
+	const float solverSDT,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexPreviousPositions,
+	__global float4 * g_vertexCurrentPosition GUID_ARG)
+{
+	const int vertexID = get_global_id(0);
+	if( vertexID >= numNodes )
+		return;
+
+	const float4 advanced = g_vertexPreviousPositions[vertexID] + g_vertexVelocities[vertexID]*solverSDT;
+
+	g_vertexCurrentPosition[vertexID]   = advanced;
+	g_vertexPreviousPositions[vertexID] = advanced;
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl
@ -0,0 +1,45 @@
+MSTRINGIFY(
+
+/*
+ * Apply a velocity impulse along each link to its two end vertices.
+ * Work-item i handles link (startLink + i); work-items with i of numLinks or more do nothing.
+ *
+ *   j  = -dot(linkVector, v0 - v1) * lengthRatio * kst
+ *   v0 += linkVector * (j * inverseMass0)
+ *   v1 -= linkVector * (j * inverseMass1)
+ *
+ * The w component of both written velocities is set to zero.
+ *
+ * NOTE(review): the vertex velocities are updated with plain read-modify-write,
+ * so links processed in the same dispatch presumably must not share vertices - confirm against the host-side batching.
+ */
+__kernel void 
+VSolveLinksKernel( 
+	int startLink,
+	int numLinks,
+	float kst,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksLengthRatio,
+	__global float4 * g_linksCurrentLength,
+	__global float * g_vertexInverseMass,
+	__global float4 * g_vertexVelocity GUID_ARG)
+{
+	const int linkID = get_global_id(0) + startLink;
+	if( get_global_id(0) >= numLinks )
+		return;
+
+	const int2 endpoints = g_linksVertexIndices[linkID];
+
+	const float lengthRatio = g_linksLengthRatio[linkID];
+	const float3 linkVector = g_linksCurrentLength[linkID].xyz;
+
+	// Both end velocities are fetched before either one is written back
+	float3 velocityA = g_vertexVelocity[endpoints.x].xyz;
+	float3 velocityB = g_vertexVelocity[endpoints.y].xyz;
+
+	const float inverseMassA = g_vertexInverseMass[endpoints.x];
+	const float inverseMassB = g_vertexInverseMass[endpoints.y];
+
+	// Relative velocity projected onto the link gives the impulse magnitude
+	const float projected = dot(linkVector, velocityA - velocityB);
+	const float j = -projected*lengthRatio*kst;
+
+	velocityA += linkVector*(j*inverseMassA);
+	velocityB -= linkVector*(j*inverseMassB);
+
+	g_vertexVelocity[endpoints.x] = (float4)(velocityA, 0.f);
+	g_vertexVelocity[endpoints.y] = (float4)(velocityB, 0.f);
+}
+
+);
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h
@ -0,0 +1,209 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
+
+// OpenCL support
+
+#ifdef USE_MINICL
+	#include "MiniCL/cl.h"
+#else //USE_MINICL
+	#ifdef __APPLE__
+		#include <OpenCL/OpenCL.h>
+	#else
+		#include <CL/cl.h>
+	#endif //__APPLE__
+#endif//USE_MINICL
+
+// Release helper: if p is non-null, call p->Release() and reset p to NULL.
+// Only defined when no earlier header has provided SAFE_RELEASE already.
+// NOTE(review): nothing in this header uses it (OpenCL handles are released with clRelease* calls) - confirm whether it is still needed.
+#ifndef SAFE_RELEASE
+#define SAFE_RELEASE(p)      { if(p) { (p)->Release(); (p)=NULL; } }
+#endif
+
+/**
+ * Couples a CPU-side btAlignedObjectArray with an OpenCL buffer object and tracks
+ * which of the two currently holds the up-to-date data.
+ *
+ * The CPU array is referenced, not owned. The cl_mem handle stored in m_buffer is
+ * released by the destructor, including a handle adopted through createBuffer().
+ *
+ * Every method that can fail returns false on an OpenCL error (and asserts in debug builds).
+ */
+template <typename ElementType> class btOpenCLBuffer
+{
+public:
+
+	/** Queue used for all transfers. */
+	cl_command_queue	m_cqCommandQue;
+	/** Context the device buffer is created in. */
+	cl_context			m_clContext;
+	/** Device buffer handle, 0 while nothing has been created or adopted. */
+	cl_mem				m_buffer;
+
+
+
+	/** CPU-side storage mirrored by the device buffer (not owned). */
+	btAlignedObjectArray< ElementType > * m_CPUBuffer;
+	
+	/** Element count of the CPU array at the time the device buffer was last created. */
+	int  m_gpuSize;
+	/** True while the device copy is considered current. */
+	bool m_onGPU;
+	/** Selects CL_MEM_READ_ONLY instead of CL_MEM_READ_WRITE and disables read-back. */
+	bool m_readOnlyOnGPU;
+	/** True once moveToGPU() has created a device buffer. */
+	bool m_allocated;
+
+
+	/**
+	 * Create the device buffer, or adopt an existing one.
+	 *
+	 * @param preexistingBuffer  when non-null, the handle it points to is stored as-is and no buffer is created
+	 * @return true on success, false when clCreateBuffer reports an error (the previous buffer is then kept)
+	 *
+	 * A newly created buffer holds as many elements as the CPU array, but never fewer than one.
+	 * A buffer previously held by this object is released once its replacement exists.
+	 */
+	bool createBuffer( cl_mem* preexistingBuffer = 0)
+	{
+		if( preexistingBuffer )
+		{
+			m_buffer = *preexistingBuffer;
+		} 
+		else {
+
+			cl_int err = CL_SUCCESS;
+			cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
+
+			size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+			// At a minimum the buffer must exist
+			if( size == 0 )
+				size = sizeof(ElementType);
+			cl_mem newBuffer = clCreateBuffer(m_clContext, flags, size, 0, &err);
+			if( err != CL_SUCCESS )
+			{
+				btAssert( 0 && "Buffer::Buffer(m_buffer)");
+				return false;
+			}
+
+			// Drop the reference to the buffer being replaced; without this every resize leaks one cl_mem
+			if( m_buffer )
+				clReleaseMemObject(m_buffer);
+			m_buffer = newBuffer;
+		}
+
+		m_gpuSize = m_CPUBuffer->size();
+
+		return true;
+	}
+
+public:
+	/**
+	 * @param commandQue  queue used for transfers
+	 * @param ctx         context used to create the device buffer
+	 * @param CPUBuffer   CPU array to mirror; must outlive this object
+	 * @param readOnly    true when kernels only read the buffer
+	 */
+	btOpenCLBuffer( cl_command_queue	commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly)
+		:m_cqCommandQue(commandQue),
+		m_clContext(ctx),
+		m_buffer(0),
+		m_CPUBuffer(CPUBuffer),
+		m_gpuSize(0),
+		m_onGPU(false),
+		m_readOnlyOnGPU(readOnly),
+		m_allocated(false)
+	{
+	}
+
+	~btOpenCLBuffer()
+	{
+		// Nothing to release when no buffer was ever created or adopted
+		if( m_buffer )
+			clReleaseMemObject(m_buffer);
+	}
+
+
+	/**
+	 * Make the device copy current, (re)creating the device buffer when the CPU array
+	 * has changed size since the last creation.
+	 *
+	 * The upload is enqueued non-blocking (CL_FALSE), so the CPU array has to stay
+	 * untouched until the queue has executed the write.
+	 *
+	 * @return false when buffer creation or the enqueue fails, true otherwise
+	 */
+	bool moveToGPU()
+	{
+		// A size change invalidates whatever is on the device
+		if( (m_CPUBuffer->size() != m_gpuSize) )
+		{
+			m_onGPU = false;
+		}
+
+		if( !m_allocated && m_CPUBuffer->size() == 0  )
+		{
+			// If it isn't on the GPU and yet there is no data on the CPU side this may cause a problem with some kernels.
+			// We should create *something* on the device side
+			if (!createBuffer()) {
+				return false;
+			}
+			m_allocated = true;
+		}
+
+		if( !m_onGPU && m_CPUBuffer->size() > 0 )
+		{
+			if (!m_allocated || (m_CPUBuffer->size() != m_gpuSize)) {
+				if (!createBuffer()) {
+					return false;
+				}
+				m_allocated = true;
+			}
+			
+			size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+			cl_int err = clEnqueueWriteBuffer(m_cqCommandQue,m_buffer,
+				CL_FALSE,
+				0,
+				size, 
+				&((*m_CPUBuffer)[0]),0,0,0);
+			if( err != CL_SUCCESS )
+			{
+				btAssert( 0 && "CommandQueue::enqueueWriteBuffer(m_buffer)" );
+				return false;
+			}
+
+			m_onGPU = true;
+		}
+
+		return true;
+
+	}
+
+	/**
+	 * Read the device buffer back into the CPU array (blocking) and mark the CPU copy as current.
+	 * Does nothing when the CPU array is empty, the data is not on the device, or the buffer is read-only on the device.
+	 *
+	 * @return false when the read fails (the device copy then stays marked as current), true otherwise
+	 */
+	bool moveFromGPU()
+	{
+		if (m_CPUBuffer->size() > 0) {
+			if (m_onGPU && !m_readOnlyOnGPU) {
+				size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+				cl_int err = clEnqueueReadBuffer(m_cqCommandQue,
+					m_buffer,
+					CL_TRUE,
+					0,
+					size,
+					&((*m_CPUBuffer)[0]),0,0,0);
+
+				if( err != CL_SUCCESS )
+				{
+					btAssert( 0 && "CommandQueue::enqueueReadBuffer(m_buffer)" );
+					return false;
+				}
+
+				m_onGPU = false;
+			}
+		}
+
+		return true;
+	}
+
+	/**
+	 * Like moveFromGPU(), but the device copy stays marked as current afterwards.
+	 *
+	 * @return false when the read fails, true otherwise
+	 */
+	bool copyFromGPU()
+	{
+		size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+
+		if (m_CPUBuffer->size() > 0) {
+			if (m_onGPU && !m_readOnlyOnGPU) {
+				cl_int err = clEnqueueReadBuffer(m_cqCommandQue,
+					m_buffer,
+					CL_TRUE,
+					0,size, 
+					&((*m_CPUBuffer)[0]),0,0,0);
+
+				if( err != CL_SUCCESS )
+				{
+					btAssert( 0 && "CommandQueue::enqueueReadBuffer(m_buffer)");
+					return false;
+				}
+
+			}
+		}
+
+		return true;
+	}
+
+	/** Tell the buffer that the CPU array was modified, forcing an upload on the next moveToGPU(). */
+	virtual void changedOnCPU()
+	{
+		m_onGPU = false;
+	}
+}; // class btOpenCLBuffer
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h
@ -0,0 +1,99 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
+
+
+/**
+ * OpenCL flavour of btSoftBodyLinkData: the link arrays of the base class plus one
+ * btOpenCLBuffer per array, and the batching information used to dispatch link kernels.
+ * All member functions are defined outside this header.
+ */
+class btSoftBodyLinkDataOpenCL : public btSoftBodyLinkData
+{
+public:
+	/** Residency flag for the link data as a whole (maintained by the out-of-line member functions). */
+	bool				m_onGPU;
+
+	/** Command queue handle kept by this object. */
+	cl_command_queue	m_cqCommandQue;
+
+
+	// Device-side buffers, one per link attribute. The element type of each is given by its template argument.
+	btOpenCLBuffer<LinkNodePair> m_clLinks;
+	btOpenCLBuffer<float>							      m_clLinkStrength;
+	btOpenCLBuffer<float>								  m_clLinksMassLSC;
+	btOpenCLBuffer<float>								  m_clLinksRestLengthSquared;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			  m_clLinksCLength;
+	btOpenCLBuffer<float>								  m_clLinksLengthRatio;
+	btOpenCLBuffer<float>								  m_clLinksRestLength;
+	btOpenCLBuffer<float>								  m_clLinksMaterialLinearStiffnessCoefficient;
+
+	/** A (start, length) range; both values default to 0. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+
+	/**
+	 * @param queue  OpenCL command queue
+	 * @param ctx    OpenCL context
+	 */
+	btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx);
+
+	virtual ~btSoftBodyLinkDataOpenCL();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( 
+		const LinkDescription &link, 
+		int linkIndex );
+
+	/** Accelerator residency query; defined out of line. */
+	virtual bool onAccelerator();
+
+	/** Transfer towards the accelerator; defined out of line. */
+	virtual bool moveToAccelerator();
+
+	/** Transfer back from the accelerator; defined out of line. */
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h
@ -0,0 +1,169 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
+
+
+/**
+ * Wavefront-oriented OpenCL flavour of btSoftBodyLinkData.
+ * Besides the device buffers for the link attributes it stores, per wavefront, the list of
+ * vertices used and the number of batches, and offers read accessors for that layout.
+ * Constructor, destructor and the virtual functions are defined outside this header.
+ */
+class btSoftBodyLinkDataOpenCLSIMDAware : public btSoftBodyLinkData
+{
+public:
+	/** Residency flag for the link data as a whole (maintained by the out-of-line member functions). */
+	bool				m_onGPU;
+
+	/** Command queue handle kept by this object. */
+	cl_command_queue	m_cqCommandQue;
+
+	// The three const values are fixed at construction time (const members can only be set in the constructor).
+	const int m_wavefrontSize;
+	const int m_linksPerWorkItem;
+	const int m_maxLinksPerWavefront;
+	// Layout statistics, exposed through the getters at the end of the class.
+	int m_maxBatchesWithinWave;
+	int m_maxVerticesWithinWave;
+	int m_numWavefronts;
+
+	int m_maxVertex;
+
+	/** Batch count and vertex count of a single wavefront. */
+	struct NumBatchesVerticesPair
+	{
+		int numBatches;
+		int numVertices;
+	};
+
+	btAlignedObjectArray<int>							  m_linksPerWavefront;
+	btAlignedObjectArray<NumBatchesVerticesPair>		  m_numBatchesAndVerticesWithinWaves;
+	btOpenCLBuffer< NumBatchesVerticesPair >			  m_clNumBatchesAndVerticesWithinWaves;
+
+	// All arrays here will contain batches of m_maxLinksPerWavefront links
+	// ordered by wavefront.
+	// with either global vertex pairs or local vertex pairs
+	btAlignedObjectArray< int >							  m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
+	btOpenCLBuffer<int>									  m_clWavefrontVerticesGlobalAddresses;
+	btAlignedObjectArray< LinkNodePair >				  m_linkVerticesLocalAddresses; // Vertex pair for the link
+	btOpenCLBuffer<LinkNodePair>						  m_clLinkVerticesLocalAddresses;
+	btOpenCLBuffer<float>							      m_clLinkStrength;
+	btOpenCLBuffer<float>								  m_clLinksMassLSC;
+	btOpenCLBuffer<float>								  m_clLinksRestLengthSquared;
+	btOpenCLBuffer<float>								  m_clLinksRestLength;
+	btOpenCLBuffer<float>								  m_clLinksMaterialLinearStiffnessCoefficient;
+
+	/** A (start, length) range; both values default to 0. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+	
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_wavefrontBatchStartLengths;
+
+	/**
+	 * @param queue  OpenCL command queue
+	 * @param ctx    OpenCL context
+	 */
+	btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx);
+
+	virtual ~btSoftBodyLinkDataOpenCLSIMDAware();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( 
+		const LinkDescription &link, 
+		int linkIndex );
+
+	/** Accelerator residency query; defined out of line. */
+	virtual bool onAccelerator();
+
+	/** Transfer towards the accelerator; defined out of line. */
+	virtual bool moveToAccelerator();
+
+	/** Transfer back from the accelerator; defined out of line. */
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+
+	/** @return m_maxVerticesWithinWave */
+	int getMaxVerticesPerWavefront()
+	{
+		return m_maxVerticesWithinWave;
+	}
+
+	/** @return m_wavefrontSize */
+	int getWavefrontSize()
+	{
+		return m_wavefrontSize;
+	}
+
+	/** @return m_linksPerWorkItem */
+	int getLinksPerWorkItem()
+	{
+		return m_linksPerWorkItem;
+	}
+
+	/** @return m_maxLinksPerWavefront */
+	int getMaxLinksPerWavefront()
+	{
+		return m_maxLinksPerWavefront;
+	}
+
+	/** @return m_maxBatchesWithinWave */
+	int getMaxBatchesPerWavefront()
+	{
+		return m_maxBatchesWithinWave;
+	}
+
+	/** @return m_numWavefronts */
+	int getNumWavefronts()
+	{
+		return m_numWavefronts;
+	}
+
+	/** @return a copy of the batch/vertex counts stored for the given wavefront index */
+	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront )
+	{
+		return m_numBatchesAndVerticesWithinWaves[wavefront];
+	}
+
+	/** @return the entry of m_wavefrontVerticesGlobalAddresses at vertexIndex */
+	int getVertexGlobalAddresses( int vertexIndex )
+	{
+		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
+	}
+
+	/**
+	 * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
+	 */
+	LinkNodePair getVertexPairLocalAddresses( int linkIndex )
+	{
+		return m_linkVerticesLocalAddresses[linkIndex];
+	}
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp
@ -0,0 +1,126 @@
+#include "btSoftBodySolverOutputCLtoGL.h"
+#include <stdio.h> //@todo: remove the debugging printf at some stage
+#include "btSoftBodySolver_OpenCL.h"
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#include "btSoftBodySolverVertexBuffer_OpenGL.h"
+#include "BulletSoftBody/btSoftBody.h"
+
+////OpenCL 1.0 kernels don't use float3
+// Turns its argument into a string literal. The .cl files wrap their whole content in MSTRINGIFY( ... );
+// so that including one of them here yields the kernel source as a C string.
+#define MSTRINGIFY(A) #A
+// Kernel source handed to compileCLKernelFromString() in buildShaders().
+static char* OutputToVertexArrayCLString =
+#include "OpenCLC10/OutputToVertexArray.cl"
+
+	
+// Release a kernel handle if it is non-zero and reset the handle to 0.
+#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
+
+// Local work size used when enqueueing the output kernel; the global size is rounded up to a multiple of it.
+static const size_t workGroupSize = 128;
+
+/**
+ * Copy the solver's vertex data for one soft body into an OpenGL interop vertex buffer.
+ *
+ * The GL buffer is acquired for OpenCL, the output kernel (with or without normals,
+ * depending on vertexBuffer->hasNormals()) is run, the buffer is released again and
+ * the queue is drained with clFinish.
+ *
+ * @param softBody      soft body whose solver must be a CL_SOLVER or CL_SIMD_SOLVER
+ * @param vertexBuffer  destination descriptor; only OPENGL_BUFFER descriptors are supported,
+ *                      anything else triggers an assertion
+ *
+ * OpenCL failures are reported through btAssert only; the function has no return value.
+ */
+void btSoftBodySolverOutputCLtoGL::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
+	btOpenCLSoftBodySolver *clSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
+
+	// Without compiled kernels there is nothing that could be enqueued
+	if( !checkInitialized() )
+	{
+		btAssert( 0 &&  "checkInitialized(copySoftBodyToVertexBuffer)");
+		return;
+	}
+
+	btOpenCLAcceleratedSoftBodyInterface* currentCloth = clSolver->findSoftBodyInterface( softBody );
+	btSoftBodyVertexDataOpenCL &vertexData( clSolver->m_vertexData );	
+
+	const int firstVertex = currentCloth->getFirstVertex();
+
+	if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::OPENGL_BUFFER ) {		
+
+		const btOpenGLInteropVertexBufferDescriptor *openGLVertexBuffer = static_cast< btOpenGLInteropVertexBufferDescriptor* >(vertexBuffer);						
+		cl_int ciErrNum = CL_SUCCESS;    
+
+		cl_mem clBuffer = openGLVertexBuffer->getBuffer();		
+		cl_kernel outputKernel = outputToVertexArrayWithNormalsKernel;
+		if( !vertexBuffer->hasNormals() )
+			outputKernel = outputToVertexArrayWithoutNormalsKernel;
+
+		ciErrNum = clEnqueueAcquireGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
+		}
+
+		int numVertices = currentCloth->getNumVertices();
+
+		// Remember whether every argument was accepted instead of overwriting the status each time
+		bool argsAccepted = true;
+
+		argsAccepted &= ( clSetKernelArg(outputKernel, 0, sizeof(int), &firstVertex ) == CL_SUCCESS );
+		argsAccepted &= ( clSetKernelArg(outputKernel, 1, sizeof(int), &numVertices ) == CL_SUCCESS );
+		argsAccepted &= ( clSetKernelArg(outputKernel, 2, sizeof(cl_mem), (void*)&clBuffer ) == CL_SUCCESS );
+		if( vertexBuffer->hasVertexPositions() )
+		{
+			int vertexOffset = vertexBuffer->getVertexOffset();
+			int vertexStride = vertexBuffer->getVertexStride();
+			argsAccepted &= ( clSetKernelArg(outputKernel, 3, sizeof(int), &vertexOffset ) == CL_SUCCESS );
+			argsAccepted &= ( clSetKernelArg(outputKernel, 4, sizeof(int), &vertexStride ) == CL_SUCCESS );
+			argsAccepted &= ( clSetKernelArg(outputKernel, 5, sizeof(cl_mem), (void*)&vertexData.m_clVertexPosition.m_buffer ) == CL_SUCCESS );
+
+		}
+		if( vertexBuffer->hasNormals() )
+		{
+			int normalOffset = vertexBuffer->getNormalOffset();
+			int normalStride = vertexBuffer->getNormalStride();
+			argsAccepted &= ( clSetKernelArg(outputKernel, 6, sizeof(int), &normalOffset ) == CL_SUCCESS );
+			argsAccepted &= ( clSetKernelArg(outputKernel, 7, sizeof(int), &normalStride ) == CL_SUCCESS );
+			argsAccepted &= ( clSetKernelArg(outputKernel, 8, sizeof(cl_mem), (void*)&vertexData.m_clVertexNormal.m_buffer ) == CL_SUCCESS );
+
+		}
+		if( !argsAccepted )
+		{
+			btAssert( 0 &&  "clSetKernelArg(copySoftBodyToVertexBuffer)");
+		}
+
+		// Global size: total vertex count of the solver rounded up to a whole number of work groups
+		size_t	numWorkItems = workGroupSize*((vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, outputKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(copySoftBodyToVertexBuffer)");
+		}
+
+		ciErrNum = clEnqueueReleaseGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, 0);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clEnqueueReleaseGLObjects(copySoftBodyToVertexBuffer)");
+		}
+	} else {
+		btAssert( 0 && "Undefined output for this solver output" );
+	}
+
+	// clFinish in here may not be the best thing. It's possible that we should have a waitForFrameComplete function.
+	clFinish(m_cqCommandQue);
+
+} // btSoftBodySolverOutputCLtoGL::outputToVertexBuffers
+
+/**
+ * (Re)compile the two output kernels from the embedded OutputToVertexArray source.
+ *
+ * Existing kernels are always released first, so every call performs a full rebuild.
+ *
+ * @return true when both kernel handles are non-zero afterwards; m_shadersInitialized is set to the same value
+ */
+bool btSoftBodySolverOutputCLtoGL::buildShaders()
+{
+	// Ensure current kernels are released first (this also clears m_shadersInitialized)
+	releaseKernels();
+
+	outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
+	outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
+
+	// A zero handle is what this class uses for "no kernel", so it must not be reported as success
+	const bool returnVal = ( outputToVertexArrayWithNormalsKernel != 0 ) && ( outputToVertexArrayWithoutNormalsKernel != 0 );
+
+	if( returnVal )
+		m_shadersInitialized = true;
+
+	return returnVal;
+} // btSoftBodySolverOutputCLtoGL::buildShaders
+
+/** Release both output kernels (handles that are already 0 are skipped) and mark the shaders as not initialized. */
+void btSoftBodySolverOutputCLtoGL::releaseKernels()
+{
+	m_shadersInitialized = false;
+
+	// The two handles are independent of each other
+	RELEASE_CL_KERNEL( outputToVertexArrayWithoutNormalsKernel );
+	RELEASE_CL_KERNEL( outputToVertexArrayWithNormalsKernel );
+} // btSoftBodySolverOutputCLtoGL::releaseKernels
+
+/**
+ * Build the kernels on first use.
+ * @return true when the shaders are initialized after the call
+ */
+bool btSoftBodySolverOutputCLtoGL::checkInitialized()
+{
+	// Already built: nothing to do
+	if( m_shadersInitialized )
+		return true;
+
+	const bool built = buildShaders();
+	if( built )
+		m_shadersInitialized = true;
+
+	return m_shadersInitialized;
+}
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.h
@ -0,0 +1,62 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
+#define BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
+
+#include "btSoftBodySolver_OpenCL.h"
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the CL to GL interop version.
+ *
+ * The two output kernels are built on first use (see checkInitialized) and
+ * released again in the destructor.
+ */
+class btSoftBodySolverOutputCLtoGL : public btSoftBodySolverOutput
+{
+protected:
+	/** Queue on which the interop acquire/release and the output kernel are enqueued. */
+	cl_command_queue	m_cqCommandQue;
+	/** Context handle passed in at construction. */
+	cl_context			m_cxMainContext;
+	/** Helper used to compile the kernels from source strings. */
+	CLFunctions			clFunctions;
+	
+	// Kernel handles; 0 means "not built".
+	cl_kernel		outputToVertexArrayWithNormalsKernel;
+	cl_kernel		outputToVertexArrayWithoutNormalsKernel;
+
+	/** Set once the kernels have been built, cleared by releaseKernels(). */
+	bool m_shadersInitialized;
+	
+	/** Build the kernels if that has not happened yet; returns whether they are initialized. */
+	virtual bool checkInitialized();	
+	/** Release and recompile both kernels. */
+	virtual bool buildShaders();
+	/** Release both kernels and clear m_shadersInitialized. */
+	void releaseKernels();
+public:
+	/**
+	 * @param cqCommandQue   OpenCL command queue used for all work issued by this object
+	 * @param cxMainContext  OpenCL context, also handed to the CLFunctions helper
+	 */
+	btSoftBodySolverOutputCLtoGL(cl_command_queue cqCommandQue, cl_context cxMainContext) :
+		m_cqCommandQue( cqCommandQue ),
+		m_cxMainContext( cxMainContext ),
+		clFunctions(cqCommandQue, cxMainContext),
+		outputToVertexArrayWithNormalsKernel( 0 ),
+		outputToVertexArrayWithoutNormalsKernel( 0 ),
+		m_shadersInitialized( false )
+	{
+	}
+
+	virtual ~btSoftBodySolverOutputCLtoGL()
+	{
+		releaseKernels();
+	}
+
+	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h
@ -0,0 +1,84 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+
+
+/**
+ * OpenCL-backed storage for soft body triangle data.
+ * Extends btSoftBodyTriangleData with one btOpenCLBuffer per triangle attribute
+ * declared here (vertex indices, area, normal).
+ */
+class btSoftBodyTriangleDataOpenCL : public btSoftBodyTriangleData
+{
+public:
+	/** NOTE(review): presumably true while the device copies are current - confirm in the .cpp. */
+	bool				m_onGPU;
+	/** Command queue handed to the constructor. */
+	cl_command_queue    m_queue;
+
+	btOpenCLBuffer<btSoftBodyTriangleData::TriangleNodeSet>					m_clVertexIndices;
+	btOpenCLBuffer<float>								m_clArea;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clNormal;
+
+	/**
+	 * Triangle addressing information for each cloth.
+	 * Allows triangle locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_triangleAddresses;
+
+	/**
+	 * Start and length values for computation batches over triangle data.
+	 */
+	struct btSomePair
+	{
+		/** Zero both fields so a default-constructed pair never carries indeterminate values. */
+		btSomePair()
+			:first(0),second(0)
+		{
+		}
+		btSomePair(int f,int s)
+			:first(f),second(s)
+		{
+		}
+		int first;
+		int second;
+	};
+	btAlignedObjectArray< btSomePair >		m_batchStartLengths;
+
+	btSoftBodyTriangleDataOpenCL( cl_command_queue queue, cl_context ctx );
+
+	virtual ~btSoftBodyTriangleDataOpenCL();
+
+	/** Allocate enough space in all triangle-related arrays to fit numTriangles triangles */
+	virtual void createTriangles( int numTriangles );
+	
+	/** Insert the triangle described into the correct data structures assuming space has already been allocated by a call to createTriangles */
+	virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
+
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire triangle set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+}; // class btSoftBodyTriangleDataOpenCL
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h
@ -0,0 +1,166 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H 
+
+
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#ifdef USE_MINICL
+	#include "MiniCL/cl.h"
+#else //USE_MINICL
+	#ifdef __APPLE__
+		#include <OpenCL/OpenCL.h>
+	#else
+		#include <CL/cl.h>
+		#include <CL/cl_gl.h>
+	#endif //__APPLE__
+#endif//USE_MINICL
+
+
+#ifdef _WIN32//for glut.h
+#include <windows.h>
+#endif
+
+//think different
+#if defined(__APPLE__) && !defined (VMDMESA)
+#include <OpenGL/OpenGL.h>
+#include <OpenGL/gl.h>
+#include <OpenGL/glu.h>
+#include <GLUT/glut.h>
+#else
+
+
+#ifdef _WINDOWS
+#include <windows.h>
+#include <GL/gl.h>
+#include <GL/glu.h>
+#else
+#include <GL/glut.h>
+#endif //_WINDOWS
+#endif //APPLE
+
+
+
+/**
+ * Vertex buffer descriptor that exposes an OpenGL VBO to OpenCL.
+ * The constructors create a write-only CL buffer object from the VBO with
+ * clCreateFromGLBuffer; the destructor releases that buffer object.
+ *
+ * NOTE(review): the class owns m_buffer but is still copyable; copying an
+ * instance would release the same cl_mem twice. Copy operations are left
+ * untouched here to keep the interface unchanged.
+ */
+class btOpenGLInteropVertexBufferDescriptor : public btVertexBufferDescriptor
+{
+protected:
+	/** OpenCL context */
+	cl_context			m_context;
+
+	/** OpenCL command queue */
+	cl_command_queue	m_commandQueue;
+	
+	/** OpenCL interop buffer; 0 when creation failed or was never attempted */
+	cl_mem m_buffer;
+
+	/** VBO in GL that is the basis of the interop buffer */
+	GLuint m_openGLVBO;
+
+
+public:
+	/**
+	 * context is the OpenCL context this interop buffer will work in.
+	 * queue is the command queue that kernels and data movement will be enqueued into.
+	 * openGLVBO is the OpenGL vertex buffer data will be copied into.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 */
+	btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride ) :
+		m_context( context ),
+		m_commandQueue( cqCommandQue ),
+		m_buffer( 0 ),
+		m_openGLVBO( openGLVBO )
+	{
+		// Members are initialized above in every build configuration so that the
+		// destructor and the getters never read indeterminate values.
+#ifndef USE_MINICL
+		cl_int ciErrNum = CL_SUCCESS;
+
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+
+		m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			// Keep the handle null so the destructor skips the release.
+			m_buffer = 0;
+			btAssert( 0 &&  "clCreateFromGLBuffer(btOpenGLInteropVertexBufferDescriptor)");
+		}
+
+		m_hasVertexPositions = true;
+#else
+		btAssert(0);//MiniCL shouldn't get here
+#endif
+	}
+
+	/**
+	 * context is the OpenCL context this interop buffer will work in.
+	 * queue is the command queue that kernels and data movement will be enqueued into.
+	 * openGLVBO is the OpenGL vertex buffer data will be copied into.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 * normalOffset is the offset in floats to the first normal.
+	 * normalStride is the stride in floats between normals.
+	 */
+	btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
+		m_context( context ),
+		m_commandQueue( cqCommandQue ),
+		m_buffer( 0 ),
+		m_openGLVBO( openGLVBO )
+	{
+#ifndef USE_MINICL
+		cl_int ciErrNum = CL_SUCCESS;
+
+		m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			// Keep the handle null so the destructor skips the release.
+			m_buffer = 0;
+			btAssert( 0 &&  "clCreateFromGLBuffer(btOpenGLInteropVertexBufferDescriptor)");
+		}
+
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+
+		m_normalOffset = normalOffset;
+		m_normalStride = normalStride;
+		m_hasNormals = true;
+#else
+		btAssert(0);
+#endif //USE_MINICL
+		
+	}
+
+	/** Release the interop buffer if one was successfully created. */
+	virtual ~btOpenGLInteropVertexBufferDescriptor()
+	{
+		if( m_buffer )
+		{
+			clReleaseMemObject( m_buffer );
+		}
+	}
+
+	/**
+	 * Return the type of the vertex buffer descriptor.
+	 */
+	virtual BufferTypes getBufferType() const
+	{
+		return OPENGL_BUFFER;
+	}
+
+	/** Return the OpenCL context given to the constructor. */
+	virtual cl_context getContext() const
+	{
+		return m_context;
+	}
+
+	/** Return the interop buffer handle (0 if creation failed). */
+	virtual cl_mem getBuffer() const
+	{
+		return m_buffer;
+	}	
+};
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h
@ -0,0 +1,52 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
+
+
+/**
+ * OpenCL-backed storage for soft body vertex data.
+ * Adds one btOpenCLBuffer per vertex attribute on top of btSoftBodyVertexData.
+ */
+class btSoftBodyVertexDataOpenCL : public btSoftBodyVertexData
+{
+protected:
+	bool m_onGPU;
+	cl_command_queue m_queue;
+
+public:
+	// Per-vertex device buffers, one per attribute.
+	btOpenCLBuffer<int> m_clClothIdentifier;
+	btOpenCLBuffer<Vectormath::Aos::Point3> m_clVertexPosition;
+	btOpenCLBuffer<Vectormath::Aos::Point3> m_clVertexPreviousPosition;
+	btOpenCLBuffer<Vectormath::Aos::Vector3> m_clVertexVelocity;
+	btOpenCLBuffer<Vectormath::Aos::Vector3> m_clVertexForceAccumulator;
+	btOpenCLBuffer<Vectormath::Aos::Vector3> m_clVertexNormal;
+	btOpenCLBuffer<float> m_clVertexInverseMass;
+	btOpenCLBuffer<float> m_clVertexArea;
+	btOpenCLBuffer<int> m_clVertexTriangleCount;
+
+	btSoftBodyVertexDataOpenCL( cl_command_queue queue,  cl_context ctx);
+	virtual ~btSoftBodyVertexDataOpenCL();
+
+	virtual bool onAccelerator();
+	virtual bool moveToAccelerator();
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h
@ -0,0 +1,527 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_OPENCL_H
+
+#include "stddef.h" //for size_t
+#include "vectormath/vmInclude.h"
+
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+#include "btSoftBodySolverLinkData_OpenCL.h"
+#include "btSoftBodySolverVertexData_OpenCL.h"
+#include "btSoftBodySolverTriangleData_OpenCL.h"
+
+/**
+ * Helper that compiles OpenCL kernels for a given command queue and context
+ * and counts compilation failures.
+ * compileCLKernelFromString is virtual, so the class is meant to be subclassed;
+ * the solver accepts replacements through a CLFunctions pointer.
+ */
+class CLFunctions
+{
+protected:
+	cl_command_queue	m_cqCommandQue;
+	cl_context			m_cxMainContext;
+
+	/** Failure counter; starts at 0 and is reset by clearKernelCompilationFailures(). */
+	int	m_kernelCompilationFailures;
+
+
+public:
+	CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) :
+		m_cqCommandQue( cqCommandQue ),
+		m_cxMainContext( cxMainContext ),
+		m_kernelCompilationFailures(0)
+	{
+	}
+
+	/**
+	 * Virtual destructor: the class has a virtual member and is used through
+	 * base pointers, so destroying a subclass via CLFunctions* must be well defined.
+	 */
+	virtual ~CLFunctions()
+	{
+	}
+
+	/** Return the current value of the kernel compilation failure counter. */
+	int getKernelCompilationFailures() const
+	{
+		return m_kernelCompilationFailures;
+	}
+
+	/**
+	 * Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
+	 */	
+	virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros, const char* srcFileNameForCaching);
+
+	/** Reset the kernel compilation failure counter to zero. */
+	void	clearKernelCompilationFailures()
+	{
+		m_kernelCompilationFailures=0;
+	}
+};
+
+/**
+ * Entry in the collision shape array.
+ * Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
+ *
+ * Member order and types are unchanged; only the default values are completed.
+ */
+struct CollisionShapeDescription
+{
+	Vectormath::Aos::Transform3 shapeTransform;
+	Vectormath::Aos::Vector3 linearVelocity;
+	Vectormath::Aos::Vector3 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType;
+
+	// Both needed for capsule
+	float radius;
+	float halfHeight;
+	int upAxis;
+	
+	float margin;
+	float friction;
+
+	/**
+	 * Zero every scalar field so that a default-constructed description never
+	 * carries indeterminate values. The Vectormath members are left to their
+	 * own default constructors.
+	 */
+	CollisionShapeDescription()
+	{
+		softBodyIdentifier = 0;
+		collisionShapeType = 0;
+		radius = 0;
+		halfHeight = 0;
+		upAxis = 0;
+		margin = 0;
+		friction = 0;
+	}
+};
+
+/**
+ * SoftBody class to maintain information about a soft body instance
+ * within a solver.
+ * This data addresses the main solver arrays.
+ *
+ * All counters and offsets start at zero; the accessors are plain field
+ * reads and writes with no validation. Getters are const-qualified so they
+ * can be called through const references and pointers.
+ */
+class btOpenCLAcceleratedSoftBodyInterface
+{
+protected:
+	/** Current number of vertices that are part of this cloth */
+	int m_numVertices;
+	/** Maximum number of vertices allocated to be part of this cloth */
+	int m_maxVertices;
+	/** Current number of triangles that are part of this cloth */
+	int m_numTriangles;
+	/** Maximum number of triangles allocated to be part of this cloth */
+	int m_maxTriangles;
+	/** Index of first vertex in the world allocated to this cloth */
+	int m_firstVertex;
+	/** Index of first triangle in the world allocated to this cloth */
+	int m_firstTriangle;
+	/** Index of first link in the world allocated to this cloth */
+	int m_firstLink;
+	/** Maximum number of links allocated to this cloth */
+	int m_maxLinks;
+	/** Current number of links allocated to this cloth */
+	int m_numLinks;
+
+	/** The actual soft body this data represents */
+	btSoftBody *m_softBody;
+
+
+public:
+	/** Record the soft body pointer and zero every counter and offset. */
+	btOpenCLAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
+		m_numVertices( 0 ),
+		m_maxVertices( 0 ),
+		m_numTriangles( 0 ),
+		m_maxTriangles( 0 ),
+		m_firstVertex( 0 ),
+		m_firstTriangle( 0 ),
+		m_firstLink( 0 ),
+		m_maxLinks( 0 ),
+		m_numLinks( 0 ),
+		m_softBody( softBody )
+	{
+	}
+
+	int getNumVertices() const
+	{
+		return m_numVertices;
+	}
+
+	int getNumTriangles() const
+	{
+		return m_numTriangles;
+	}
+
+	int getMaxVertices() const
+	{
+		return m_maxVertices;
+	}
+
+	int getMaxTriangles() const
+	{
+		return m_maxTriangles;
+	}
+
+	int getFirstVertex() const
+	{
+		return m_firstVertex;
+	}
+
+	int getFirstTriangle() const
+	{
+		return m_firstTriangle;
+	}
+	
+	/**
+	 * Update the bounds in the btSoftBody object
+	 */
+	void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
+
+	// TODO: All of these set functions will have to do checks and
+	// update the world because restructuring of the arrays will be necessary
+	// Reasonable use of "friend"?
+	void setNumVertices( int numVertices )
+	{
+		m_numVertices = numVertices;
+	}	
+
+	void setNumTriangles( int numTriangles )
+	{
+		m_numTriangles = numTriangles;
+	}
+
+	void setMaxVertices( int maxVertices )
+	{
+		m_maxVertices = maxVertices;
+	}
+
+	void setMaxTriangles( int maxTriangles )
+	{
+		m_maxTriangles = maxTriangles;
+	}
+
+	void setFirstVertex( int firstVertex )
+	{
+		m_firstVertex = firstVertex;
+	}
+
+	void setFirstTriangle( int firstTriangle )
+	{
+		m_firstTriangle = firstTriangle;
+	}
+
+	void setMaxLinks( int maxLinks )
+	{
+		m_maxLinks = maxLinks;
+	}
+
+	void setNumLinks( int numLinks )
+	{
+		m_numLinks = numLinks;
+	}
+
+	void setFirstLink( int firstLink )
+	{
+		m_firstLink = firstLink;
+	}
+
+	int getMaxLinks() const
+	{
+		return m_maxLinks;
+	}
+
+	int getNumLinks() const
+	{
+		return m_numLinks;
+	}
+
+	int getFirstLink() const
+	{
+		return m_firstLink;
+	}
+
+	/** Return the soft body pointer supplied at construction. */
+	btSoftBody* getSoftBody() const
+	{
+		return m_softBody;
+	}
+
+};
+
+
+
+/**
+ * Soft body solver that keeps its link, vertex and triangle data in
+ * OpenCL-backed containers and reports itself as CL_SOLVER.
+ * Each host-side per-cloth array declared below is paired with a
+ * btOpenCLBuffer of the same element type.
+ */
+class btOpenCLSoftBodySolver : public btSoftBodySolver
+{
+public:
+	
+
+	/** Three unsigned ints followed by an explicit padding word; every field is zeroed unless given. */
+	struct UIntVector3
+	{
+		UIntVector3()
+		{
+			x = 0;
+			y = 0;
+			z = 0;
+			_padding = 0;
+		}
+		
+		UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ )
+		{
+			x = x_;
+			y = y_;
+			z = z_;
+			_padding = 0;
+		}
+			
+		unsigned int x;
+		unsigned int y;
+		unsigned int z;
+		unsigned int _padding;
+	};
+
+	/**
+	 * Pair of collision object indices for one cloth.
+	 * NOTE(review): the field name endObject suggests a [firstObject, endObject) range,
+	 * while the comment on m_perClothCollisionObjects speaks of a count - confirm in the .cpp.
+	 */
+	struct CollisionObjectIndices
+	{
+		CollisionObjectIndices( int f, int e )
+		{
+			firstObject = f;
+			endObject = e;
+		}
+
+		int firstObject;
+		int endObject;
+	};
+
+	// Solver-wide data containers; declared public in this class.
+	btSoftBodyLinkDataOpenCL m_linkData;
+	btSoftBodyVertexDataOpenCL m_vertexData;
+	btSoftBodyTriangleDataOpenCL m_triangleData;
+
+protected:
+
+	/** Built-in kernel compilation helper; setCLFunctions(0) points m_currentCLFunctions back at it. */
+	CLFunctions m_defaultCLFunctions;
+	/** Helper currently in use; selected through setCLFunctions(). */
+	CLFunctions* m_currentCLFunctions;
+
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+
+	bool m_shadersInitialized;
+
+	/** 
+	 * Cloths owned by this solver.
+	 * Only our cloths are in this array.
+	 */
+	btAlignedObjectArray< btOpenCLAcceleratedSoftBodyInterface * > m_softBodySet;
+
+	/** Acceleration value to be applied to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothAcceleration;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clPerClothAcceleration;
+
+	/** Wind velocity to be applied normal to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothWindVelocity;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clPerClothWindVelocity;
+
+	/** Velocity damping factor */
+	btAlignedObjectArray< float >						m_perClothDampingFactor;
+	btOpenCLBuffer<float>								m_clPerClothDampingFactor;
+
+	/** Velocity correction coefficient */
+	btAlignedObjectArray< float >						m_perClothVelocityCorrectionCoefficient;
+	btOpenCLBuffer<float>								m_clPerClothVelocityCorrectionCoefficient;
+
+	/** Lift parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothLiftFactor;
+	btOpenCLBuffer<float>								m_clPerClothLiftFactor;
+	
+	/** Drag parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothDragFactor;
+	btOpenCLBuffer<float>								m_clPerClothDragFactor;
+
+	/** Density of the medium in which each cloth sits */
+	btAlignedObjectArray< float >						m_perClothMediumDensity;
+	btOpenCLBuffer<float>								m_clPerClothMediumDensity;
+
+	/** 
+	 * Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
+	 */
+	btAlignedObjectArray< CollisionObjectIndices >		m_perClothCollisionObjects;
+	btOpenCLBuffer<CollisionObjectIndices>				m_clPerClothCollisionObjects;
+
+	/** 
+	 * Collision shapes being passed across to the cloths in this solver.
+	 */
+	btAlignedObjectArray< CollisionShapeDescription >	m_collisionObjectDetails;
+	btOpenCLBuffer< CollisionShapeDescription >			m_clCollisionObjectDetails;
+
+
+	
+	/** 
+	 * Friction coefficient for each cloth
+	 */
+	btAlignedObjectArray< float >	m_perClothFriction;
+	btOpenCLBuffer< float >			m_clPerClothFriction;
+
+	// anchor node info
+	struct AnchorNodeInfoCL
+	{
+		int clVertexIndex;
+		btSoftBody::Node* pNode;
+	};
+
+	btAlignedObjectArray<AnchorNodeInfoCL> m_anchorNodeInfoArray;
+	btAlignedObjectArray<Vectormath::Aos::Point3> m_anchorPosition;
+	btOpenCLBuffer<Vectormath::Aos::Point3>		  m_clAnchorPosition;
+	btAlignedObjectArray<int> m_anchorIndex;
+	btOpenCLBuffer<int>		  m_clAnchorIndex;
+
+	/** NOTE(review): presumably mirrors the constructor's bUpdateAchchoredNodePos argument - confirm in the .cpp. */
+	bool m_bUpdateAnchoredNodePos;
+
+	// Kernel handles used by the dispatch functions declared below.
+	cl_kernel		m_prepareLinksKernel;
+	cl_kernel		m_solvePositionsFromLinksKernel;
+	cl_kernel		m_updateConstantsKernel;
+	cl_kernel		m_integrateKernel;
+	cl_kernel		m_addVelocityKernel;
+	cl_kernel		m_updatePositionsFromVelocitiesKernel;
+	cl_kernel		m_updateVelocitiesFromPositionsWithoutVelocitiesKernel;
+	cl_kernel		m_updateVelocitiesFromPositionsWithVelocitiesKernel;
+	cl_kernel		m_vSolveLinksKernel;
+	cl_kernel		m_solveCollisionsAndUpdateVelocitiesKernel;
+	cl_kernel		m_resetNormalsAndAreasKernel;
+	cl_kernel		m_normalizeNormalsAndAreasKernel;
+	cl_kernel		m_updateSoftBodiesKernel;
+
+	cl_kernel		m_outputToVertexArrayKernel;
+	cl_kernel		m_applyForcesKernel;
+	cl_kernel       m_updateFixedVertexPositionsKernel;	
+
+	cl_command_queue	m_cqCommandQue;
+	cl_context			m_cxMainContext;
+	
+	/** Work group size read and written by get/setDefaultWorkgroupSize. */
+	size_t				m_defaultWorkGroupSize;
+
+
+	virtual bool buildShaders();
+
+	void resetNormalsAndAreas( int numVertices );
+
+	void normalizeNormalsAndAreas( int numVertices );
+
+	void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
+
+	void prepareCollisionConstraints();
+	
+	Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
+
+	void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
+	
+
+	int findSoftBodyIndex( const btSoftBody* const softBody );
+
+	virtual void applyForces( float solverdt );
+
+	void updateFixedVertexPositions();
+
+	/**
+	 * Integrate motion on the solver.
+	 */
+	virtual void integrate( float solverdt );
+
+	virtual void updateConstants( float timeStep );
+
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	void prepareLinks();
+
+	void solveLinksForVelocity( int startLink, int numLinks, float kst );
+
+	void updatePositionsFromVelocities( float solverdt );
+
+	virtual void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+	
+	void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
+
+	void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
+	virtual void solveCollisionsAndUpdateVelocities( float isolverdt );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+	
+	void updateBounds();
+
+	void releaseKernels();
+
+public:
+	btOpenCLSoftBodySolver(cl_command_queue queue,cl_context	ctx, bool bUpdateAchchoredNodePos = false);
+
+	virtual ~btOpenCLSoftBodySolver();
+
+
+	
+	btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual btSoftBodyVertexData &getVertexData();
+
+	virtual btSoftBodyTriangleData &getTriangleData();
+
+	/** Identify this solver as the plain OpenCL solver. */
+	virtual SolverTypes getSolverType() const
+	{
+		return CL_SOLVER;
+	}
+
+
+	virtual bool checkInitialized();
+
+	virtual void updateSoftBodies( );
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void copyBackToSoftBodies(bool bMove = true);
+
+	virtual void solveConstraints( float solverdt );
+
+	virtual void predictMotion( float solverdt );
+
+	virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );
+
+	virtual void processCollision( btSoftBody*, btSoftBody* );
+
+	/** Store the work group size returned by getDefaultWorkGroupSize(). */
+	virtual void	setDefaultWorkgroupSize(size_t workGroupSize)
+	{
+		m_defaultWorkGroupSize = workGroupSize;
+	}
+	virtual size_t	getDefaultWorkGroupSize() const
+	{
+		return m_defaultWorkGroupSize;
+	}
+
+	/**
+	 * Select the kernel compilation helper.
+	 * A null pointer switches back to the solver's own m_defaultCLFunctions.
+	 * Only the pointer is stored here; nothing in this header takes ownership of funcs.
+	 */
+	void	setCLFunctions(CLFunctions* funcs)
+	{
+		if (funcs)
+			m_currentCLFunctions = funcs;
+		else
+			m_currentCLFunctions  = &m_defaultCLFunctions;
+	}
+
+}; // btOpenCLSoftBodySolver
+
+
+/**
+ * Solver output path that moves data from the CL solver to a CPU-side target.
+ * Declares only the copy routine; the class adds no state of its own.
+ */
+class btSoftBodySolverOutputCLtoCPU : public btSoftBodySolverOutput
+{
+public:
+	btSoftBodySolverOutputCLtoCPU() {}
+
+	/** Write the current computed vertex data of softBody into vertexBuffer. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h
@ -0,0 +1,81 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
+#define BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
+
+#include "stddef.h" //for size_t
+#include "vectormath/vmInclude.h"
+
+#include "btSoftBodySolver_OpenCL.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+#include "btSoftBodySolverLinkData_OpenCLSIMDAware.h"
+#include "btSoftBodySolverVertexData_OpenCL.h"
+#include "btSoftBodySolverTriangleData_OpenCL.h"
+
+
+
+
+
+/**
+ * Variant of btOpenCLSoftBodySolver that reports itself as CL_SIMD_SOLVER
+ * and keeps its link data in a btSoftBodyLinkDataOpenCLSIMDAware container.
+ */
+class btOpenCLSoftBodySolverSIMDAware : public btOpenCLSoftBodySolver
+{
+protected:
+	// Declared with the same name as the base class's m_linkData member.
+	btSoftBodyLinkDataOpenCLSIMDAware m_linkData;
+
+	virtual bool buildShaders();
+
+	// Overrides the virtual of the same signature in btOpenCLSoftBodySolver.
+	virtual void updateConstants( float timeStep );
+
+	// Same signature as the non-virtual base member.
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+	// ---- Kernel dispatches (both override base-class virtuals) ----
+	virtual void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+	virtual void solveCollisionsAndUpdateVelocities( float isolverdt );
+	// ---- End kernel dispatches ----
+
+public:
+	btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context	ctx, bool bUpdateAchchoredNodePos = false);
+	virtual ~btOpenCLSoftBodySolverSIMDAware();
+
+	/** Identify this solver as the SIMD-aware OpenCL solver. */
+	virtual SolverTypes getSolverType() const
+	{
+		return CL_SIMD_SOLVER;
+	}
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void solveConstraints( float solverdt );
+
+}; // btOpenCLSoftBodySolverSIMDAware
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
--- a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h
@ -0,0 +1,748 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_DATA_H
+#define BT_SOFT_BODY_SOLVER_DATA_H
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+
+
+/**
+ * Host-side container for per-link soft body data.
+ * Every link attribute is kept in its own array; all arrays are indexed by link index
+ * and are grown together by createLinks and emptied together by clear.
+ */
+class btSoftBodyLinkData
+{
+public:
+	/**
+	 * Class representing a link as a pair of indices into the vertex array.
+	 */
+	class LinkNodePair
+	{
+	public:
+		int vertex0;
+		int vertex1;
+
+		/** Create a pair with both indices set to 0. */
+		LinkNodePair()
+		{
+			vertex0 = 0;
+			vertex1 = 0;
+		}
+
+		/** Create a pair joining vertex v0 and vertex v1. */
+		LinkNodePair( int v0, int v1 )
+		{
+			vertex0 = v0;
+			vertex1 = v1;
+		}
+	};
+
+	/**
+	 * Class describing a link for input into the system.
+	 */
+	class LinkDescription
+	{
+	protected:
+		int m_vertex0;
+		int m_vertex1;
+		float m_linkLinearStiffness;
+		float m_linkStrength;
+
+	public:
+
+		/** Link between vertex 0 and vertex 0 with stiffness and strength of 1. */
+		LinkDescription()
+		{
+			m_vertex0 = 0;
+			m_vertex1 = 0;
+			m_linkLinearStiffness = 1.0;
+			m_linkStrength = 1.0;
+		}
+
+		/** Link between the two given vertices with the given stiffness; strength defaults to 1. */
+		LinkDescription( int newVertex0, int newVertex1, float linkLinearStiffness )
+		{
+			m_vertex0 = newVertex0;
+			m_vertex1 = newVertex1;
+			m_linkLinearStiffness = linkLinearStiffness;
+			m_linkStrength = 1.0;
+		}
+
+		/** Return both vertex indices packed into a LinkNodePair. */
+		LinkNodePair getVertexPair() const
+		{
+			LinkNodePair nodes;
+			nodes.vertex0 = m_vertex0;
+			nodes.vertex1 = m_vertex1;
+			return nodes;
+		}
+
+		void setVertex0( int vertex )
+		{
+			m_vertex0 = vertex;
+		}
+
+		void setVertex1( int vertex )
+		{
+			m_vertex1 = vertex;
+		}
+
+		void setLinkLinearStiffness( float linearStiffness )
+		{
+			m_linkLinearStiffness = linearStiffness;
+		}
+
+		void setLinkStrength( float strength )
+		{
+			m_linkStrength = strength;
+		}
+
+		int getVertex0() const
+		{
+			return m_vertex0;
+		}
+
+		int getVertex1() const
+		{
+			return m_vertex1;
+		}
+
+		float getLinkStrength() const
+		{
+			return m_linkStrength;
+		}
+
+		float getLinkLinearStiffness() const
+		{
+			return m_linkLinearStiffness;
+		}
+	};
+
+
+protected:
+	// NOTE:
+	// Vertex reference data is stored relative to global array, not relative to individual cloth.
+	// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
+	// to another.
+
+	btAlignedObjectArray< LinkNodePair > m_links; // Vertex pair for the link
+	btAlignedObjectArray< float >								m_linkStrength; // Strength of each link
+	// (inverseMassA + inverseMassB)/ linear stiffness coefficient
+	btAlignedObjectArray< float >								m_linksMassLSC; 
+	btAlignedObjectArray< float >								m_linksRestLengthSquared; 
+	// Current vector length of link
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >			m_linksCLength;
+	// 1/(current length * current length * massLSC)
+	btAlignedObjectArray< float >								m_linksLengthRatio; 
+	btAlignedObjectArray< float >								m_linksRestLength;
+	btAlignedObjectArray< float >								m_linksMaterialLinearStiffnessCoefficient;
+
+public:
+	btSoftBodyLinkData()
+	{
+	}
+
+	virtual ~btSoftBodyLinkData()
+	{
+	}
+
+	/**
+	 * Empty every per-link array.
+	 * m_linksCLength is emptied together with the others so that all arrays stay the
+	 * same size as m_links; it is one of the arrays grown by createLinks.
+	 */
+	virtual void clear()
+	{
+		m_links.resize(0);
+		m_linkStrength.resize(0);
+		m_linksMassLSC.resize(0);
+		m_linksRestLengthSquared.resize(0);
+		m_linksCLength.resize(0);
+		m_linksLengthRatio.resize(0);
+		m_linksRestLength.resize(0);
+		m_linksMaterialLinearStiffnessCoefficient.resize(0);
+	}
+
+	/** Return the number of links currently stored. */
+	int getNumLinks()
+	{
+		return m_links.size();
+	}
+
+	/**
+	 * Grow all link-related arrays by numLinks entries.
+	 * The new entries are appended after the links that are already stored.
+	 */
+	virtual void createLinks( int numLinks )
+	{
+		int previousSize = m_links.size();
+		int newSize = previousSize + numLinks;
+
+		// Resize all the arrays that store link data
+		m_links.resize( newSize );
+		m_linkStrength.resize( newSize );
+		m_linksMassLSC.resize( newSize );
+		m_linksRestLengthSquared.resize( newSize );
+		m_linksCLength.resize( newSize );
+		m_linksLengthRatio.resize( newSize );
+		m_linksRestLength.resize( newSize );
+		m_linksMaterialLinearStiffnessCoefficient.resize( newSize );
+	}
+	
+	/**
+	 * Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks.
+	 * Only the vertex pair, strength and linear stiffness come from the description; the remaining per-link values are reset to zero.
+	 */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex )
+	{
+		m_links[linkIndex] = link.getVertexPair();
+		m_linkStrength[linkIndex] = link.getLinkStrength();
+		m_linksMassLSC[linkIndex] = 0.f;
+		m_linksRestLengthSquared[linkIndex] = 0.f;
+		m_linksCLength[linkIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_linksLengthRatio[linkIndex] = 0.f;
+		m_linksRestLength[linkIndex] = 0.f;
+		m_linksMaterialLinearStiffnessCoefficient[linkIndex] = link.getLinkLinearStiffness();
+	}
+
+
+	/**
+	 * Return true if data is on the accelerator.
+	 * The CPU version of this class will return true here because
+	 * the CPU is the same as the accelerator.
+	 */
+	virtual bool onAccelerator()
+	{
+		return true;
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveToAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * Move data to host memory from the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveFromAccelerator()
+	{
+		return true;
+	}
+
+
+
+	/**
+	 * Return reference to the vertex index pair for link linkIndex as stored on the host.
+	 */
+	LinkNodePair &getVertexPair( int linkIndex )
+	{
+		return m_links[linkIndex];
+	}
+
+	/** 
+	 * Return reference to strength of link linkIndex as stored on the host.
+	 */
+	float &getStrength( int linkIndex )
+	{
+		return m_linkStrength[linkIndex];
+	}
+
+	/**
+	 * Return a reference to the strength of the link corrected for link sorting.
+	 * This is important if we are using data on an accelerator which has the data sorted in some fashion.
+	 * This base implementation applies no correction and forwards to getStrength.
+	 */
+	virtual float &getStrengthCorrected( int linkIndex )
+	{
+		return getStrength( linkIndex );
+	}
+
+	/**
+	 * Return reference to the rest length of link linkIndex as stored on the host.
+	 */
+	float &getRestLength( int linkIndex )
+	{
+		return m_linksRestLength[linkIndex];
+	}
+
+	/**
+	 * Return reference to linear stiffness coefficient for link linkIndex as stored on the host.
+	 */
+	float &getLinearStiffnessCoefficient( int linkIndex )
+	{
+		return m_linksMaterialLinearStiffnessCoefficient[linkIndex];
+	}
+
+	/**
+	 * Return reference to the MassLSC value for link linkIndex as stored on the host.
+	 */
+	float &getMassLSC( int linkIndex )
+	{
+		return m_linksMassLSC[linkIndex];
+	}
+
+	/**
+	 * Return reference to rest length squared for link linkIndex as stored on the host.
+	 */
+	float &getRestLengthSquared( int linkIndex )
+	{
+		return m_linksRestLengthSquared[linkIndex];
+	}
+
+	/**
+	 * Return reference to current length of link linkIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getCurrentLength( int linkIndex )
+	{
+		return m_linksCLength[linkIndex];
+	}
+
+	/**
+	 * Return reference to the link length ratio for link linkIndex as stored on the host.
+	 */
+	float &getLinkLengthRatio( int linkIndex )
+	{
+		return m_linksLengthRatio[linkIndex];
+	}
+};
+
+
+
+/**
+ * Host-side container for per-vertex soft body data.
+ * Each vertex attribute is held in its own array and all arrays are resized together,
+ * which keeps them consistent and leaves room to change the storage format later.
+ */
+class btSoftBodyVertexData
+{
+public:
+	/**
+	 * Input description of a single vertex: a position plus an inverse mass.
+	 */
+	class VertexDescription
+	{
+	private:
+		Vectormath::Aos::Point3 m_position;
+		/** Inverse mass. The mass-based setters store 0 here when the mass is not positive. */
+		float m_inverseMass;
+
+	public:
+		/** Vertex at the origin with an inverse mass of 0. */
+		VertexDescription() :
+			m_position( 0.f, 0.f, 0.f ),
+			m_inverseMass( 0.f )
+		{
+		}
+
+		/** Vertex at position; a mass that is not positive yields an inverse mass of 0. */
+		VertexDescription( const Vectormath::Aos::Point3 &position, float mass ) :
+			m_position( position ),
+			m_inverseMass( ( mass > 0.f ) ? 1.0f/mass : 0.f )
+		{
+		}
+
+		void setPosition( const Vectormath::Aos::Point3 &position )
+		{
+			m_position = position;
+		}
+
+		/** Store the inverse mass exactly as given. */
+		void setInverseMass( float inverseMass )
+		{
+			m_inverseMass = inverseMass;
+		}
+
+		/** Store 1/mass, or 0 when mass is not positive. */
+		void setMass( float mass )
+		{
+			m_inverseMass = ( mass > 0.f ) ? 1.0f/mass : 0.f;
+		}
+
+		Vectormath::Aos::Point3 getPosition() const
+		{
+			return m_position;
+		}
+
+		float getInverseMass() const
+		{
+			return m_inverseMass;
+		}
+
+		/** Return 1/inverseMass, or 0 when the inverse mass is exactly 0. */
+		float getMass() const
+		{
+			return ( m_inverseMass == 0.f ) ? 0.f : 1.0f/m_inverseMass;
+		}
+	};
+protected:
+
+	// Which cloth each vertex belongs to.
+	// A CPU solver can walk each cloth's own vertices and does not strictly need this, but on a
+	// parallel accelerator a per-vertex identifier helps when looking up per-cloth data, and
+	// having it in-array may also be useful for sorting.
+	btAlignedObjectArray< int >							m_clothIdentifier;
+	btAlignedObjectArray< Vectormath::Aos::Point3 >		m_vertexPosition;			// Current positions
+	btAlignedObjectArray< Vectormath::Aos::Point3 >		m_vertexPreviousPosition;	// Previous positions
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexVelocity;			// Velocities
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexForceAccumulator;	// Accumulated forces
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexNormal;				// Normals
+	btAlignedObjectArray< float >						m_vertexInverseMass;		// Inverse masses
+	btAlignedObjectArray< float >						m_vertexArea;				// Area controlled by each vertex
+	btAlignedObjectArray< int >							m_vertexTriangleCount;		// Triangles touching each vertex
+
+public:
+	btSoftBodyVertexData()
+	{
+	}
+
+	virtual ~btSoftBodyVertexData()
+	{
+	}
+
+	/** Empty every per-vertex array. */
+	virtual void clear()
+	{
+		m_clothIdentifier.resize( 0 );
+		m_vertexPosition.resize( 0 );
+		m_vertexPreviousPosition.resize( 0 );
+		m_vertexVelocity.resize( 0 );
+		m_vertexForceAccumulator.resize( 0 );
+		m_vertexNormal.resize( 0 );
+		m_vertexInverseMass.resize( 0 );
+		m_vertexArea.resize( 0 );
+		m_vertexTriangleCount.resize( 0 );
+	}
+
+	/** Number of vertex slots currently allocated. */
+	int getNumVertices()
+	{
+		return m_vertexPosition.size();
+	}
+
+	/** Cloth identifier recorded for vertexIndex; createVertices marks spare slots with -1. */
+	int getClothIdentifier( int vertexIndex )
+	{
+		return m_clothIdentifier[vertexIndex];
+	}
+
+	/**
+	 * Write the described vertex into slot vertexIndex.
+	 * Current and previous position both start at the described position, the inverse
+	 * mass is copied, and every other per-vertex value is reset to zero.
+	 */
+	void setVertexAt( const VertexDescription &vertex, int vertexIndex )
+	{
+		const Vectormath::Aos::Vector3 zero( 0.f, 0.f, 0.f );
+
+		m_vertexPosition[vertexIndex] = vertex.getPosition();
+		m_vertexPreviousPosition[vertexIndex] = vertex.getPosition();
+		m_vertexVelocity[vertexIndex] = zero;
+		m_vertexForceAccumulator[vertexIndex] = zero;
+		m_vertexNormal[vertexIndex] = zero;
+		m_vertexInverseMass[vertexIndex] = vertex.getInverseMass();
+		m_vertexArea[vertexIndex] = 0.f;
+		m_vertexTriangleCount[vertexIndex] = 0;
+	}
+
+	/**
+	 * Append vertex slots for cloth clothIdentifier.
+	 * maxVertices is the number of slots to append, which leaves room for spare vertices
+	 * (alignment, tearing); a value of 0 means "exactly numVertices".
+	 * Slots past the first numVertices new ones get a cloth identifier of -1.
+	 */
+	void createVertices( int numVertices, int clothIdentifier, int maxVertices = 0 )
+	{
+		const int firstNewSlot = m_vertexPosition.size();
+		const int slotsToAdd = ( maxVertices == 0 ) ? numVertices : maxVertices;
+		const int totalSlots = firstNewSlot + slotsToAdd;
+
+		// Grow every per-vertex array to the same size
+		m_clothIdentifier.resize( totalSlots );
+		m_vertexPosition.resize( totalSlots );
+		m_vertexPreviousPosition.resize( totalSlots );
+		m_vertexVelocity.resize( totalSlots );
+		m_vertexForceAccumulator.resize( totalSlots );
+		m_vertexNormal.resize( totalSlots );
+		m_vertexInverseMass.resize( totalSlots );
+		m_vertexArea.resize( totalSlots );
+		m_vertexTriangleCount.resize( totalSlots );
+
+		// Tag all new slots with the cloth first, then overwrite the spare tail with -1
+		for( int slot = firstNewSlot; slot < totalSlots; ++slot )
+		{
+			m_clothIdentifier[slot] = clothIdentifier;
+		}
+		for( int spare = firstNewSlot + numVertices; spare < totalSlots; ++spare )
+		{
+			m_clothIdentifier[spare] = -1;
+		}
+	}
+
+	// Accessors are defined in the header so that they can be inlined
+
+	/**
+	 * Host-side position of vertex vertexIndex, by reference.
+	 */
+	Vectormath::Aos::Point3 &getPosition( int vertexIndex )
+	{
+		return m_vertexPosition[vertexIndex];
+	}
+
+	/**
+	 * Host-side position of vertex vertexIndex, by value.
+	 */
+	Vectormath::Aos::Point3 getPosition( int vertexIndex ) const
+	{
+		return m_vertexPosition[vertexIndex];
+	}
+
+	/**
+	 * Host-side previous position of vertex vertexIndex, by reference.
+	 */
+	Vectormath::Aos::Point3 &getPreviousPosition( int vertexIndex )
+	{
+		return m_vertexPreviousPosition[vertexIndex];
+	}
+
+	/**
+	 * Host-side velocity of vertex vertexIndex, by reference.
+	 */
+	Vectormath::Aos::Vector3 &getVelocity( int vertexIndex )
+	{
+		return m_vertexVelocity[vertexIndex];
+	}
+
+	/**
+	 * Host-side force accumulator of vertex vertexIndex, by reference.
+	 */
+	Vectormath::Aos::Vector3 &getForceAccumulator( int vertexIndex )
+	{
+		return m_vertexForceAccumulator[vertexIndex];
+	}
+
+	/**
+	 * Host-side normal of vertex vertexIndex, by reference.
+	 */
+	Vectormath::Aos::Vector3 &getNormal( int vertexIndex )
+	{
+		return m_vertexNormal[vertexIndex];
+	}
+
+	/**
+	 * Host-side normal of vertex vertexIndex, by value.
+	 */
+	Vectormath::Aos::Vector3 getNormal( int vertexIndex ) const
+	{
+		return m_vertexNormal[vertexIndex];
+	}
+
+	/**
+	 * Host-side inverse mass of vertex vertexIndex, by reference.
+	 */
+	float &getInverseMass( int vertexIndex )
+	{
+		return m_vertexInverseMass[vertexIndex];
+	}
+
+	/**
+	 * Area controlled by vertex vertexIndex, by reference.
+	 */
+	float &getArea( int vertexIndex )
+	{
+		return m_vertexArea[vertexIndex];
+	}
+
+	/**
+	 * Number of triangles touching vertex vertexIndex, by reference.
+	 */
+	int &getTriangleCount( int vertexIndex )
+	{
+		return m_vertexTriangleCount[vertexIndex];
+	}
+
+
+
+	/**
+	 * Report whether the data is on the accelerator.
+	 * This CPU version always answers true, since the CPU
+	 * is itself the accelerator.
+	 */
+	virtual bool onAccelerator()
+	{
+		return true;
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * This CPU version always reports that the move happened.
+	 */
+	virtual bool moveToAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * With bCopy false, move data from the accelerator to host memory.
+	 * With bCopy true, copy it to host memory instead, so that the next
+	 * moveToAccelerator() call does not move it back.
+	 * bCopyMinimum only matters when bCopy is true; it restricts the copy to
+	 * vertex positions and normals.
+	 * This CPU version ignores both flags and always reports success.
+	 */
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true)
+	{
+		return true;
+	}
+
+	/** Direct access to the whole array of vertex positions. */
+	btAlignedObjectArray< Vectormath::Aos::Point3 >	&getVertexPositions()
+	{
+		return m_vertexPosition;
+	}
+};
+
+
+/**
+ * Host-side container for per-triangle soft body data.
+ * Vertex indices, area and normal are each kept in their own array, indexed by triangle index.
+ */
+class btSoftBodyTriangleData
+{
+public:
+	/**
+	 * Class representing a triangle as a set of three indices into the
+	 * vertex array.
+	 */
+	class TriangleNodeSet
+	{
+	public:
+		int vertex0;
+		int vertex1;
+		int vertex2;
+		int _padding;
+
+		/** Create a set with all three indices at 0 and the padding word at -1. */
+		TriangleNodeSet( )
+		{
+			vertex0 = 0;
+			vertex1 = 0;
+			vertex2 = 0;
+			_padding = -1;
+		}
+
+		/** Create a set from three vertex indices. */
+		TriangleNodeSet( int newVertex0, int newVertex1, int newVertex2 )
+		{
+			vertex0 = newVertex0;
+			vertex1 = newVertex1;
+			vertex2 = newVertex2;
+			// Same padding value as the default constructor, so the member is never left uninitialised.
+			_padding = -1;
+		}
+	};
+
+	/**
+	 * Class describing a triangle for input into the system.
+	 */
+	class TriangleDescription
+	{
+	protected:
+		int m_vertex0;
+		int m_vertex1;
+		int m_vertex2;
+
+	public:
+		/** Triangle with all three vertex indices set to 0. */
+		TriangleDescription()
+		{
+			m_vertex0 = 0;
+			m_vertex1 = 0;
+			m_vertex2 = 0;
+		}
+
+		/** Triangle over the three given vertex indices. */
+		TriangleDescription( int newVertex0, int newVertex1, int newVertex2 )
+		{
+			m_vertex0 = newVertex0;
+			m_vertex1 = newVertex1;
+			m_vertex2 = newVertex2;
+		}
+
+		/** Return the three vertex indices packed into a TriangleNodeSet. */
+		TriangleNodeSet getVertexSet() const
+		{
+			btSoftBodyTriangleData::TriangleNodeSet nodes;
+			nodes.vertex0 = m_vertex0;
+			nodes.vertex1 = m_vertex1;
+			nodes.vertex2 = m_vertex2;
+			return nodes;
+		}
+	};
+
+protected:
+	// NOTE:
+	// Vertex reference data is stored relative to global array, not relative to individual cloth.
+	// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
+	// to another.
+	btAlignedObjectArray< TriangleNodeSet > m_vertexIndices;
+	btAlignedObjectArray< float > m_area;
+	btAlignedObjectArray< Vectormath::Aos::Vector3 > m_normal;
+
+public:
+	btSoftBodyTriangleData()
+	{
+	}
+
+	virtual ~btSoftBodyTriangleData()
+	{
+
+	}
+
+	/** Empty every per-triangle array. */
+	virtual void clear()
+	{
+		m_vertexIndices.resize(0);
+		m_area.resize(0);
+		m_normal.resize(0);
+	}
+
+	/** Return the number of triangles currently stored. */
+	int getNumTriangles()
+	{
+		return m_vertexIndices.size();
+	}
+
+	/**
+	 * Store the vertex indices of the described triangle at triangleIndex.
+	 * Space must already exist at that index; area and normal are left untouched.
+	 */
+	virtual void setTriangleAt( const TriangleDescription &triangle, int triangleIndex )
+	{
+		m_vertexIndices[triangleIndex] = triangle.getVertexSet();
+	}
+
+	/** Grow all triangle-related arrays by numTriangles entries, appended after the existing ones. */
+	virtual void createTriangles( int numTriangles )		
+	{
+		int previousSize = m_vertexIndices.size();
+		int newSize = previousSize + numTriangles;
+
+		// Resize all the arrays that store triangle data
+		m_vertexIndices.resize( newSize );
+		m_area.resize( newSize );
+		m_normal.resize( newSize );
+	}
+
+	/**
+	 * Return the vertex index set for triangle triangleIndex as stored on the host.
+	 */
+	const TriangleNodeSet &getVertexSet( int triangleIndex )
+	{
+		return m_vertexIndices[triangleIndex];
+	}
+
+	/**
+	 * Get access to the triangle area.
+	 */
+	float &getTriangleArea( int triangleIndex )
+	{
+		return m_area[triangleIndex];
+	}
+
+	/**
+	 * Get access to the normal vector for this triangle.
+	 */
+	Vectormath::Aos::Vector3 &getNormal( int triangleIndex )
+	{
+		return m_normal[triangleIndex];
+	}
+
+	/**
+	 * Return true if data is on the accelerator.
+	 * The CPU version of this class will return true here because
+	 * the CPU is the same as the accelerator.
+	 */
+	virtual bool onAccelerator()
+	{
+		return true;
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveToAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * Move data to host memory from the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveFromAccelerator()
+	{
+		return true;
+	}
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_DATA_H
+