Bullet Library v2.81

This commit is contained in:
LuisAntonRebollo 2013-07-04 20:50:16 +02:00
parent 64fef8b2ad
commit 1eb94f4828
462 changed files with 59613 additions and 8036 deletions

View file

@ -0,0 +1,13 @@
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src
)
SUBDIRS (
OpenCL
)
IF( USE_DX11 )
SUBDIRS( DX11 )
ENDIF( USE_DX11 )

View file

@ -0,0 +1,83 @@
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src
)
SET(DXSDK_DIR $ENV{DXSDK_DIR})
SET(DX11_INCLUDE_PATH "${DIRECTX_SDK_BASE_DIR}/Include" CACHE DOCSTRING "Microsoft directX SDK include path")
INCLUDE_DIRECTORIES(
${DX11_INCLUDE_PATH} "../Shared/"
${VECTOR_MATH_INCLUDE}
)
SET(BulletSoftBodyDX11Solvers_SRCS
btSoftBodySolver_DX11.cpp
btSoftBodySolver_DX11SIMDAware.cpp
)
SET(BulletSoftBodyDX11Solvers_HDRS
btSoftBodySolver_DX11.h
btSoftBodySolver_DX11SIMDAware.h
../Shared/btSoftBodySolverData.h
btSoftBodySolverVertexData_DX11.h
btSoftBodySolverTriangleData_DX11.h
btSoftBodySolverLinkData_DX11.h
btSoftBodySolverLinkData_DX11SIMDAware.h
btSoftBodySolverBuffer_DX11.h
btSoftBodySolverVertexBuffer_DX11.h
)
# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
SET(BulletSoftBodyDX11Solvers_Shaders
OutputToVertexArray
UpdateNormals
Integrate
UpdatePositions
UpdateNodes
ComputeBounds
SolvePositions
SolvePositionsSIMDBatched
SolveCollisionsAndUpdateVelocities
SolveCollisionsAndUpdateVelocitiesSIMDBatched
UpdatePositionsFromVelocities
ApplyForces
PrepareLinks
VSolveLinks
)
foreach(f ${BulletSoftBodyDX11Solvers_Shaders})
LIST(APPEND BulletSoftBodyDX11Solvers_HLSL "HLSL/${f}.hlsl")
endforeach(f)
ADD_LIBRARY(BulletSoftBodySolvers_DX11 ${BulletSoftBodyDX11Solvers_SRCS} ${BulletSoftBodyDX11Solvers_HDRS} ${BulletSoftBodyDX11Solvers_HLSL})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES SOVERSION ${BULLET_VERSION})
IF (BUILD_SHARED_LIBS)
TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_DX11 BulletSoftBody BulletDynamics)
ENDIF (BUILD_SHARED_LIBS)
IF (INSTALL_LIBS)
IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_DX11 DESTINATION .)
ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_DX11 DESTINATION lib${LIB_SUFFIX})
#headers are already installed by BulletMultiThreaded library
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES FRAMEWORK true)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES PUBLIC_HEADER "${BulletSoftBodyDX11Solvers_HDRS}")
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View file

@ -0,0 +1,95 @@
MSTRINGIFY(
cbuffer ApplyForcesCB : register( b0 )
{
unsigned int numNodes;
float solverdt;
float epsilon;
int padding3;
};
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexNormal : register( t1 );
StructuredBuffer<float> g_vertexArea : register( t2 );
StructuredBuffer<float> g_vertexInverseMass : register( t3 );
// TODO: These could be combined into a lift/drag factor array along with medium density
StructuredBuffer<float> g_clothLiftFactor : register( t4 );
StructuredBuffer<float> g_clothDragFactor : register( t5 );
StructuredBuffer<float4> g_clothWindVelocity : register( t6 );
StructuredBuffer<float4> g_clothAcceleration : register( t7 );
StructuredBuffer<float> g_clothMediumDensity : register( t8 );
RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u0 );
RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
float3 projectOnAxis( float3 v, float3 a )
{
return (a*dot(v, a));
}
[numthreads(128, 1, 1)]
void
ApplyForcesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
unsigned int nodeID = DTid.x;
if( nodeID < numNodes )
{
int clothId = g_vertexClothIdentifier[nodeID];
float nodeIM = g_vertexInverseMass[nodeID];
if( nodeIM > 0.0f )
{
float3 nodeV = g_vertexVelocity[nodeID].xyz;
float3 normal = g_vertexNormal[nodeID].xyz;
float area = g_vertexArea[nodeID];
float3 nodeF = g_vertexForceAccumulator[nodeID].xyz;
// Read per-cloth values
float3 clothAcceleration = g_clothAcceleration[clothId].xyz;
float3 clothWindVelocity = g_clothWindVelocity[clothId].xyz;
float liftFactor = g_clothLiftFactor[clothId];
float dragFactor = g_clothDragFactor[clothId];
float mediumDensity = g_clothMediumDensity[clothId];
// Apply the acceleration to the cloth rather than do this via a force
nodeV += (clothAcceleration*solverdt);
g_vertexVelocity[nodeID] = float4(nodeV, 0.f);
float3 relativeWindVelocity = nodeV - clothWindVelocity;
float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity);
if( relativeSpeedSquared > epsilon )
{
// Correct direction of normal relative to wind direction and get dot product
normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f);
float dvNormal = dot(normal, relativeWindVelocity);
if( dvNormal > 0 )
{
float3 force = float3(0.f, 0.f, 0.f);
float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
float c1 = c0 * mediumDensity;
force += normal * (-c1 * liftFactor);
force += normalize(relativeWindVelocity)*(-c1 * dragFactor);
float dtim = solverdt * nodeIM;
float3 forceDTIM = force * dtim;
float3 nodeFPlusForce = nodeF + force;
// m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim;
float3 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim);
nodeF = nodeFPlusForce;
if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) )
nodeF = nodeFMinus;
g_vertexForceAccumulator[nodeID] = float4(nodeF, 0.0f);
}
}
}
}
}
);

View file

@ -0,0 +1,83 @@
MSTRINGIFY(
cbuffer ComputeBoundsCB : register( b0 )
{
int numNodes;
int numSoftBodies;
int padding1;
int padding2;
};
// Node indices for each link
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexPositions : register( t1 );
RWStructuredBuffer<uint4> g_clothMinBounds : register( u0 );
RWStructuredBuffer<uint4> g_clothMaxBounds : register( u1 );
groupshared uint4 clothMinBounds[256];
groupshared uint4 clothMaxBounds[256];
[numthreads(128, 1, 1)]
void
ComputeBoundsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
const unsigned int UINT_MAX = 0xffffffff;
// Init min and max bounds arrays
if( GTid.x < numSoftBodies )
{
clothMinBounds[GTid.x] = uint4(UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX);
clothMaxBounds[GTid.x] = uint4(0,0,0,0);
}
AllMemoryBarrierWithGroupSync();
int nodeID = DTid.x;
if( nodeID < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
if( clothIdentifier >= 0 )
{
float3 position = g_vertexPositions[nodeID].xyz;
// Reinterpret position as uint
uint3 positionUInt = uint3(asuint(position.x), asuint(position.y), asuint(position.z));
// Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints
//positionUInt.x ^= uint((-int(positionUInt.x >> 31) | 0x80000000));
//positionUInt.y ^= uint((-int(positionUInt.y >> 31) | 0x80000000));
//positionUInt.z ^= uint((-int(positionUInt.z >> 31) | 0x80000000));
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
// Min/max with the LDS values
InterlockedMin(clothMinBounds[clothIdentifier].x, positionUInt.x);
InterlockedMin(clothMinBounds[clothIdentifier].y, positionUInt.y);
InterlockedMin(clothMinBounds[clothIdentifier].z, positionUInt.z);
InterlockedMax(clothMaxBounds[clothIdentifier].x, positionUInt.x);
InterlockedMax(clothMaxBounds[clothIdentifier].y, positionUInt.y);
InterlockedMax(clothMaxBounds[clothIdentifier].z, positionUInt.z);
}
}
AllMemoryBarrierWithGroupSync();
// Use global atomics to update the global versions of the data
if( GTid.x < numSoftBodies )
{
InterlockedMin(g_clothMinBounds[GTid.x].x, clothMinBounds[GTid.x].x);
InterlockedMin(g_clothMinBounds[GTid.x].y, clothMinBounds[GTid.x].y);
InterlockedMin(g_clothMinBounds[GTid.x].z, clothMinBounds[GTid.x].z);
InterlockedMax(g_clothMaxBounds[GTid.x].x, clothMaxBounds[GTid.x].x);
InterlockedMax(g_clothMaxBounds[GTid.x].y, clothMaxBounds[GTid.x].y);
InterlockedMax(g_clothMaxBounds[GTid.x].z, clothMaxBounds[GTid.x].z);
}
}
);

View file

@ -0,0 +1,41 @@
MSTRINGIFY(
cbuffer IntegrateCB : register( b0 )
{
int numNodes;
float solverdt;
int padding1;
int padding2;
};
// Node indices for each link
StructuredBuffer<float> g_vertexInverseMasses : register( t0 );
RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u2 );
RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u3 );
[numthreads(128, 1, 1)]
void
IntegrateKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
if( nodeID < numNodes )
{
float3 position = g_vertexPositions[nodeID].xyz;
float3 velocity = g_vertexVelocity[nodeID].xyz;
float3 force = g_vertexForceAccumulator[nodeID].xyz;
float inverseMass = g_vertexInverseMasses[nodeID];
g_vertexPreviousPositions[nodeID] = float4(position, 0.f);
velocity += force * inverseMass * solverdt;
position += velocity * solverdt;
g_vertexForceAccumulator[nodeID] = float4(0.f, 0.f, 0.f, 0.0f);
g_vertexPositions[nodeID] = float4(position, 0.f);
g_vertexVelocity[nodeID] = float4(velocity, 0.f);
}
}
);

View file

@ -0,0 +1,63 @@
MSTRINGIFY(
cbuffer OutputToVertexArrayCB : register( b0 )
{
int startNode;
int numNodes;
int positionOffset;
int positionStride;
int normalOffset;
int normalStride;
int padding1;
int padding2;
};
StructuredBuffer<float4> g_vertexPositions : register( t0 );
StructuredBuffer<float4> g_vertexNormals : register( t1 );
RWBuffer<float> g_vertexBuffer : register( u0 );
[numthreads(128, 1, 1)]
void
OutputToVertexArrayWithNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID + startNode];
float4 normal = g_vertexNormals[nodeID + startNode];
// Stride should account for the float->float4 conversion
int positionDestination = nodeID * positionStride + positionOffset;
g_vertexBuffer[positionDestination] = position.x;
g_vertexBuffer[positionDestination+1] = position.y;
g_vertexBuffer[positionDestination+2] = position.z;
int normalDestination = nodeID * normalStride + normalOffset;
g_vertexBuffer[normalDestination] = normal.x;
g_vertexBuffer[normalDestination+1] = normal.y;
g_vertexBuffer[normalDestination+2] = normal.z;
}
}
[numthreads(128, 1, 1)]
void
OutputToVertexArrayWithoutNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID + startNode];
float4 normal = g_vertexNormals[nodeID + startNode];
// Stride should account for the float->float4 conversion
int positionDestination = nodeID * positionStride + positionOffset;
g_vertexBuffer[positionDestination] = position.x;
g_vertexBuffer[positionDestination+1] = position.y;
g_vertexBuffer[positionDestination+2] = position.z;
}
}
);

View file

@ -0,0 +1,44 @@
MSTRINGIFY(
cbuffer PrepareLinksCB : register( b0 )
{
int numLinks;
int padding0;
int padding1;
int padding2;
};
// Node indices for each link
StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
StructuredBuffer<float> g_linksMassLSC : register( t1 );
StructuredBuffer<float4> g_nodesPreviousPosition : register( t2 );
RWStructuredBuffer<float> g_linksLengthRatio : register( u0 );
RWStructuredBuffer<float4> g_linksCurrentLength : register( u1 );
[numthreads(128, 1, 1)]
void
PrepareLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int linkID = DTid.x;
if( linkID < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0];
float4 nodePreviousPosition1 = g_nodesPreviousPosition[node1];
float massLSC = g_linksMassLSC[linkID];
float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0;
float linkLengthRatio = dot(linkCurrentLength, linkCurrentLength)*massLSC;
linkLengthRatio = 1./linkLengthRatio;
g_linksCurrentLength[linkID] = linkCurrentLength;
g_linksLengthRatio[linkID] = linkLengthRatio;
}
}
);

View file

@ -0,0 +1,55 @@
MSTRINGIFY(
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
int startLink;
int numLinks;
float kst;
float ti;
};
// Node indices for each link
StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
StructuredBuffer<float> g_linksMassLSC : register( t1 );
StructuredBuffer<float> g_linksRestLengthSquared : register( t2 );
StructuredBuffer<float> g_verticesInverseMass : register( t3 );
RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
[numthreads(128, 1, 1)]
void
SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int linkID = DTid.x + startLink;
if( DTid.x < numLinks )
{
float massLSC = g_linksMassLSC[linkID];
float restLengthSquared = g_linksRestLengthSquared[linkID];
if( massLSC > 0.0f )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float3 position0 = g_vertexPositions[node0].xyz;
float3 position1 = g_vertexPositions[node1].xyz;
float inverseMass0 = g_verticesInverseMass[node0];
float inverseMass1 = g_verticesInverseMass[node1];
float3 del = position1 - position0;
float len = dot(del, del);
float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
g_vertexPositions[node0] = float4(position0, 0.f);
g_vertexPositions[node1] = float4(position1, 0.f);
}
}
}
);

View file

@ -0,0 +1,147 @@
MSTRINGIFY(
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
int startWaveInBatch;
int numWaves;
float kst;
float ti;
};
// Number of batches per wavefront stored one element per logical wavefront
StructuredBuffer<int2> g_wavefrontBatchCountsVertexCounts : register( t0 );
// Set of up to maxNumVertices vertex addresses per wavefront
StructuredBuffer<int> g_vertexAddressesPerWavefront : register( t1 );
StructuredBuffer<float> g_verticesInverseMass : register( t2 );
// Per-link data layed out structured in terms of sub batches within wavefronts
StructuredBuffer<int2> g_linksVertexIndices : register( t3 );
StructuredBuffer<float> g_linksMassLSC : register( t4 );
StructuredBuffer<float> g_linksRestLengthSquared : register( t5 );
RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
// Data loaded on a per-wave basis
groupshared int2 wavefrontBatchCountsVertexCounts[WAVEFRONT_BLOCK_MULTIPLIER];
groupshared float4 vertexPositionSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
groupshared float vertexInverseMassSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
// Storing the vertex addresses actually slowed things down a little
//groupshared int vertexAddressSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
[numthreads(BLOCK_SIZE, 1, 1)]
void
SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
const int laneInWavefront = (DTid.x & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (DTid.x / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER;
const int localWavefront = wavefront - firstWavefrontInBlock;
int batchesWithinWavefront = 0;
int verticesUsedByWave = 0;
int cond = wavefront < (startWaveInBatch + numWaves);
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( cond)
{
// Load the batch counts for the wavefronts
int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
//vertexAddressSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = vertexAddress;
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
}
// Ensure compiler does not re-order memory operations
//AllMemoryBarrier();
AllMemoryBarrierWithGroupSync ();
if( cond)
{
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float3 position0 = vertexPositionSharedData[vertexAddress0].xyz;
float3 position1 = vertexPositionSharedData[vertexAddress1].xyz;
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float3 del = position1 - position0;
float len = dot(del, del);
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
// Ensure compiler does not re-order memory operations
AllMemoryBarrier();
vertexPositionSharedData[vertexAddress0] = float4(position0, 0.f);
vertexPositionSharedData[vertexAddress1] = float4(position1, 0.f);
// Ensure compiler does not re-order memory operations
AllMemoryBarrier();
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
}
}
}
);

View file

@ -0,0 +1,48 @@
MSTRINGIFY(
cbuffer UpdateConstantsCB : register( b0 )
{
int numLinks;
int padding0;
int padding1;
int padding2;
};
// Node indices for each link
StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
StructuredBuffer<float4> g_vertexPositions : register( t1 );
StructuredBuffer<float> g_vertexInverseMasses : register( t2 );
StructuredBuffer<float> g_linksMaterialLSC : register( t3 );
RWStructuredBuffer<float> g_linksMassLSC : register( u0 );
RWStructuredBuffer<float> g_linksRestLengthSquared : register( u1 );
RWStructuredBuffer<float> g_linksRestLengths : register( u2 );
[numthreads(128, 1, 1)]
void
UpdateConstantsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int linkID = DTid.x;
if( linkID < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float linearStiffnessCoefficient = g_linksMaterialLSC[ linkID ];
float3 position0 = g_vertexPositions[node0].xyz;
float3 position1 = g_vertexPositions[node1].xyz;
float inverseMass0 = g_vertexInverseMasses[node0];
float inverseMass1 = g_vertexInverseMasses[node1];
float3 difference = position0 - position1;
float length2 = dot(difference, difference);
float length = sqrt(length2);
g_linksRestLengths[linkID] = length;
g_linksMassLSC[linkID] = (inverseMass0 + inverseMass1)/linearStiffnessCoefficient;
g_linksRestLengthSquared[linkID] = length*length;
}
}
);

View file

@ -0,0 +1,49 @@
MSTRINGIFY(
cbuffer UpdateVelocitiesFromPositionsWithVelocitiesCB : register( b0 )
{
int numNodes;
float isolverdt;
int padding1;
int padding2;
};
StructuredBuffer<float4> g_vertexPositions : register( t0 );
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
StructuredBuffer<int> g_vertexClothIndices : register( t2 );
StructuredBuffer<float> g_clothVelocityCorrectionCoefficients : register( t3 );
StructuredBuffer<float> g_clothDampingFactor : register( t4 );
RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
RWStructuredBuffer<float4> g_vertexForces : register( u1 );
[numthreads(128, 1, 1)]
void
updateVelocitiesFromPositionsWithVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
if( nodeID < numNodes )
{
float3 position = g_vertexPositions[nodeID].xyz;
float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
float3 velocity = g_vertexVelocities[nodeID].xyz;
int clothIndex = g_vertexClothIndices[nodeID];
float velocityCorrectionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex];
float dampingFactor = g_clothDampingFactor[clothIndex];
float velocityCoefficient = (1.f - dampingFactor);
float3 difference = position - previousPosition;
velocity += difference*velocityCorrectionCoefficient*isolverdt;
// Damp the velocity
velocity *= velocityCoefficient;
g_vertexVelocities[nodeID] = float4(velocity, 0.f);
g_vertexForces[nodeID] = float4(0.f, 0.f, 0.f, 0.f);
}
}
);

View file

@ -0,0 +1,98 @@
MSTRINGIFY(
cbuffer UpdateSoftBodiesCB : register( b0 )
{
unsigned int numNodes;
unsigned int startFace;
unsigned int numFaces;
float epsilon;
};
// Node indices for each link
StructuredBuffer<int4> g_triangleVertexIndexSet : register( t0 );
StructuredBuffer<float4> g_vertexPositions : register( t1 );
StructuredBuffer<int> g_vertexTriangleCount : register( t2 );
RWStructuredBuffer<float4> g_vertexNormals : register( u0 );
RWStructuredBuffer<float> g_vertexArea : register( u1 );
RWStructuredBuffer<float4> g_triangleNormals : register( u2 );
RWStructuredBuffer<float> g_triangleArea : register( u3 );
[numthreads(128, 1, 1)]
void
ResetNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
if( DTid.x < numNodes )
{
g_vertexNormals[DTid.x] = float4(0.0f, 0.0f, 0.0f, 0.0f);
g_vertexArea[DTid.x] = 0.0f;
}
}
[numthreads(128, 1, 1)]
void
UpdateSoftBodiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int faceID = DTid.x + startFace;
if( DTid.x < numFaces )
{
int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
int nodeIndex0 = triangleIndexSet.x;
int nodeIndex1 = triangleIndexSet.y;
int nodeIndex2 = triangleIndexSet.z;
float3 node0 = g_vertexPositions[nodeIndex0].xyz;
float3 node1 = g_vertexPositions[nodeIndex1].xyz;
float3 node2 = g_vertexPositions[nodeIndex2].xyz;
float3 nodeNormal0 = g_vertexNormals[nodeIndex0].xyz;
float3 nodeNormal1 = g_vertexNormals[nodeIndex1].xyz;
float3 nodeNormal2 = g_vertexNormals[nodeIndex2].xyz;
float vertexArea0 = g_vertexArea[nodeIndex0];
float vertexArea1 = g_vertexArea[nodeIndex1];
float vertexArea2 = g_vertexArea[nodeIndex2];
float3 vector0 = node1 - node0;
float3 vector1 = node2 - node0;
float3 faceNormal = cross(vector0.xyz, vector1.xyz);
float triangleArea = length(faceNormal);
nodeNormal0 = nodeNormal0 + faceNormal;
nodeNormal1 = nodeNormal1 + faceNormal;
nodeNormal2 = nodeNormal2 + faceNormal;
vertexArea0 = vertexArea0 + triangleArea;
vertexArea1 = vertexArea1 + triangleArea;
vertexArea2 = vertexArea2 + triangleArea;
g_triangleNormals[faceID] = float4(normalize(faceNormal), 0.f);
g_vertexNormals[nodeIndex0] = float4(nodeNormal0, 0.f);
g_vertexNormals[nodeIndex1] = float4(nodeNormal1, 0.f);
g_vertexNormals[nodeIndex2] = float4(nodeNormal2, 0.f);
g_triangleArea[faceID] = triangleArea;
g_vertexArea[nodeIndex0] = vertexArea0;
g_vertexArea[nodeIndex1] = vertexArea1;
g_vertexArea[nodeIndex2] = vertexArea2;
}
}
[numthreads(128, 1, 1)]
void
NormalizeNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
if( DTid.x < numNodes )
{
float4 normal = g_vertexNormals[DTid.x];
float area = g_vertexArea[DTid.x];
int numTriangles = g_vertexTriangleCount[DTid.x];
float vectorLength = length(normal);
g_vertexNormals[DTid.x] = normalize(normal);
g_vertexArea[DTid.x] = area/float(numTriangles);
}
}
);

View file

@ -0,0 +1,44 @@
MSTRINGIFY(
cbuffer UpdateVelocitiesFromPositionsWithoutVelocitiesCB : register( b0 )
{
int numNodes;
float isolverdt;
int padding1;
int padding2;
};
StructuredBuffer<float4> g_vertexPositions : register( t0 );
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
StructuredBuffer<int> g_vertexClothIndices : register( t2 );
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
RWStructuredBuffer<float4> g_vertexForces : register( u1 );
[numthreads(128, 1, 1)]
void
updateVelocitiesFromPositionsWithoutVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
if( nodeID < numNodes )
{
float3 position = g_vertexPositions[nodeID].xyz;
float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
float3 velocity = g_vertexVelocities[nodeID].xyz;
int clothIndex = g_vertexClothIndices[nodeID];
float dampingFactor = g_clothDampingFactor[clothIndex];
float velocityCoefficient = (1.f - dampingFactor);
float3 difference = position - previousPosition;
velocity = difference*velocityCoefficient*isolverdt;
g_vertexVelocities[nodeID] = float4(velocity, 0.f);
g_vertexForces[nodeID] = float4(0.f, 0.f, 0.f, 0.f);
}
}
);

View file

@ -0,0 +1,35 @@
MSTRINGIFY(
cbuffer UpdatePositionsFromVelocitiesCB : register( b0 )
{
int numNodes;
float solverSDT;
int padding1;
int padding2;
};
StructuredBuffer<float4> g_vertexVelocities : register( t0 );
RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u0 );
RWStructuredBuffer<float4> g_vertexCurrentPosition : register( u1 );
[numthreads(128, 1, 1)]
void
UpdatePositionsFromVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int vertexID = DTid.x;
if( vertexID < numNodes )
{
float3 previousPosition = g_vertexPreviousPositions[vertexID].xyz;
float3 velocity = g_vertexVelocities[vertexID].xyz;
float3 newPosition = previousPosition + velocity*solverSDT;
g_vertexCurrentPosition[vertexID] = float4(newPosition, 0.f);
g_vertexPreviousPositions[vertexID] = float4(newPosition, 0.f);
}
}
);

View file

@ -0,0 +1,55 @@
MSTRINGIFY(
cbuffer VSolveLinksCB : register( b0 )
{
int startLink;
int numLinks;
float kst;
int padding;
};
// Node indices for each link
StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
StructuredBuffer<float> g_linksLengthRatio : register( t1 );
StructuredBuffer<float4> g_linksCurrentLength : register( t2 );
StructuredBuffer<float> g_vertexInverseMass : register( t3 );
RWStructuredBuffer<float4> g_vertexVelocity : register( u0 );
[numthreads(128, 1, 1)]
void
VSolveLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int linkID = DTid.x + startLink;
if( DTid.x < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float linkLengthRatio = g_linksLengthRatio[linkID];
float3 linkCurrentLength = g_linksCurrentLength[linkID].xyz;
float3 vertexVelocity0 = g_vertexVelocity[node0].xyz;
float3 vertexVelocity1 = g_vertexVelocity[node1].xyz;
float vertexInverseMass0 = g_vertexInverseMass[node0];
float vertexInverseMass1 = g_vertexInverseMass[node1];
float3 nodeDifference = vertexVelocity0 - vertexVelocity1;
float dotResult = dot(linkCurrentLength, nodeDifference);
float j = -dotResult*linkLengthRatio*kst;
float3 velocityChange0 = linkCurrentLength*(j*vertexInverseMass0);
float3 velocityChange1 = linkCurrentLength*(j*vertexInverseMass1);
vertexVelocity0 += velocityChange0;
vertexVelocity1 -= velocityChange1;
g_vertexVelocity[node0] = float4(vertexVelocity0, 0.f);
g_vertexVelocity[node1] = float4(vertexVelocity1, 0.f);
}
}
);

View file

@ -0,0 +1,170 @@
MSTRINGIFY(
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
unsigned int numNodes;
float isolverdt;
int padding0;
int padding1;
};
struct CollisionObjectIndices
{
int firstObject;
int endObject;
};
struct CollisionShapeDescription
{
float4x4 shapeTransform;
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
float margin;
float friction;
int padding0;
int padding1;
};
// From btBroadphaseProxy.h
static const int CAPSULE_SHAPE_PROXYTYPE = 10;
// Node indices for each link
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
StructuredBuffer<float> g_perClothFriction : register( t2 );
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
RWStructuredBuffer<float4> g_vertexForces : register( u0 );
RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
[numthreads(128, 1, 1)]
void
SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
float3 forceOnVertex = float3(0.f, 0.f, 0.f);
if( DTid.x < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float3 velocity;
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject )
{
velocity = float3(15, 0, 0);
// We have some possible collisions to deal with
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
{
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = shapeDescription.halfHeight;
float capsuleRadius = shapeDescription.radius;
float capsuleMargin = shapeDescription.margin;
float4x4 worldTransform = shapeDescription.shapeTransform;
float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f);
float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
float4 worldC1 = mul(worldTransform, c1);
float4 worldC2 = mul(worldTransform, c2);
float3 segment = (worldC2 - worldC1).xyz;
// compute distance of tangent to vertex along line segment in capsule
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float3 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1).xyz;
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2).xyz;
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint).xyz;
}
float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
float minDistance = capsuleRadius + capsuleMargin;
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
float3 p1 = normalize(cross(normalVector, segment));
float3 p2 = normalize(cross(p1, normalVector));
// Full friction is sum of velocities in each direction of plane
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
// Real friction is peak friction corrected by friction coefficients
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0 )
forceOnVertex -= frictionVector;
}
}
}
} else {
// Update velocity
float3 difference = position.xyz - previousPosition.xyz;
velocity = difference*velocityCoefficient*isolverdt;
}
g_vertexVelocities[nodeID] = float4(velocity, 0.f);
// Update external force
g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
}
}
);

View file

@ -0,0 +1,191 @@
MSTRINGIFY(
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
unsigned int numNodes;
float isolverdt;
int padding0;
int padding1;
};
struct CollisionObjectIndices
{
int firstObject;
int endObject;
};
struct CollisionShapeDescription
{
float4x4 shapeTransform;
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
float margin;
float friction;
int padding0;
int padding1;
};
// From btBroadphaseProxy.h
static const int CAPSULE_SHAPE_PROXYTYPE = 10;
// Node indices for each link
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
StructuredBuffer<float> g_perClothFriction : register( t2 );
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
RWStructuredBuffer<float4> g_vertexForces : register( u0 );
RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
// A buffer of local collision shapes
// TODO: Iterate to support more than 16
groupshared CollisionShapeDescription localCollisionShapes[16];
[numthreads(128, 1, 1)]
void
SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int nodeID = DTid.x;
float3 forceOnVertex = float3(0.f, 0.f, 0.f);
int clothIdentifier = g_vertexClothIdentifier[nodeID];
float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float3 velocity;
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
// We have some possible collisions to deal with
// First load all of the collision objects into LDS
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( GTid.x < numObjects )
{
localCollisionShapes[GTid.x] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + GTid.x ];
}
}
// Safe as the vertices are padded so that not more than one soft body is in a group
AllMemoryBarrierWithGroupSync();
// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
if( numObjects > 0 )
{
velocity = float3(0, 0, 0);
// We have some possible collisions to deal with
for( int collision = 0; collision < numObjects; ++collision )
{
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
float capsuleRadius = localCollisionShapes[collision].radius;
float capsuleMargin = localCollisionShapes[collision].margin;
float4x4 worldTransform = localCollisionShapes[collision].shapeTransform;
float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f);
float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
float4 worldC1 = mul(worldTransform, c1);
float4 worldC2 = mul(worldTransform, c2);
float3 segment = (worldC2 - worldC1).xyz;
// compute distance of tangent to vertex along line segment in capsule
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float3 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = normalize(position - worldC1).xyz;
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = normalize(position - worldC2).xyz;
} else {
dist = distanceFromLine;
normalVector = normalize(position - closestPoint).xyz;
}
float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
float minDistance = capsuleRadius + capsuleMargin;
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
float3 p1 = normalize(cross(normalVector, segment));
float3 p2 = normalize(cross(p1, normalVector));
// Full friction is sum of velocities in each direction of plane
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
// Real friction is peak friction corrected by friction coefficients
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0 )
forceOnVertex -= frictionVector;
}
}
}
} else {
// Update velocity
float3 difference = position.xyz - previousPosition.xyz;
velocity = difference*velocityCoefficient*isolverdt;
}
g_vertexVelocities[nodeID] = float4(velocity, 0.f);
// Update external force
g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
}
);

View file

@ -0,0 +1,323 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
#define BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
// DX11 support
#include <windows.h>
#include <crtdbg.h>
#include <d3d11.h>
#include <d3dx11.h>
#include <d3dcompiler.h>
#ifndef SAFE_RELEASE
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
#endif
/**
* DX11 Buffer that tracks a host buffer on use to ensure size-correctness.
*/
template <typename ElementType> class btDX11Buffer
{
protected:
ID3D11Device* m_d3dDevice;
ID3D11DeviceContext* m_d3dDeviceContext;
ID3D11Buffer* m_Buffer;
ID3D11ShaderResourceView* m_SRV;
ID3D11UnorderedAccessView* m_UAV;
btAlignedObjectArray< ElementType >* m_CPUBuffer;
// TODO: Separate this from the main class
// as read back buffers can be shared between buffers
ID3D11Buffer* m_readBackBuffer;
int m_gpuSize;
bool m_onGPU;
bool m_readOnlyOnGPU;
bool createBuffer( ID3D11Buffer *preexistingBuffer = 0)
{
HRESULT hr = S_OK;
// Create all CS buffers
if( preexistingBuffer )
{
m_Buffer = preexistingBuffer;
} else {
D3D11_BUFFER_DESC buffer_desc;
ZeroMemory(&buffer_desc, sizeof(buffer_desc));
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
if( m_readOnlyOnGPU )
buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
else
buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
buffer_desc.ByteWidth = m_CPUBuffer->size() * sizeof(ElementType);
// At a minimum the buffer must exist
if( buffer_desc.ByteWidth == 0 )
buffer_desc.ByteWidth = sizeof(ElementType);
buffer_desc.StructureByteStride = sizeof(ElementType);
hr = m_d3dDevice->CreateBuffer(&buffer_desc, NULL, &m_Buffer);
if( FAILED( hr ) )
return (hr==S_OK);
}
if( m_readOnlyOnGPU )
{
D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srvbuffer_desc.Buffer.ElementWidth = m_CPUBuffer->size();
if( srvbuffer_desc.Buffer.ElementWidth == 0 )
srvbuffer_desc.Buffer.ElementWidth = 1;
hr = m_d3dDevice->CreateShaderResourceView(m_Buffer, &srvbuffer_desc, &m_SRV);
if( FAILED( hr ) )
return (hr==S_OK);
} else {
// Create SRV
D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srvbuffer_desc.Buffer.ElementWidth = m_CPUBuffer->size();
if( srvbuffer_desc.Buffer.ElementWidth == 0 )
srvbuffer_desc.Buffer.ElementWidth = 1;
hr = m_d3dDevice->CreateShaderResourceView(m_Buffer, &srvbuffer_desc, &m_SRV);
if( FAILED( hr ) )
return (hr==S_OK);
// Create UAV
D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc;
ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc));
uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
uavbuffer_desc.Buffer.NumElements = m_CPUBuffer->size();
if( uavbuffer_desc.Buffer.NumElements == 0 )
uavbuffer_desc.Buffer.NumElements = 1;
hr = m_d3dDevice->CreateUnorderedAccessView(m_Buffer, &uavbuffer_desc, &m_UAV);
if( FAILED( hr ) )
return (hr==S_OK);
// Create read back buffer
D3D11_BUFFER_DESC readback_buffer_desc;
ZeroMemory(&readback_buffer_desc, sizeof(readback_buffer_desc));
readback_buffer_desc.ByteWidth = m_CPUBuffer->size() * sizeof(ElementType);
readback_buffer_desc.Usage = D3D11_USAGE_STAGING;
readback_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
readback_buffer_desc.StructureByteStride = sizeof(ElementType);
hr = m_d3dDevice->CreateBuffer(&readback_buffer_desc, NULL, &m_readBackBuffer);
if( FAILED( hr ) )
return (hr==S_OK);
}
m_gpuSize = m_CPUBuffer->size();
return true;
}
public:
btDX11Buffer( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext, btAlignedObjectArray< ElementType > *CPUBuffer, bool readOnly )
{
m_d3dDevice = d3dDevice;
m_d3dDeviceContext = d3dDeviceContext;
m_Buffer = 0;
m_SRV = 0;
m_UAV = 0;
m_readBackBuffer = 0;
m_CPUBuffer = CPUBuffer;
m_gpuSize = 0;
m_onGPU = false;
m_readOnlyOnGPU = readOnly;
}
virtual ~btDX11Buffer()
{
SAFE_RELEASE(m_Buffer);
SAFE_RELEASE(m_SRV);
SAFE_RELEASE(m_UAV);
SAFE_RELEASE(m_readBackBuffer);
}
ID3D11ShaderResourceView* &getSRV()
{
return m_SRV;
}
ID3D11UnorderedAccessView* &getUAV()
{
return m_UAV;
}
ID3D11Buffer* &getBuffer()
{
return m_Buffer;
}
/**
* Move the data to the GPU if it is not there already.
*/
bool moveToGPU()
{
// Reallocate if GPU size is too small
if( (m_CPUBuffer->size() > m_gpuSize ) )
m_onGPU = false;
if( !m_onGPU && m_CPUBuffer->size() > 0 )
{
// If the buffer doesn't exist or the CPU-side buffer has changed size, create
// We should really delete the old one, too, but let's leave that for later
if( !m_Buffer || (m_CPUBuffer->size() != m_gpuSize) )
{
SAFE_RELEASE(m_Buffer);
SAFE_RELEASE(m_SRV);
SAFE_RELEASE(m_UAV);
SAFE_RELEASE(m_readBackBuffer);
if( !createBuffer() )
{
btAssert("Buffer creation failed.");
return false;
}
}
if( m_gpuSize > 0 )
{
D3D11_BOX destRegion;
destRegion.left = 0;
destRegion.front = 0;
destRegion.top = 0;
destRegion.bottom = 1;
destRegion.back = 1;
destRegion.right = (m_CPUBuffer->size())*sizeof(ElementType);
m_d3dDeviceContext->UpdateSubresource(m_Buffer, 0, &destRegion, &((*m_CPUBuffer)[0]), 0, 0);
m_onGPU = true;
}
}
return true;
}
/**
* Move the data back from the GPU if it is on there and isn't read only.
*/
bool moveFromGPU()
{
if( m_CPUBuffer->size() > 0 )
{
if( m_onGPU && !m_readOnlyOnGPU )
{
// Copy back
D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
//m_pd3dImmediateContext->CopyResource(m_phAngVelReadBackBuffer, m_phAngVel);
D3D11_BOX destRegion;
destRegion.left = 0;
destRegion.front = 0;
destRegion.top = 0;
destRegion.bottom = 1;
destRegion.back = 1;
destRegion.right = (m_CPUBuffer->size())*sizeof(ElementType);
m_d3dDeviceContext->CopySubresourceRegion(
m_readBackBuffer,
0,
0,
0,
0 ,
m_Buffer,
0,
&destRegion
);
m_d3dDeviceContext->Map(m_readBackBuffer, 0, D3D11_MAP_READ, 0, &MappedResource);
//memcpy(m_hAngVel, MappedResource.pData, (m_maxObjs * sizeof(float) ));
memcpy(&((*m_CPUBuffer)[0]), MappedResource.pData, ((m_CPUBuffer->size()) * sizeof(ElementType) ));
m_d3dDeviceContext->Unmap(m_readBackBuffer, 0);
m_onGPU = false;
}
}
return true;
}
/**
* Copy the data back from the GPU without changing its state to be CPU-side.
* Useful if we just want to view it on the host for visualization.
*/
bool copyFromGPU()
{
if( m_CPUBuffer->size() > 0 )
{
if( m_onGPU && !m_readOnlyOnGPU )
{
// Copy back
D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
D3D11_BOX destRegion;
destRegion.left = 0;
destRegion.front = 0;
destRegion.top = 0;
destRegion.bottom = 1;
destRegion.back = 1;
destRegion.right = (m_CPUBuffer->size())*sizeof(ElementType);
m_d3dDeviceContext->CopySubresourceRegion(
m_readBackBuffer,
0,
0,
0,
0 ,
m_Buffer,
0,
&destRegion
);
m_d3dDeviceContext->Map(m_readBackBuffer, 0, D3D11_MAP_READ, 0, &MappedResource);
//memcpy(m_hAngVel, MappedResource.pData, (m_maxObjs * sizeof(float) ));
memcpy(&((*m_CPUBuffer)[0]), MappedResource.pData, ((m_CPUBuffer->size()) * sizeof(ElementType) ));
m_d3dDeviceContext->Unmap(m_readBackBuffer, 0);
}
}
return true;
}
/**
* Call if data has changed on the CPU.
* Can then trigger a move to the GPU as necessary.
*/
virtual void changedOnCPU()
{
m_onGPU = false;
}
}; // class btDX11Buffer
#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H

View file

@ -0,0 +1,103 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
#define BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
struct ID3D11Device;
struct ID3D11DeviceContext;
class btSoftBodyLinkDataDX11 : public btSoftBodyLinkData
{
public:
bool m_onGPU;
ID3D11Device *m_d3dDevice;
ID3D11DeviceContext *m_d3dDeviceContext;
btDX11Buffer<LinkNodePair> m_dx11Links;
btDX11Buffer<float> m_dx11LinkStrength;
btDX11Buffer<float> m_dx11LinksMassLSC;
btDX11Buffer<float> m_dx11LinksRestLengthSquared;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11LinksCLength;
btDX11Buffer<float> m_dx11LinksLengthRatio;
btDX11Buffer<float> m_dx11LinksRestLength;
btDX11Buffer<float> m_dx11LinksMaterialLinearStiffnessCoefficient;
struct BatchPair
{
int start;
int length;
BatchPair() :
start(0),
length(0)
{
}
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_linkAddresses;
/**
* Start and length values for computation batches over link data.
*/
btAlignedObjectArray< BatchPair > m_batchStartLengths;
//ID3D11Buffer* readBackBuffer;
btSoftBodyLinkDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
virtual ~btSoftBodyLinkDataDX11();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createLinks( int numLinks );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setLinkAt( const LinkDescription &link, int linkIndex );
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire link set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H

View file

@ -0,0 +1,173 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
#define BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
struct ID3D11Device;
struct ID3D11DeviceContext;
class btSoftBodyLinkDataDX11SIMDAware : public btSoftBodyLinkData
{
public:
bool m_onGPU;
ID3D11Device *m_d3dDevice;
ID3D11DeviceContext *m_d3dDeviceContext;
const int m_wavefrontSize;
const int m_linksPerWorkItem;
const int m_maxLinksPerWavefront;
int m_maxBatchesWithinWave;
int m_maxVerticesWithinWave;
int m_numWavefronts;
int m_maxVertex;
struct NumBatchesVerticesPair
{
int numBatches;
int numVertices;
};
// Array storing number of links in each wavefront
btAlignedObjectArray<int> m_linksPerWavefront;
btAlignedObjectArray<NumBatchesVerticesPair> m_numBatchesAndVerticesWithinWaves;
btDX11Buffer< NumBatchesVerticesPair > m_dx11NumBatchesAndVerticesWithinWaves;
// All arrays here will contain batches of m_maxLinksPerWavefront links
// ordered by wavefront.
// with either global vertex pairs or local vertex pairs
btAlignedObjectArray< int > m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
btDX11Buffer<int> m_dx11WavefrontVerticesGlobalAddresses;
btAlignedObjectArray< LinkNodePair > m_linkVerticesLocalAddresses; // Vertex pair for the link
btDX11Buffer<LinkNodePair> m_dx11LinkVerticesLocalAddresses;
btDX11Buffer<float> m_dx11LinkStrength;
btDX11Buffer<float> m_dx11LinksMassLSC;
btDX11Buffer<float> m_dx11LinksRestLengthSquared;
btDX11Buffer<float> m_dx11LinksRestLength;
btDX11Buffer<float> m_dx11LinksMaterialLinearStiffnessCoefficient;
struct BatchPair
{
int start;
int length;
BatchPair() :
start(0),
length(0)
{
}
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_linkAddresses;
/**
* Start and length values for computation batches over link data.
*/
btAlignedObjectArray< BatchPair > m_wavefrontBatchStartLengths;
//ID3D11Buffer* readBackBuffer;
btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
virtual ~btSoftBodyLinkDataDX11SIMDAware();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createLinks( int numLinks );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setLinkAt( const LinkDescription &link, int linkIndex );
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire link set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
int getMaxVerticesPerWavefront()
{
return m_maxVerticesWithinWave;
}
int getWavefrontSize()
{
return m_wavefrontSize;
}
int getLinksPerWorkItem()
{
return m_linksPerWorkItem;
}
int getMaxLinksPerWavefront()
{
return m_maxLinksPerWavefront;
}
int getMaxBatchesPerWavefront()
{
return m_maxBatchesWithinWave;
}
int getNumWavefronts()
{
return m_numWavefronts;
}
NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront )
{
return m_numBatchesAndVerticesWithinWaves[wavefront];
}
int getVertexGlobalAddresses( int vertexIndex )
{
return m_wavefrontVerticesGlobalAddresses[vertexIndex];
}
/**
* Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
*/
LinkNodePair getVertexPairLocalAddresses( int linkIndex )
{
return m_linkVerticesLocalAddresses[linkIndex];
}
};
#endif // #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H

View file

@ -0,0 +1,96 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
struct ID3D11Device;
struct ID3D11DeviceContext;
class btSoftBodyTriangleDataDX11 : public btSoftBodyTriangleData
{
public:
bool m_onGPU;
ID3D11Device *m_d3dDevice;
ID3D11DeviceContext *m_d3dDeviceContext;
btDX11Buffer<btSoftBodyTriangleData::TriangleNodeSet> m_dx11VertexIndices;
btDX11Buffer<float> m_dx11Area;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11Normal;
struct BatchPair
{
int start;
int length;
BatchPair() :
start(0),
length(0)
{
}
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_triangleAddresses;
/**
* Start and length values for computation batches over link data.
*/
btAlignedObjectArray< BatchPair > m_batchStartLengths;
//ID3D11Buffer* readBackBuffer;
public:
btSoftBodyTriangleDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
virtual ~btSoftBodyTriangleDataDX11();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createTriangles( int numTriangles );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire triangle set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H

View file

@ -0,0 +1,107 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include <windows.h>
#include <crtdbg.h>
#include <d3d11.h>
#include <d3dx11.h>
#include <d3dcompiler.h>
class btDX11VertexBufferDescriptor : public btVertexBufferDescriptor
{
protected:
/** Context of the DX11 device on which the vertex buffer is stored. */
ID3D11DeviceContext* m_context;
/** DX11 vertex buffer */
ID3D11Buffer* m_vertexBuffer;
/** UAV for DX11 buffer */
ID3D11UnorderedAccessView* m_vertexBufferUAV;
public:
/**
* buffer is a pointer to the DX11 buffer to place the vertex data in.
* UAV is a pointer to the UAV representation of the buffer laid out in floats.
* vertexOffset is the offset in floats to the first vertex.
* vertexStride is the stride in floats between vertices.
*/
btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride )
{
m_context = context;
m_vertexBuffer = buffer;
m_vertexBufferUAV = UAV;
m_vertexOffset = vertexOffset;
m_vertexStride = vertexStride;
m_hasVertexPositions = true;
}
/**
* buffer is a pointer to the DX11 buffer to place the vertex data in.
* UAV is a pointer to the UAV representation of the buffer laid out in floats.
* vertexOffset is the offset in floats to the first vertex.
* vertexStride is the stride in floats between vertices.
* normalOffset is the offset in floats to the first normal.
* normalStride is the stride in floats between normals.
*/
btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride, int normalOffset, int normalStride )
{
m_context = context;
m_vertexBuffer = buffer;
m_vertexBufferUAV = UAV;
m_vertexOffset = vertexOffset;
m_vertexStride = vertexStride;
m_hasVertexPositions = true;
m_normalOffset = normalOffset;
m_normalStride = normalStride;
m_hasNormals = true;
}
virtual ~btDX11VertexBufferDescriptor()
{
}
/**
* Return the type of the vertex buffer descriptor.
*/
virtual BufferTypes getBufferType() const
{
return DX11_BUFFER;
}
virtual ID3D11DeviceContext* getContext() const
{
return m_context;
}
virtual ID3D11Buffer* getbtDX11Buffer() const
{
return m_vertexBuffer;
}
virtual ID3D11UnorderedAccessView* getDX11UAV() const
{
return m_vertexBufferUAV;
}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H

View file

@ -0,0 +1,63 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
#define BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
class btSoftBodyLinkData;
class btSoftBodyLinkData::LinkDescription;
struct ID3D11Device;
struct ID3D11DeviceContext;
class btSoftBodyVertexDataDX11 : public btSoftBodyVertexData
{
protected:
bool m_onGPU;
ID3D11Device *m_d3dDevice;
ID3D11DeviceContext *m_d3dDeviceContext;
public:
btDX11Buffer<int> m_dx11ClothIdentifier;
btDX11Buffer<Vectormath::Aos::Point3> m_dx11VertexPosition;
btDX11Buffer<Vectormath::Aos::Point3> m_dx11VertexPreviousPosition;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11VertexVelocity;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11VertexForceAccumulator;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11VertexNormal;
btDX11Buffer<float> m_dx11VertexInverseMass;
btDX11Buffer<float> m_dx11VertexArea;
btDX11Buffer<int> m_dx11VertexTriangleCount;
//ID3D11Buffer* readBackBuffer;
public:
btSoftBodyVertexDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
virtual ~btSoftBodyVertexDataDX11();
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
};
#endif // #ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H

View file

@ -0,0 +1,691 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
#include "vectormath/vmInclude.h"
#include "BulletSoftBody/btSoftBodySolvers.h"
#include "btSoftBodySolverVertexBuffer_DX11.h"
#include "btSoftBodySolverLinkData_DX11.h"
#include "btSoftBodySolverVertexData_DX11.h"
#include "btSoftBodySolverTriangleData_DX11.h"
class DXFunctions
{
public:
typedef HRESULT (WINAPI * CompileFromMemoryFunc)(LPCSTR,SIZE_T,LPCSTR,const D3D10_SHADER_MACRO*,LPD3D10INCLUDE,LPCSTR,LPCSTR,UINT,UINT,ID3DX11ThreadPump*,ID3D10Blob**,ID3D10Blob**,HRESULT*);
ID3D11Device * m_dx11Device;
ID3D11DeviceContext* m_dx11Context;
CompileFromMemoryFunc m_dx11CompileFromMemory;
DXFunctions(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, CompileFromMemoryFunc dx11CompileFromMemory) :
m_dx11Device( dx11Device ),
m_dx11Context( dx11Context ),
m_dx11CompileFromMemory( dx11CompileFromMemory )
{
}
class KernelDesc
{
protected:
public:
ID3D11ComputeShader* kernel;
ID3D11Buffer* constBuffer;
KernelDesc()
{
kernel = 0;
constBuffer = 0;
}
virtual ~KernelDesc()
{
// TODO: this should probably destroy its kernel but we need to be careful
// in case KernelDescs are copied
}
};
/**
* Compile a compute shader kernel from a string and return the appropriate KernelDesc object.
*/
KernelDesc compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros = 0 );
};
class btDX11SoftBodySolver : public btSoftBodySolver
{
protected:
/**
* Entry in the collision shape array.
* Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
*/
struct CollisionShapeDescription
{
Vectormath::Aos::Transform3 shapeTransform;
Vectormath::Aos::Vector3 linearVelocity;
Vectormath::Aos::Vector3 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Both needed for capsule
float radius;
float halfHeight;
float margin;
float friction;
CollisionShapeDescription()
{
collisionShapeType = 0;
margin = 0;
friction = 0;
}
};
struct UIntVector3
{
UIntVector3()
{
x = 0;
y = 0;
z = 0;
_padding = 0;
}
UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ )
{
x = x_;
y = y_;
z = z_;
_padding = 0;
}
unsigned int x;
unsigned int y;
unsigned int z;
unsigned int _padding;
};
public:
/**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
class btAcceleratedSoftBodyInterface
{
protected:
/** Current number of vertices that are part of this cloth */
int m_numVertices;
/** Maximum number of vertices allocated to be part of this cloth */
int m_maxVertices;
/** Current number of triangles that are part of this cloth */
int m_numTriangles;
/** Maximum number of triangles allocated to be part of this cloth */
int m_maxTriangles;
/** Index of first vertex in the world allocated to this cloth */
int m_firstVertex;
/** Index of first triangle in the world allocated to this cloth */
int m_firstTriangle;
/** Index of first link in the world allocated to this cloth */
int m_firstLink;
/** Maximum number of links allocated to this cloth */
int m_maxLinks;
/** Current number of links allocated to this cloth */
int m_numLinks;
/** The actual soft body this data represents */
btSoftBody *m_softBody;
public:
btAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
m_softBody( softBody )
{
m_numVertices = 0;
m_maxVertices = 0;
m_numTriangles = 0;
m_maxTriangles = 0;
m_firstVertex = 0;
m_firstTriangle = 0;
m_firstLink = 0;
m_maxLinks = 0;
m_numLinks = 0;
}
int getNumVertices() const
{
return m_numVertices;
}
int getNumTriangles() const
{
return m_numTriangles;
}
int getMaxVertices() const
{
return m_maxVertices;
}
int getMaxTriangles() const
{
return m_maxTriangles;
}
int getFirstVertex() const
{
return m_firstVertex;
}
int getFirstTriangle() const
{
return m_firstTriangle;
}
/**
* Update the bounds in the btSoftBody object
*/
void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
// TODO: All of these set functions will have to do checks and
// update the world because restructuring of the arrays will be necessary
// Reasonable use of "friend"?
void setNumVertices( int numVertices )
{
m_numVertices = numVertices;
}
void setNumTriangles( int numTriangles )
{
m_numTriangles = numTriangles;
}
void setMaxVertices( int maxVertices )
{
m_maxVertices = maxVertices;
}
void setMaxTriangles( int maxTriangles )
{
m_maxTriangles = maxTriangles;
}
void setFirstVertex( int firstVertex )
{
m_firstVertex = firstVertex;
}
void setFirstTriangle( int firstTriangle )
{
m_firstTriangle = firstTriangle;
}
void setMaxLinks( int maxLinks )
{
m_maxLinks = maxLinks;
}
void setNumLinks( int numLinks )
{
m_numLinks = numLinks;
}
void setFirstLink( int firstLink )
{
m_firstLink = firstLink;
}
int getMaxLinks()
{
return m_maxLinks;
}
int getNumLinks()
{
return m_numLinks;
}
int getFirstLink()
{
return m_firstLink;
}
btSoftBody* getSoftBody()
{
return m_softBody;
}
};
struct CollisionObjectIndices
{
CollisionObjectIndices( int f, int e )
{
firstObject = f;
endObject = e;
}
int firstObject;
int endObject;
};
struct PrepareLinksCB
{
int numLinks;
int padding0;
int padding1;
int padding2;
};
struct SolvePositionsFromLinksKernelCB
{
int startLink;
int numLinks;
float kst;
float ti;
};
struct IntegrateCB
{
int numNodes;
float solverdt;
int padding1;
int padding2;
};
struct UpdatePositionsFromVelocitiesCB
{
int numNodes;
float solverSDT;
int padding1;
int padding2;
};
struct UpdateVelocitiesFromPositionsWithoutVelocitiesCB
{
int numNodes;
float isolverdt;
int padding1;
int padding2;
};
struct UpdateVelocitiesFromPositionsWithVelocitiesCB
{
int numNodes;
float isolverdt;
int padding1;
int padding2;
};
struct UpdateSoftBodiesCB
{
int numNodes;
int startFace;
int numFaces;
float epsilon;
};
struct ApplyForcesCB
{
unsigned int numNodes;
float solverdt;
float epsilon;
int padding3;
};
struct AddVelocityCB
{
int startNode;
int lastNode;
float velocityX;
float velocityY;
float velocityZ;
int padding1;
int padding2;
int padding3;
};
struct VSolveLinksCB
{
int startLink;
int numLinks;
float kst;
int padding;
};
struct ComputeBoundsCB
{
int numNodes;
int numSoftBodies;
int padding1;
int padding2;
};
struct SolveCollisionsAndUpdateVelocitiesCB
{
unsigned int numNodes;
float isolverdt;
int padding0;
int padding1;
};
protected:
ID3D11Device * m_dx11Device;
ID3D11DeviceContext* m_dx11Context;
DXFunctions dxFunctions;
public:
/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
btSoftBodyLinkDataDX11 m_linkData;
btSoftBodyVertexDataDX11 m_vertexData;
btSoftBodyTriangleDataDX11 m_triangleData;
protected:
/** Variable to define whether we need to update solver constants on the next iteration */
bool m_updateSolverConstants;
bool m_shadersInitialized;
/**
* Cloths owned by this solver.
* Only our cloths are in this array.
*/
btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet;
/** Acceleration value to be applied to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver.
*/
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothAcceleration;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11PerClothAcceleration;
/** Wind velocity to be applied normal to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver.
*/
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothWindVelocity;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11PerClothWindVelocity;
/** Velocity damping factor */
btAlignedObjectArray< float > m_perClothDampingFactor;
btDX11Buffer<float> m_dx11PerClothDampingFactor;
/** Velocity correction coefficient */
btAlignedObjectArray< float > m_perClothVelocityCorrectionCoefficient;
btDX11Buffer<float> m_dx11PerClothVelocityCorrectionCoefficient;
/** Lift parameter for wind effect on cloth. */
btAlignedObjectArray< float > m_perClothLiftFactor;
btDX11Buffer<float> m_dx11PerClothLiftFactor;
/** Drag parameter for wind effect on cloth. */
btAlignedObjectArray< float > m_perClothDragFactor;
btDX11Buffer<float> m_dx11PerClothDragFactor;
/** Density of the medium in which each cloth sits */
btAlignedObjectArray< float > m_perClothMediumDensity;
btDX11Buffer<float> m_dx11PerClothMediumDensity;
/**
* Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
*/
btAlignedObjectArray< CollisionObjectIndices > m_perClothCollisionObjects;
btDX11Buffer<CollisionObjectIndices> m_dx11PerClothCollisionObjects;
/**
* Collision shapes being passed across to the cloths in this solver.
*/
btAlignedObjectArray< CollisionShapeDescription > m_collisionObjectDetails;
btDX11Buffer< CollisionShapeDescription > m_dx11CollisionObjectDetails;
/**
* Minimum bounds for each cloth.
* Updated by GPU and returned for use by broad phase.
* These are int vectors as a reminder that they store the int representation of a float, not a float.
* Bit 31 is inverted - is floats are stored with int-sortable values.
*/
btAlignedObjectArray< UIntVector3 > m_perClothMinBounds;
btDX11Buffer< UIntVector3 > m_dx11PerClothMinBounds;
/**
* Maximum bounds for each cloth.
* Updated by GPU and returned for use by broad phase.
* These are int vectors as a reminder that they store the int representation of a float, not a float.
* Bit 31 is inverted - is floats are stored with int-sortable values.
*/
btAlignedObjectArray< UIntVector3 > m_perClothMaxBounds;
btDX11Buffer< UIntVector3 > m_dx11PerClothMaxBounds;
/**
* Friction coefficient for each cloth
*/
btAlignedObjectArray< float > m_perClothFriction;
btDX11Buffer< float > m_dx11PerClothFriction;
DXFunctions::KernelDesc prepareLinksKernel;
DXFunctions::KernelDesc solvePositionsFromLinksKernel;
DXFunctions::KernelDesc vSolveLinksKernel;
DXFunctions::KernelDesc integrateKernel;
DXFunctions::KernelDesc addVelocityKernel;
DXFunctions::KernelDesc updatePositionsFromVelocitiesKernel;
DXFunctions::KernelDesc updateVelocitiesFromPositionsWithoutVelocitiesKernel;
DXFunctions::KernelDesc updateVelocitiesFromPositionsWithVelocitiesKernel;
DXFunctions::KernelDesc solveCollisionsAndUpdateVelocitiesKernel;
DXFunctions::KernelDesc resetNormalsAndAreasKernel;
DXFunctions::KernelDesc normalizeNormalsAndAreasKernel;
DXFunctions::KernelDesc computeBoundsKernel;
DXFunctions::KernelDesc updateSoftBodiesKernel;
DXFunctions::KernelDesc applyForcesKernel;
bool m_enableUpdateBounds;
/**
* Integrate motion on the solver.
*/
virtual void integrate( float solverdt );
float computeTriangleArea(
const Vectormath::Aos::Point3 &vertex0,
const Vectormath::Aos::Point3 &vertex1,
const Vectormath::Aos::Point3 &vertex2 );
virtual bool buildShaders();
void resetNormalsAndAreas( int numVertices );
void normalizeNormalsAndAreas( int numVertices );
void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
void prepareCollisionConstraints();
Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
virtual void applyForces( float solverdt );
virtual void updateConstants( float timeStep );
int findSoftBodyIndex( const btSoftBody* const softBody );
//////////////////////////////////////
// Kernel dispatches
virtual void prepareLinks();
void updatePositionsFromVelocities( float solverdt );
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
void solveLinksForVelocity( int startLink, int numLinks, float kst );
void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
void computeBounds( );
void solveCollisionsAndUpdateVelocities( float isolverdt );
// End kernel dispatches
/////////////////////////////////////
void updateBounds();
void releaseKernels();
public:
btDX11SoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
virtual ~btDX11SoftBodySolver();
virtual SolverTypes getSolverType() const
{
return DX_SOLVER;
}
void setEnableUpdateBounds(bool enableBounds)
{
m_enableUpdateBounds = enableBounds;
}
bool getEnableUpdateBounds() const
{
return m_enableUpdateBounds;
}
virtual btSoftBodyLinkData &getLinkData();
virtual btSoftBodyVertexData &getVertexData();
virtual btSoftBodyTriangleData &getTriangleData();
btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
const btAcceleratedSoftBodyInterface * const findSoftBodyInterface( const btSoftBody* const softBody ) const;
virtual bool checkInitialized();
virtual void updateSoftBodies( );
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void copyBackToSoftBodies(bool bMove = true);
virtual void solveConstraints( float solverdt );
virtual void predictMotion( float solverdt );
virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );
virtual void processCollision( btSoftBody*, btSoftBody* );
};
/**
* Class to manage movement of data from a solver to a given target.
* This version is the DX to CPU version.
*/
class btSoftBodySolverOutputDXtoCPU : public btSoftBodySolverOutput
{
protected:
public:
btSoftBodySolverOutputDXtoCPU()
{
}
/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
/**
* Class to manage movement of data from a solver to a given target.
* This version is the DX to DX version and subclasses DX to CPU so that it works for that too.
*/
class btSoftBodySolverOutputDXtoDX : public btSoftBodySolverOutputDXtoCPU
{
protected:
struct OutputToVertexArrayCB
{
int startNode;
int numNodes;
int positionOffset;
int positionStride;
int normalOffset;
int normalStride;
int padding1;
int padding2;
};
DXFunctions dxFunctions;
DXFunctions::KernelDesc outputToVertexArrayWithNormalsKernel;
DXFunctions::KernelDesc outputToVertexArrayWithoutNormalsKernel;
bool m_shadersInitialized;
bool checkInitialized();
bool buildShaders();
void releaseKernels();
public:
btSoftBodySolverOutputDXtoDX(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory) :
dxFunctions( dx11Device, dx11Context, dx11CompileFromMemory )
{
m_shadersInitialized = false;
}
~btSoftBodySolverOutputDXtoDX()
{
releaseKernels();
}
/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
#endif // #ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H

View file

@ -0,0 +1,81 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "vectormath/vmInclude.h"
#include "btSoftBodySolver_DX11.h"
#include "btSoftBodySolverVertexBuffer_DX11.h"
#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
#include "btSoftBodySolverVertexData_DX11.h"
#include "btSoftBodySolverTriangleData_DX11.h"
#ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
#define BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
class btDX11SIMDAwareSoftBodySolver : public btDX11SoftBodySolver
{
protected:
struct SolvePositionsFromLinksKernelCB
{
int startWave;
int numWaves;
float kst;
float ti;
};
/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
btSoftBodyLinkDataDX11SIMDAware m_linkData;
/** Variable to define whether we need to update solver constants on the next iteration */
bool m_updateSolverConstants;
virtual bool buildShaders();
void updateConstants( float timeStep );
//////////////////////////////////////
// Kernel dispatches
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
// End kernel dispatches
/////////////////////////////////////
public:
btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
virtual ~btDX11SIMDAwareSoftBodySolver();
virtual btSoftBodyLinkData &getLinkData();
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void solveConstraints( float solverdt );
virtual SolverTypes getSolverType() const
{
return DX_SIMD_SOLVER;
}
};
#endif // #ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H

View file

@ -0,0 +1,23 @@
hasDX11 = findDirectX11()
if (hasDX11) then
project "BulletSoftBodyDX11Solvers"
initDirectX11()
kind "StaticLib"
targetdir "../../../../lib"
includedirs {
".",
"../../.."
}
files {
"**.cpp",
"**.h"
}
end

View file

@ -0,0 +1,62 @@
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src
${AMD_OPENCL_INCLUDES}
)
ADD_DEFINITIONS(-DUSE_AMD_OPENCL)
ADD_DEFINITIONS(-DCL_PLATFORM_AMD)
SET(BulletSoftBodyOpenCLSolvers_SRCS
../btSoftBodySolver_OpenCL.cpp
../btSoftBodySolver_OpenCLSIMDAware.cpp
../btSoftBodySolverOutputCLtoGL.cpp
)
SET(BulletSoftBodyOpenCLSolvers_HDRS
../btSoftBodySolver_OpenCL.h
../btSoftBodySolver_OpenCLSIMDAware.h
../../Shared/btSoftBodySolverData.h
../btSoftBodySolverVertexData_OpenCL.h
../btSoftBodySolverTriangleData_OpenCL.h
../btSoftBodySolverLinkData_OpenCL.h
../btSoftBodySolverLinkData_OpenCLSIMDAware.h
../btSoftBodySolverBuffer_OpenCL.h
../btSoftBodySolverVertexBuffer_OpenGL.h
../btSoftBodySolverOutputCLtoGL.h
)
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_AMD
${BulletSoftBodyOpenCLSolvers_SRCS}
${BulletSoftBodyOpenCLSolvers_HDRS}
)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES SOVERSION ${BULLET_VERSION})
IF (BUILD_SHARED_LIBS)
TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_AMD BulletSoftBody)
ENDIF (BUILD_SHARED_LIBS)
IF (INSTALL_LIBS)
IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION .)
ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION lib${LIB_SUFFIX})
#headers are already installed by BulletMultiThreaded library
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES FRAMEWORK true)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View file

@ -0,0 +1,27 @@
hasCL = findOpenCL_AMD()
if (hasCL) then
project "BulletSoftBodySolvers_OpenCL_AMD"
defines { "USE_AMD_OPENCL","CL_PLATFORM_AMD"}
initOpenCL_AMD()
kind "StaticLib"
targetdir "../../../../../lib"
includedirs {
".",
"../../../..",
"../../../../../Glut"
}
files {
"../btSoftBodySolver_OpenCL.cpp",
"../btSoftBodySolver_OpenCLSIMDAware.cpp",
"../btSoftBodySolverOutputCLtoGL.cpp"
}
end

View file

@ -0,0 +1,77 @@
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src
)
SET(BulletSoftBodyOpenCLSolvers_SRCS
../btSoftBodySolver_OpenCL.cpp
../btSoftBodySolver_OpenCLSIMDAware.cpp
)
SET(BulletSoftBodyOpenCLSolvers_HDRS
../btSoftBodySolver_OpenCL.h
../../Shared/btSoftBodySolverData.h
../btSoftBodySolverVertexData_OpenCL.h
../btSoftBodySolverTriangleData_OpenCL.h
../btSoftBodySolverLinkData_OpenCL.h
../btSoftBodySolverBuffer_OpenCL.h
)
# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
SET(BulletSoftBodyOpenCLSolvers_Shaders
# OutputToVertexArray
UpdateNormals
Integrate
UpdatePositions
UpdateNodes
SolvePositions
UpdatePositionsFromVelocities
ApplyForces
PrepareLinks
VSolveLinks
)
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach(f)
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Apple
${BulletSoftBodyOpenCLSolvers_SRCS}
${BulletSoftBodyOpenCLSolvers_HDRS}
${BulletSoftBodyOpenCLSolvers_OpenCLC}
)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES SOVERSION ${BULLET_VERSION})
IF (BUILD_SHARED_LIBS)
IF (APPLE AND (BUILD_SHARED_LIBS OR FRAMEWORK) )
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES LINK_FLAGS "-framework OpenCL")
ENDIF (APPLE AND (BUILD_SHARED_LIBS OR FRAMEWORK) )
TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Apple BulletSoftBody)
ENDIF (BUILD_SHARED_LIBS)
IF (INSTALL_LIBS)
IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION .)
ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION lib${LIB_SUFFIX})
#headers are already installed by BulletMultiThreaded library
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES FRAMEWORK true)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View file

@ -0,0 +1,17 @@
SUBDIRS( MiniCL )
IF(BUILD_INTEL_OPENCL_DEMOS)
SUBDIRS(Intel)
ENDIF()
IF(BUILD_AMD_OPENCL_DEMOS)
SUBDIRS(AMD)
ENDIF()
IF(BUILD_NVIDIA_OPENCL_DEMOS)
SUBDIRS(NVidia)
ENDIF()
IF(APPLE AND OPENCL_LIBRARY)
SUBDIRS(Apple)
ENDIF()

View file

@ -0,0 +1,82 @@
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src
${INTEL_OPENCL_INCLUDES}
)
ADD_DEFINITIONS(-DUSE_INTEL_OPENCL)
ADD_DEFINITIONS(-DCL_PLATFORM_INTEL)
SET(BulletSoftBodyOpenCLSolvers_SRCS
../btSoftBodySolver_OpenCL.cpp
../btSoftBodySolver_OpenCLSIMDAware.cpp
../btSoftBodySolverOutputCLtoGL.cpp
)
SET(BulletSoftBodyOpenCLSolvers_HDRS
../btSoftBodySolver_OpenCL.h
../btSoftBodySolver_OpenCLSIMDAware.h
../../Shared/btSoftBodySolverData.h
../btSoftBodySolverVertexData_OpenCL.h
../btSoftBodySolverTriangleData_OpenCL.h
../btSoftBodySolverLinkData_OpenCL.h
../btSoftBodySolverLinkData_OpenCLSIMDAware.h
../btSoftBodySolverBuffer_OpenCL.h
../btSoftBodySolverVertexBuffer_OpenGL.h
../btSoftBodySolverOutputCLtoGL.h
)
# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
SET(BulletSoftBodyOpenCLSolvers_Shaders
# OutputToVertexArray
UpdateNormals
Integrate
UpdatePositions
UpdateNodes
SolvePositions
UpdatePositionsFromVelocities
ApplyForces
PrepareLinks
VSolveLinks
)
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach(f)
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Intel
${BulletSoftBodyOpenCLSolvers_SRCS}
${BulletSoftBodyOpenCLSolvers_HDRS}
${BulletSoftBodyOpenCLSolvers_OpenCLC}
)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES SOVERSION ${BULLET_VERSION})
IF (BUILD_SHARED_LIBS)
TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Intel BulletSoftBody)
ENDIF (BUILD_SHARED_LIBS)
IF (INSTALL_LIBS)
IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Intel DESTINATION .)
ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Intel DESTINATION lib${LIB_SUFFIX})
#headers are already installed by BulletMultiThreaded library
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES FRAMEWORK true)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View file

@ -0,0 +1,27 @@
hasCL = findOpenCL_Intel()
if (hasCL) then
project "BulletSoftBodySolvers_OpenCL_Intel"
defines { "USE_INTEL_OPENCL","CL_PLATFORM_INTEL"}
initOpenCL_Intel()
kind "StaticLib"
targetdir "../../../../../lib"
includedirs {
".",
"../../../..",
"../../../../../Glut"
}
files {
"../btSoftBodySolver_OpenCL.cpp",
"../btSoftBodySolver_OpenCLSIMDAware.cpp",
"../btSoftBodySolverOutputCLtoGL.cpp"
}
end

View file

@ -0,0 +1,75 @@
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src
)
ADD_DEFINITIONS(-DUSE_MINICL)
SET(BulletSoftBodyOpenCLSolvers_SRCS
../btSoftBodySolver_OpenCL.cpp
)
SET(BulletSoftBodyOpenCLSolvers_HDRS
../btSoftBodySolver_OpenCL.h
../../Shared/btSoftBodySolverData.h
../btSoftBodySolverVertexData_OpenCL.h
../btSoftBodySolverTriangleData_OpenCL.h
../btSoftBodySolverLinkData_OpenCL.h
../btSoftBodySolverBuffer_OpenCL.h
)
# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
SET(BulletSoftBodyOpenCLSolvers_Shaders
# OutputToVertexArray
UpdateNormals
Integrate
UpdatePositions
UpdateNodes
SolvePositions
UpdatePositionsFromVelocities
ApplyForces
PrepareLinks
VSolveLinks
)
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach(f)
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Mini
${BulletSoftBodyOpenCLSolvers_SRCS}
${BulletSoftBodyOpenCLSolvers_HDRS}
${BulletSoftBodyOpenCLSolvers_OpenCLC}
)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES SOVERSION ${BULLET_VERSION})
IF (BUILD_SHARED_LIBS)
TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Mini MiniCL BulletMultiThreaded BulletSoftBody)
ENDIF (BUILD_SHARED_LIBS)
IF (INSTALL_LIBS)
IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION .)
ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION lib${LIB_SUFFIX})
#headers are already installed by BulletMultiThreaded library
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES FRAMEWORK true)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View file

@ -0,0 +1,249 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <MiniCL/cl_MiniCL_Defs.h>
#define MSTRINGIFY(A) A
#include "../OpenCLC10/ApplyForces.cl"
#include "../OpenCLC10/Integrate.cl"
#include "../OpenCLC10/PrepareLinks.cl"
#include "../OpenCLC10/SolvePositions.cl"
#include "../OpenCLC10/UpdateNodes.cl"
#include "../OpenCLC10/UpdateNormals.cl"
#include "../OpenCLC10/UpdatePositions.cl"
#include "../OpenCLC10/UpdatePositionsFromVelocities.cl"
#include "../OpenCLC10/VSolveLinks.cl"
#include "../OpenCLC10/UpdateFixedVertexPositions.cl"
//#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
MINICL_REGISTER(PrepareLinksKernel)
MINICL_REGISTER(VSolveLinksKernel)
MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel)
MINICL_REGISTER(SolvePositionsFromLinksKernel)
MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel)
MINICL_REGISTER(updateVelocitiesFromPositionsWithoutVelocitiesKernel)
MINICL_REGISTER(IntegrateKernel)
MINICL_REGISTER(ApplyForcesKernel)
MINICL_REGISTER(ResetNormalsAndAreasKernel)
MINICL_REGISTER(NormalizeNormalsAndAreasKernel)
MINICL_REGISTER(UpdateSoftBodiesKernel)
MINICL_REGISTER(UpdateFixedVertexPositions)
float mydot3a(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
// From btBroadphaseProxy.h
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
// Multiply column-major matrix against vector
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = float4(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = float4(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = float4(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = float4(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions GUID_ARG)
{
int nodeID = get_global_id(0);
float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
if( get_global_id(0) < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position (g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition (g_vertexPreviousPositions[nodeID].xyz, 1.f);
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
float4 difference = position - previousPosition;
float4 velocity = difference*velocityCoefficient*isolverdt;
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
// We have some possible collisions to deal with
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
{
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = shapeDescription.halfHeight;
float capsuleRadius = shapeDescription.radius;
float capsuleMargin = shapeDescription.margin;
int capsuleupAxis = shapeDescription.upAxis;
// Four columns of worldTransform matrix
float4 worldTransform[4];
worldTransform[0] = shapeDescription.shapeTransform[0];
worldTransform[1] = shapeDescription.shapeTransform[1];
worldTransform[2] = shapeDescription.shapeTransform[2];
worldTransform[3] = shapeDescription.shapeTransform[3];
// Correctly define capsule centerline vector
float4 c1 (0.f, 0.f, 0.f, 1.f);
float4 c2 (0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float4 segment = (worldC2 - worldC1);
// compute distance of tangent to vertex along line segment in capsule
float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
float4 closestPoint = (worldC1 + (segment * distanceAlongSegment));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float4 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = float4(normalize(position - worldC1).xyz, 0.f);
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = float4(normalize(position - worldC2).xyz, 0.f);
} else {
dist = distanceFromLine;
normalVector = float4(normalize(position - closestPoint).xyz, 0.f);
}
float4 colliderLinearVelocity = shapeDescription.linearVelocity;
float4 colliderAngularVelocity = shapeDescription.angularVelocity;
float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - float4(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
float minDistance = capsuleRadius + capsuleMargin;
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = position + float4(normalVector*(minDistance - dist)*0.9f);
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
float4 relativeVelocity = velocity - velocityOfSurfacePoint;
float4 p1 = normalize(cross(normalVector, segment));
float4 p2 = normalize(cross(p1, normalVector));
// Full friction is sum of velocities in each direction of plane
float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
// Real friction is peak friction corrected by friction coefficients
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0f )
forceOnVertex -= frictionVector;
}
}
}
}
g_vertexVelocities[nodeID] = float4(velocity.xyz, 0.f);
// Update external force
g_vertexForces[nodeID] = float4(forceOnVertex.xyz, 0.f);
g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
}
}
MINICL_REGISTER(SolveCollisionsAndUpdateVelocitiesKernel);

View file

@ -0,0 +1,81 @@
ADD_DEFINITIONS(-DUSE_NVIDIA_OPENCL)
ADD_DEFINITIONS(-DCL_PLATFORM_NVIDIA)
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src
${NVIDIA_OPENCL_INCLUDES}
)
SET(BulletSoftBodyOpenCLSolvers_SRCS
../btSoftBodySolver_OpenCL.cpp
../btSoftBodySolver_OpenCLSIMDAware.cpp
../btSoftBodySolverOutputCLtoGL.cpp
)
SET(BulletSoftBodyOpenCLSolvers_HDRS
../btSoftBodySolver_OpenCL.h
../../Shared/btSoftBodySolverData.h
../btSoftBodySolverVertexData_OpenCL.h
../btSoftBodySolverTriangleData_OpenCL.h
../btSoftBodySolverLinkData_OpenCL.h
../btSoftBodySolverLinkData_OpenCLSIMDAware.h
../btSoftBodySolverBuffer_OpenCL.h
../btSoftBodySolverVertexBuffer_OpenGL.h
../btSoftBodySolverOutputCLtoGL.h
)
# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
SET(BulletSoftBodyOpenCLSolvers_Shaders
# OutputToVertexArray
UpdateNormals
Integrate
UpdatePositions
UpdateNodes
SolvePositions
UpdatePositionsFromVelocities
ApplyForces
PrepareLinks
VSolveLinks
)
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach(f)
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_NVidia
${BulletSoftBodyOpenCLSolvers_SRCS}
${BulletSoftBodyOpenCLSolvers_HDRS}
${BulletSoftBodyOpenCLSolvers_OpenCLC}
)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES SOVERSION ${BULLET_VERSION})
IF (BUILD_SHARED_LIBS)
TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_NVidia BulletSoftBody BulletDynamics)
ENDIF (BUILD_SHARED_LIBS)
IF (INSTALL_LIBS)
IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION .)
ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION lib${LIB_SUFFIX})
#headers are already installed by BulletMultiThreaded library
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES FRAMEWORK true)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View file

@ -0,0 +1,27 @@
hasCL = findOpenCL_NVIDIA()
if (hasCL) then
project "BulletSoftBodySolvers_OpenCL_NVIDIA"
defines { "USE_NVIDIA_OPENCL","CL_PLATFORM_NVIDIA"}
initOpenCL_NVIDIA()
kind "StaticLib"
targetdir "../../../../../lib"
includedirs {
".",
"../../../..",
"../../../../../Glut"
}
files {
"../btSoftBodySolver_OpenCL.cpp",
"../btSoftBodySolver_OpenCLSIMDAware.cpp",
"../btSoftBodySolverOutputCLtoGL.cpp"
}
end

View file

@ -0,0 +1,102 @@
MSTRINGIFY(
float adot3(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
float alength3(float4 a)
{
a.w = 0;
return length(a);
}
float4 anormalize3(float4 a)
{
a.w = 0;
return normalize(a);
}
float4 projectOnAxis( float4 v, float4 a )
{
return (a*adot3(v, a));
}
__kernel void
ApplyForcesKernel(
const uint numNodes,
const float solverdt,
const float epsilon,
__global int * g_vertexClothIdentifier,
__global float4 * g_vertexNormal,
__global float * g_vertexArea,
__global float * g_vertexInverseMass,
__global float * g_clothLiftFactor,
__global float * g_clothDragFactor,
__global float4 * g_clothWindVelocity,
__global float4 * g_clothAcceleration,
__global float * g_clothMediumDensity,
__global float4 * g_vertexForceAccumulator,
__global float4 * g_vertexVelocity GUID_ARG)
{
unsigned int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int clothId = g_vertexClothIdentifier[nodeID];
float nodeIM = g_vertexInverseMass[nodeID];
if( nodeIM > 0.0f )
{
float4 nodeV = g_vertexVelocity[nodeID];
float4 normal = g_vertexNormal[nodeID];
float area = g_vertexArea[nodeID];
float4 nodeF = g_vertexForceAccumulator[nodeID];
// Read per-cloth values
float4 clothAcceleration = g_clothAcceleration[clothId];
float4 clothWindVelocity = g_clothWindVelocity[clothId];
float liftFactor = g_clothLiftFactor[clothId];
float dragFactor = g_clothDragFactor[clothId];
float mediumDensity = g_clothMediumDensity[clothId];
// Apply the acceleration to the cloth rather than do this via a force
nodeV += (clothAcceleration*solverdt);
g_vertexVelocity[nodeID] = nodeV;
// Aerodynamics
float4 rel_v = nodeV - clothWindVelocity;
float rel_v_len = alength3(rel_v);
float rel_v2 = dot(rel_v, rel_v);
if( rel_v2 > epsilon )
{
float4 rel_v_nrm = anormalize3(rel_v);
float4 nrm = normal;
nrm = nrm * (dot(nrm, rel_v) < 0 ? -1.f : 1.f);
float4 fDrag = (float4)(0.f, 0.f, 0.f, 0.f);
float4 fLift = (float4)(0.f, 0.f, 0.f, 0.f);
float n_dot_v = dot(nrm, rel_v_nrm);
// drag force
if ( dragFactor > 0.f )
fDrag = 0.5f * dragFactor * mediumDensity * rel_v2 * area * n_dot_v * (-1.0f) * rel_v_nrm;
// lift force
// Check angle of attack
// cos(10º) = 0.98480
if ( 0 < n_dot_v && n_dot_v < 0.98480f)
fLift = 0.5f * liftFactor * mediumDensity * rel_v_len * area * sqrt(1.0f-n_dot_v*n_dot_v) * (cross(cross(nrm, rel_v_nrm), rel_v_nrm));
nodeF += fDrag + fLift;
g_vertexForceAccumulator[nodeID] = nodeF;
}
}
}
}
);

View file

@ -0,0 +1,82 @@
MSTRINGIFY(
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
__kernel void
ComputeBoundsKernel(
const int numNodes,
const int numSoftBodies,
__global int * g_vertexClothIdentifier,
__global float4 * g_vertexPositions,
/* Unfortunately, to get the atomics below to work these arrays cannot be */
/* uint4, though that is the layout of the data */
/* Therefore this is little-endian-only code */
volatile __global uint * g_clothMinBounds,
volatile __global uint * g_clothMaxBounds,
volatile __local uint * clothMinBounds,
volatile __local uint * clothMaxBounds)
{
// Init min and max bounds arrays
if( get_local_id(0) < numSoftBodies )
{
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
clothMaxBounds[get_local_id(0)*4] = 0;
clothMaxBounds[get_local_id(0)*4+1] = 0;
clothMaxBounds[get_local_id(0)*4+2] = 0;
clothMaxBounds[get_local_id(0)*4+3] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
if( clothIdentifier >= 0 )
{
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
/* Reinterpret position as uint */
uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
// Min/max with the LDS values
atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
/* Use global atomics to update the global versions of the data */
if( get_local_id(0) < numSoftBodies )
{
/*atom_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
atom_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
atom_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
atom_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
}
}
);

View file

@ -0,0 +1,35 @@
MSTRINGIFY(
// Node indices for each link
__kernel void
IntegrateKernel(
const int numNodes,
const float solverdt,
__global float * g_vertexInverseMasses,
__global float4 * g_vertexPositions,
__global float4 * g_vertexVelocity,
__global float4 * g_vertexPreviousPositions,
__global float4 * g_vertexForceAccumulator GUID_ARG)
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID];
float4 velocity = g_vertexVelocity[nodeID];
float4 force = g_vertexForceAccumulator[nodeID];
float inverseMass = g_vertexInverseMasses[nodeID];
g_vertexPreviousPositions[nodeID] = position;
velocity += force * inverseMass * solverdt;
position += velocity * solverdt;
g_vertexForceAccumulator[nodeID] = (float4)(0.f, 0.f, 0.f, 0.0f);
g_vertexPositions[nodeID] = position;
g_vertexVelocity[nodeID] = velocity;
}
}
);

View file

@ -0,0 +1,46 @@
MSTRINGIFY(
__kernel void
OutputToVertexArrayWithNormalsKernel(
const int startNode, const int numNodes, __global float *g_vertexBuffer,
const int positionOffset, const int positionStride, const __global float4* g_vertexPositions,
const int normalOffset, const int normalStride, const __global float4* g_vertexNormals )
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID + startNode];
float4 normal = g_vertexNormals[nodeID + startNode];
// Stride should account for the float->float4 conversion
int positionDestination = nodeID * positionStride + positionOffset;
g_vertexBuffer[positionDestination] = position.x;
g_vertexBuffer[positionDestination+1] = position.y;
g_vertexBuffer[positionDestination+2] = position.z;
int normalDestination = nodeID * normalStride + normalOffset;
g_vertexBuffer[normalDestination] = normal.x;
g_vertexBuffer[normalDestination+1] = normal.y;
g_vertexBuffer[normalDestination+2] = normal.z;
}
}
__kernel void
OutputToVertexArrayWithoutNormalsKernel(
const int startNode, const int numNodes, __global float *g_vertexBuffer,
const int positionOffset, const int positionStride, const __global float4* g_vertexPositions )
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID + startNode];
// Stride should account for the float->float4 conversion
int positionDestination = nodeID * positionStride + positionOffset;
g_vertexBuffer[positionDestination] = position.x;
g_vertexBuffer[positionDestination+1] = position.y;
g_vertexBuffer[positionDestination+2] = position.z;
}
}
);

View file

@ -0,0 +1,38 @@
MSTRINGIFY(
__kernel void
PrepareLinksKernel(
const int numLinks,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float4 * g_nodesPreviousPosition,
__global float * g_linksLengthRatio,
__global float4 * g_linksCurrentLength GUID_ARG)
{
int linkID = get_global_id(0);
if( linkID < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0];
float4 nodePreviousPosition1 = g_nodesPreviousPosition[node1];
float massLSC = g_linksMassLSC[linkID];
float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0;
linkCurrentLength.w = 0.f;
float linkLengthRatio = dot(linkCurrentLength, linkCurrentLength)*massLSC;
linkLengthRatio = 1.0f/linkLengthRatio;
g_linksCurrentLength[linkID] = linkCurrentLength;
g_linksLengthRatio[linkID] = linkLengthRatio;
}
}
);

View file

@ -0,0 +1,204 @@
MSTRINGIFY(
float mydot3a(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
// From btBroadphaseProxy.h
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
// Multiply column-major matrix against vector
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions GUID_ARG)
{
int nodeID = get_global_id(0);
float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
if( get_global_id(0) < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
float4 difference = position - previousPosition;
float4 velocity = difference*velocityCoefficient*isolverdt;
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
// We have some possible collisions to deal with
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
{
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
float colliderFriction = shapeDescription.friction;
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = shapeDescription.halfHeight;
float capsuleRadius = shapeDescription.radius;
float capsuleMargin = shapeDescription.margin;
int capsuleupAxis = shapeDescription.upAxis;
// Four columns of worldTransform matrix
float4 worldTransform[4];
worldTransform[0] = shapeDescription.shapeTransform[0];
worldTransform[1] = shapeDescription.shapeTransform[1];
worldTransform[2] = shapeDescription.shapeTransform[2];
worldTransform[3] = shapeDescription.shapeTransform[3];
// Correctly define capsule centerline vector
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float4 segment = (worldC2 - worldC1);
// compute distance of tangent to vertex along line segment in capsule
float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment));
float distanceFromLine = length(position - closestPoint);
float distanceFromC1 = length(worldC1 - position);
float distanceFromC2 = length(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float4 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);
} else if( distanceAlongSegment > 1.f ) {
dist = distanceFromC2;
normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);
} else {
dist = distanceFromLine;
normalVector = (float4)(normalize(position - closestPoint).xyz, 0.f);
}
float4 colliderLinearVelocity = shapeDescription.linearVelocity;
float4 colliderAngularVelocity = shapeDescription.angularVelocity;
float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
float minDistance = capsuleRadius + capsuleMargin;
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = position + (float4)((minDistance - dist)*normalVector*0.9f);
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
float4 relativeVelocity = velocity - velocityOfSurfacePoint;
float4 p1 = normalize(cross(normalVector, segment));
float4 p2 = normalize(cross(p1, normalVector));
// Full friction is sum of velocities in each direction of plane
float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
// Real friction is peak friction corrected by friction coefficients
frictionVector = frictionVector * (colliderFriction*clothFriction);
float approachSpeed = dot(relativeVelocity, normalVector);
if( approachSpeed <= 0.0f )
forceOnVertex -= frictionVector;
}
}
}
}
g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);
// Update external force
g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
}
);

View file

@ -0,0 +1,242 @@
MSTRINGIFY(
//#pragma OPENCL EXTENSION cl_amd_printf:enable\n
float mydot3a(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
float mylength3(float4 a)
{
a.w = 0;
return length(a);
}
float4 mynormalize3(float4 a)
{
a.w = 0;
return normalize(a);
}
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
int padding0;
} CollisionShapeDescription;
// From btBroadphaseProxy.h
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
// Multiply column-major matrix against vector
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
float4 returnVector;
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
returnVector.x = dot(row0, vector);
returnVector.y = dot(row1, vector);
returnVector.z = dot(row2, vector);
returnVector.w = dot(row3, vector);
return returnVector;
}
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions,
__local CollisionShapeDescription *localCollisionShapes,
__global float * g_vertexInverseMasses)
{
int nodeID = get_global_id(0);
float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 0.f);
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
float4 difference = position - previousPosition;
float4 velocity = difference*velocityCoefficient*isolverdt;
float inverseMass = g_vertexInverseMasses[nodeID];
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
// We have some possible collisions to deal with
// First load all of the collision objects into LDS
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( get_local_id(0) < numObjects )
{
localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
}
}
// Safe as the vertices are padded so that not more than one soft body is in a group
barrier(CLK_LOCAL_MEM_FENCE);
// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
if( numObjects > 0 )
{
// We have some possible collisions to deal with
for( int collision = 0; collision < numObjects; ++collision )
{
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
float colliderFriction = localCollisionShapes[collision].friction;
if( localCollisionShapes[collision].collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
float capsuleRadius = localCollisionShapes[collision].radius;
float capsuleMargin = localCollisionShapes[collision].margin;
int capsuleupAxis = localCollisionShapes[collision].upAxis;
if ( capsuleHalfHeight <= 0 )
capsuleHalfHeight = 0.0001f;
float4 worldTransform[4];
worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
// Correctly define capsule centerline vector
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float4 segment = (float4)((worldC2 - worldC1).xyz, 0.f);
float4 segmentNormalized = mynormalize3(segment);
float distanceAlongSegment =mydot3a( (position - worldC1), segmentNormalized );
float4 closestPointOnSegment = (worldC1 + (float4)(segmentNormalized * distanceAlongSegment));
float distanceFromLine = mylength3(position - closestPointOnSegment);
float distanceFromC1 = mylength3(worldC1 - position);
float distanceFromC2 = mylength3(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
float dist;
float4 normalVector;
if( distanceAlongSegment < 0 )
{
dist = distanceFromC1;
normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);
} else if( distanceAlongSegment > length(segment) ) {
dist = distanceFromC2;
normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);
} else {
dist = distanceFromLine;
normalVector = (float4)(normalize(position - closestPointOnSegment).xyz, 0.f);
}
float minDistance = capsuleRadius + capsuleMargin;
float4 closestPointOnSurface = (float4)((position + (minDistance - dist) * normalVector).xyz, 0.f);
float4 colliderLinearVelocity = shapeDescription.linearVelocity;
float4 colliderAngularVelocity = shapeDescription.angularVelocity;
float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, closestPointOnSurface - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = closestPointOnSurface;
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
float4 relativeVelocity = velocity - velocityOfSurfacePoint;
float4 p1 = mynormalize3(cross(normalVector, segment));
float4 p2 = mynormalize3(cross(p1, normalVector));
float4 tangentialVel = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
float frictionCoef = (colliderFriction * clothFriction);
if (frictionCoef>1.f)
frictionCoef = 1.f;
//only apply friction if objects are not moving apart
float projVel = mydot3a(relativeVelocity,normalVector);
if ( projVel >= -0.001f)
{
if ( inverseMass > 0 )
{
//float4 myforceOnVertex = -tangentialVel * frictionCoef * isolverdt * (1.0f / inverseMass);
position += (-tangentialVel * frictionCoef) / (isolverdt);
}
}
// In case of no collision, this is the value of velocity
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
}
}
}
}
g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);
// Update external force
g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
);

View file

@ -0,0 +1,57 @@
MSTRINGIFY(
float mydot3(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
__kernel void
SolvePositionsFromLinksKernel(
const int startLink,
const int numLinks,
const float kst,
const float ti,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_verticesInverseMass,
__global float4 * g_vertexPositions GUID_ARG)
{
int linkID = get_global_id(0) + startLink;
if( get_global_id(0) < numLinks )
{
float massLSC = g_linksMassLSC[linkID];
float restLengthSquared = g_linksRestLengthSquared[linkID];
if( massLSC > 0.0f )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float4 position0 = g_vertexPositions[node0];
float4 position1 = g_vertexPositions[node1];
float inverseMass0 = g_verticesInverseMass[node0];
float inverseMass1 = g_verticesInverseMass[node1];
float4 del = position1 - position0;
float len = mydot3(del, del);
float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
g_vertexPositions[node0] = position0;
g_vertexPositions[node1] = position1;
}
}
}
);

View file

@ -0,0 +1,130 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
MSTRINGIFY(
float mydot3(float4 a, float4 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}
__kernel __attribute__((reqd_work_group_size(WAVEFRONT_BLOCK_MULTIPLIER*WAVEFRONT_SIZE, 1, 1)))
void
SolvePositionsFromLinksKernel(
const int startWaveInBatch,
const int numWaves,
const float kst,
const float ti,
__global int2 *g_wavefrontBatchCountsVertexCounts,
__global int *g_vertexAddressesPerWavefront,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_verticesInverseMass,
__global float4 * g_vertexPositions,
__local int2 *wavefrontBatchCountsVertexCounts,
__local float4 *vertexPositionSharedData,
__local float *vertexInverseMassSharedData)
{
const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
const int localWavefront = wavefront - firstWavefrontInBlock;
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( wavefront < (startWaveInBatch + numWaves) )
{
// Load the batch counts for the wavefronts
int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
barrier(CLK_LOCAL_MEM_FENCE);
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float4 position0 = vertexPositionSharedData[vertexAddress0];
float4 position1 = vertexPositionSharedData[vertexAddress1];
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float4 del = position1 - position0;
float len = mydot3(del, del);
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
// Ensure compiler does not re-order memory operations
barrier(CLK_LOCAL_MEM_FENCE);
vertexPositionSharedData[vertexAddress0] = position0;
vertexPositionSharedData[vertexAddress1] = position1;
// Ensure compiler does not re-order memory operations
barrier(CLK_LOCAL_MEM_FENCE);
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
}
}
}
);

View file

@ -0,0 +1,44 @@
MSTRINGIFY(
/*#define float3 float4
float dot3(float3 a, float3 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}*/
__kernel void
UpdateConstantsKernel(
const int numLinks,
__global int2 * g_linksVertexIndices,
__global float4 * g_vertexPositions,
__global float * g_vertexInverseMasses,
__global float * g_linksMaterialLSC,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_linksRestLengths)
{
int linkID = get_global_id(0);
if( linkID < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float linearStiffnessCoefficient = g_linksMaterialLSC[ linkID ];
float3 position0 = g_vertexPositions[node0].xyz;
float3 position1 = g_vertexPositions[node1].xyz;
float inverseMass0 = g_vertexInverseMasses[node0];
float inverseMass1 = g_vertexInverseMasses[node1];
float3 difference = position0 - position1;
float length2 = dot(difference, difference);
float length = sqrt(length2);
g_linksRestLengths[linkID] = length;
g_linksMassLSC[linkID] = (inverseMass0 + inverseMass1)/linearStiffnessCoefficient;
g_linksRestLengthSquared[linkID] = length*length;
}
}
);

View file

@ -0,0 +1,25 @@
MSTRINGIFY(
__kernel void
UpdateFixedVertexPositions(
const uint numNodes,
__global int * g_anchorIndex,
__global float4 * g_vertexPositions,
__global float4 * g_anchorPositions GUID_ARG)
{
unsigned int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int anchorIndex = g_anchorIndex[nodeID];
float4 position = g_vertexPositions[nodeID];
if ( anchorIndex >= 0 )
{
float4 anchorPosition = g_anchorPositions[anchorIndex];
g_vertexPositions[nodeID] = anchorPosition;
}
}
}
);

View file

@ -0,0 +1,39 @@
MSTRINGIFY(
__kernel void
updateVelocitiesFromPositionsWithVelocitiesKernel(
int numNodes,
float isolverdt,
__global float4 * g_vertexPositions,
__global float4 * g_vertexPreviousPositions,
__global int * g_vertexClothIndices,
__global float *g_clothVelocityCorrectionCoefficients,
__global float * g_clothDampingFactor,
__global float4 * g_vertexVelocities,
__global float4 * g_vertexForces GUID_ARG)
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID];
float4 previousPosition = g_vertexPreviousPositions[nodeID];
float4 velocity = g_vertexVelocities[nodeID];
int clothIndex = g_vertexClothIndices[nodeID];
float velocityCorrectionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex];
float dampingFactor = g_clothDampingFactor[clothIndex];
float velocityCoefficient = (1.f - dampingFactor);
float4 difference = position - previousPosition;
velocity += difference*velocityCorrectionCoefficient*isolverdt;
// Damp the velocity
velocity *= velocityCoefficient;
g_vertexVelocities[nodeID] = velocity;
g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
}
}
);

View file

@ -0,0 +1,102 @@
MSTRINGIFY(
float length3(float4 a)
{
a.w = 0;
return length(a);
}
float4 normalize3(float4 a)
{
a.w = 0;
return normalize(a);
}
__kernel void
ResetNormalsAndAreasKernel(
const unsigned int numNodes,
__global float4 * g_vertexNormals,
__global float * g_vertexArea GUID_ARG)
{
if( get_global_id(0) < numNodes )
{
g_vertexNormals[get_global_id(0)] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
g_vertexArea[get_global_id(0)] = 0.0f;
}
}
__kernel void
UpdateSoftBodiesKernel(
const unsigned int startFace,
const unsigned int numFaces,
__global int4 * g_triangleVertexIndexSet,
__global float4 * g_vertexPositions,
__global float4 * g_vertexNormals,
__global float * g_vertexArea,
__global float4 * g_triangleNormals,
__global float * g_triangleArea GUID_ARG)
{
int faceID = get_global_id(0) + startFace;
if( get_global_id(0) < numFaces )
{
int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
int nodeIndex0 = triangleIndexSet.x;
int nodeIndex1 = triangleIndexSet.y;
int nodeIndex2 = triangleIndexSet.z;
float4 node0 = g_vertexPositions[nodeIndex0];
float4 node1 = g_vertexPositions[nodeIndex1];
float4 node2 = g_vertexPositions[nodeIndex2];
float4 nodeNormal0 = g_vertexNormals[nodeIndex0];
float4 nodeNormal1 = g_vertexNormals[nodeIndex1];
float4 nodeNormal2 = g_vertexNormals[nodeIndex2];
float vertexArea0 = g_vertexArea[nodeIndex0];
float vertexArea1 = g_vertexArea[nodeIndex1];
float vertexArea2 = g_vertexArea[nodeIndex2];
float4 vector0 = node1 - node0;
float4 vector1 = node2 - node0;
float4 faceNormal = cross(vector0, vector1);
float triangleArea = length(faceNormal);
nodeNormal0 = nodeNormal0 + faceNormal;
nodeNormal1 = nodeNormal1 + faceNormal;
nodeNormal2 = nodeNormal2 + faceNormal;
vertexArea0 = vertexArea0 + triangleArea;
vertexArea1 = vertexArea1 + triangleArea;
vertexArea2 = vertexArea2 + triangleArea;
g_triangleNormals[faceID] = normalize3(faceNormal);
g_vertexNormals[nodeIndex0] = nodeNormal0;
g_vertexNormals[nodeIndex1] = nodeNormal1;
g_vertexNormals[nodeIndex2] = nodeNormal2;
g_triangleArea[faceID] = triangleArea;
g_vertexArea[nodeIndex0] = vertexArea0;
g_vertexArea[nodeIndex1] = vertexArea1;
g_vertexArea[nodeIndex2] = vertexArea2;
}
}
__kernel void
NormalizeNormalsAndAreasKernel(
const unsigned int numNodes,
__global int * g_vertexTriangleCount,
__global float4 * g_vertexNormals,
__global float * g_vertexArea GUID_ARG)
{
if( get_global_id(0) < numNodes )
{
float4 normal = g_vertexNormals[get_global_id(0)];
float area = g_vertexArea[get_global_id(0)];
int numTriangles = g_vertexTriangleCount[get_global_id(0)];
float vectorLength = length3(normal);
g_vertexNormals[get_global_id(0)] = normalize3(normal);
g_vertexArea[get_global_id(0)] = area/(float)(numTriangles);
}
}
);

View file

@ -0,0 +1,34 @@
MSTRINGIFY(
__kernel void
updateVelocitiesFromPositionsWithoutVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global float4 * g_vertexPositions,
__global float4 * g_vertexPreviousPositions,
__global int * g_vertexClothIndices,
__global float * g_clothDampingFactor,
__global float4 * g_vertexVelocities,
__global float4 * g_vertexForces GUID_ARG)
{
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
float4 position = g_vertexPositions[nodeID];
float4 previousPosition = g_vertexPreviousPositions[nodeID];
float4 velocity = g_vertexVelocities[nodeID];
int clothIndex = g_vertexClothIndices[nodeID];
float dampingFactor = g_clothDampingFactor[clothIndex];
float velocityCoefficient = (1.f - dampingFactor);
float4 difference = position - previousPosition;
velocity = difference*velocityCoefficient*isolverdt;
g_vertexVelocities[nodeID] = velocity;
g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
}
}
);

View file

@ -0,0 +1,28 @@
MSTRINGIFY(
__kernel void
UpdatePositionsFromVelocitiesKernel(
const int numNodes,
const float solverSDT,
__global float4 * g_vertexVelocities,
__global float4 * g_vertexPreviousPositions,
__global float4 * g_vertexCurrentPosition GUID_ARG)
{
int vertexID = get_global_id(0);
if( vertexID < numNodes )
{
float4 previousPosition = g_vertexPreviousPositions[vertexID];
float4 velocity = g_vertexVelocities[vertexID];
float4 newPosition = previousPosition + velocity*solverSDT;
g_vertexCurrentPosition[vertexID] = newPosition;
g_vertexPreviousPositions[vertexID] = newPosition;
}
}
);

View file

@ -0,0 +1,45 @@
MSTRINGIFY(
__kernel void
VSolveLinksKernel(
int startLink,
int numLinks,
float kst,
__global int2 * g_linksVertexIndices,
__global float * g_linksLengthRatio,
__global float4 * g_linksCurrentLength,
__global float * g_vertexInverseMass,
__global float4 * g_vertexVelocity GUID_ARG)
{
int linkID = get_global_id(0) + startLink;
if( get_global_id(0) < numLinks )
{
int2 nodeIndices = g_linksVertexIndices[linkID];
int node0 = nodeIndices.x;
int node1 = nodeIndices.y;
float linkLengthRatio = g_linksLengthRatio[linkID];
float3 linkCurrentLength = g_linksCurrentLength[linkID].xyz;
float3 vertexVelocity0 = g_vertexVelocity[node0].xyz;
float3 vertexVelocity1 = g_vertexVelocity[node1].xyz;
float vertexInverseMass0 = g_vertexInverseMass[node0];
float vertexInverseMass1 = g_vertexInverseMass[node1];
float3 nodeDifference = vertexVelocity0 - vertexVelocity1;
float dotResult = dot(linkCurrentLength, nodeDifference);
float j = -dotResult*linkLengthRatio*kst;
float3 velocityChange0 = linkCurrentLength*(j*vertexInverseMass0);
float3 velocityChange1 = linkCurrentLength*(j*vertexInverseMass1);
vertexVelocity0 += velocityChange0;
vertexVelocity1 -= velocityChange1;
g_vertexVelocity[node0] = (float4)(vertexVelocity0, 0.f);
g_vertexVelocity[node1] = (float4)(vertexVelocity1, 0.f);
}
}
);

View file

@ -0,0 +1,209 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
#define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
// OpenCL support
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#ifdef __APPLE__
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#endif //__APPLE__
#endif//USE_MINICL
#ifndef SAFE_RELEASE
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
#endif
template <typename ElementType> class btOpenCLBuffer
{
public:
cl_command_queue m_cqCommandQue;
cl_context m_clContext;
cl_mem m_buffer;
btAlignedObjectArray< ElementType > * m_CPUBuffer;
int m_gpuSize;
bool m_onGPU;
bool m_readOnlyOnGPU;
bool m_allocated;
bool createBuffer( cl_mem* preexistingBuffer = 0)
{
cl_int err;
if( preexistingBuffer )
{
m_buffer = *preexistingBuffer;
}
else {
cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
// At a minimum the buffer must exist
if( size == 0 )
size = sizeof(ElementType);
m_buffer = clCreateBuffer(m_clContext, flags, size, 0, &err);
if( err != CL_SUCCESS )
{
btAssert( "Buffer::Buffer(m_buffer)");
}
}
m_gpuSize = m_CPUBuffer->size();
return true;
}
public:
btOpenCLBuffer( cl_command_queue commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly)
:m_cqCommandQue(commandQue),
m_clContext(ctx),
m_buffer(0),
m_CPUBuffer(CPUBuffer),
m_gpuSize(0),
m_onGPU(false),
m_readOnlyOnGPU(readOnly),
m_allocated(false)
{
}
~btOpenCLBuffer()
{
clReleaseMemObject(m_buffer);
}
bool moveToGPU()
{
cl_int err;
if( (m_CPUBuffer->size() != m_gpuSize) )
{
m_onGPU = false;
}
if( !m_allocated && m_CPUBuffer->size() == 0 )
{
// If it isn't on the GPU and yet there is no data on the CPU side this may cause a problem with some kernels.
// We should create *something* on the device side
if (!createBuffer()) {
return false;
}
m_allocated = true;
}
if( !m_onGPU && m_CPUBuffer->size() > 0 )
{
if (!m_allocated || (m_CPUBuffer->size() != m_gpuSize)) {
if (!createBuffer()) {
return false;
}
m_allocated = true;
}
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
err = clEnqueueWriteBuffer(m_cqCommandQue,m_buffer,
CL_FALSE,
0,
size,
&((*m_CPUBuffer)[0]),0,0,0);
if( err != CL_SUCCESS )
{
btAssert( "CommandQueue::enqueueWriteBuffer(m_buffer)" );
}
m_onGPU = true;
}
return true;
}
bool moveFromGPU()
{
cl_int err;
if (m_CPUBuffer->size() > 0) {
if (m_onGPU && !m_readOnlyOnGPU) {
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
err = clEnqueueReadBuffer(m_cqCommandQue,
m_buffer,
CL_TRUE,
0,
size,
&((*m_CPUBuffer)[0]),0,0,0);
if( err != CL_SUCCESS )
{
btAssert( "CommandQueue::enqueueReadBuffer(m_buffer)" );
}
m_onGPU = false;
}
}
return true;
}
bool copyFromGPU()
{
cl_int err;
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
if (m_CPUBuffer->size() > 0) {
if (m_onGPU && !m_readOnlyOnGPU) {
err = clEnqueueReadBuffer(m_cqCommandQue,
m_buffer,
CL_TRUE,
0,size,
&((*m_CPUBuffer)[0]),0,0,0);
if( err != CL_SUCCESS )
{
btAssert( "CommandQueue::enqueueReadBuffer(m_buffer)");
}
}
}
return true;
}
virtual void changedOnCPU()
{
m_onGPU = false;
}
}; // class btOpenCLBuffer
#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H

View file

@ -0,0 +1,99 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
class btSoftBodyLinkDataOpenCL : public btSoftBodyLinkData
{
public:
bool m_onGPU;
cl_command_queue m_cqCommandQue;
btOpenCLBuffer<LinkNodePair> m_clLinks;
btOpenCLBuffer<float> m_clLinkStrength;
btOpenCLBuffer<float> m_clLinksMassLSC;
btOpenCLBuffer<float> m_clLinksRestLengthSquared;
btOpenCLBuffer<Vectormath::Aos::Vector3> m_clLinksCLength;
btOpenCLBuffer<float> m_clLinksLengthRatio;
btOpenCLBuffer<float> m_clLinksRestLength;
btOpenCLBuffer<float> m_clLinksMaterialLinearStiffnessCoefficient;
struct BatchPair
{
int start;
int length;
BatchPair() :
start(0),
length(0)
{
}
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_linkAddresses;
/**
* Start and length values for computation batches over link data.
*/
btAlignedObjectArray< BatchPair > m_batchStartLengths;
btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx);
virtual ~btSoftBodyLinkDataOpenCL();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createLinks( int numLinks );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setLinkAt(
const LinkDescription &link,
int linkIndex );
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire link set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H

View file

@ -0,0 +1,169 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
class btSoftBodyLinkDataOpenCLSIMDAware : public btSoftBodyLinkData
{
public:
bool m_onGPU;
cl_command_queue m_cqCommandQue;
const int m_wavefrontSize;
const int m_linksPerWorkItem;
const int m_maxLinksPerWavefront;
int m_maxBatchesWithinWave;
int m_maxVerticesWithinWave;
int m_numWavefronts;
int m_maxVertex;
struct NumBatchesVerticesPair
{
int numBatches;
int numVertices;
};
btAlignedObjectArray<int> m_linksPerWavefront;
btAlignedObjectArray<NumBatchesVerticesPair> m_numBatchesAndVerticesWithinWaves;
btOpenCLBuffer< NumBatchesVerticesPair > m_clNumBatchesAndVerticesWithinWaves;
// All arrays here will contain batches of m_maxLinksPerWavefront links
// ordered by wavefront.
// with either global vertex pairs or local vertex pairs
btAlignedObjectArray< int > m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
btOpenCLBuffer<int> m_clWavefrontVerticesGlobalAddresses;
btAlignedObjectArray< LinkNodePair > m_linkVerticesLocalAddresses; // Vertex pair for the link
btOpenCLBuffer<LinkNodePair> m_clLinkVerticesLocalAddresses;
btOpenCLBuffer<float> m_clLinkStrength;
btOpenCLBuffer<float> m_clLinksMassLSC;
btOpenCLBuffer<float> m_clLinksRestLengthSquared;
btOpenCLBuffer<float> m_clLinksRestLength;
btOpenCLBuffer<float> m_clLinksMaterialLinearStiffnessCoefficient;
struct BatchPair
{
int start;
int length;
BatchPair() :
start(0),
length(0)
{
}
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_linkAddresses;
/**
* Start and length values for computation batches over link data.
*/
btAlignedObjectArray< BatchPair > m_wavefrontBatchStartLengths;
btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx);
virtual ~btSoftBodyLinkDataOpenCLSIMDAware();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createLinks( int numLinks );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setLinkAt(
const LinkDescription &link,
int linkIndex );
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire link set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
int getMaxVerticesPerWavefront()
{
return m_maxVerticesWithinWave;
}
int getWavefrontSize()
{
return m_wavefrontSize;
}
int getLinksPerWorkItem()
{
return m_linksPerWorkItem;
}
int getMaxLinksPerWavefront()
{
return m_maxLinksPerWavefront;
}
int getMaxBatchesPerWavefront()
{
return m_maxBatchesWithinWave;
}
int getNumWavefronts()
{
return m_numWavefronts;
}
NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront )
{
return m_numBatchesAndVerticesWithinWaves[wavefront];
}
int getVertexGlobalAddresses( int vertexIndex )
{
return m_wavefrontVerticesGlobalAddresses[vertexIndex];
}
/**
* Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
*/
LinkNodePair getVertexPairLocalAddresses( int linkIndex )
{
return m_linkVerticesLocalAddresses[linkIndex];
}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H

View file

@ -0,0 +1,126 @@
#include "btSoftBodySolverOutputCLtoGL.h"
#include <stdio.h> //@todo: remove the debugging printf at some stage
#include "btSoftBodySolver_OpenCL.h"
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include "btSoftBodySolverVertexBuffer_OpenGL.h"
#include "BulletSoftBody/btSoftBody.h"
////OpenCL 1.0 kernels don't use float3
#define MSTRINGIFY(A) #A
static char* OutputToVertexArrayCLString =
#include "OpenCLC10/OutputToVertexArray.cl"
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
static const size_t workGroupSize = 128;
void btSoftBodySolverOutputCLtoGL::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
{
btSoftBodySolver *solver = softBody->getSoftBodySolver();
btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
btOpenCLSoftBodySolver *dxSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
checkInitialized();
btOpenCLAcceleratedSoftBodyInterface* currentCloth = dxSolver->findSoftBodyInterface( softBody );
btSoftBodyVertexDataOpenCL &vertexData( dxSolver->m_vertexData );
const int firstVertex = currentCloth->getFirstVertex();
const int lastVertex = firstVertex + currentCloth->getNumVertices();
if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::OPENGL_BUFFER ) {
const btOpenGLInteropVertexBufferDescriptor *openGLVertexBuffer = static_cast< btOpenGLInteropVertexBufferDescriptor* >(vertexBuffer);
cl_int ciErrNum = CL_SUCCESS;
cl_mem clBuffer = openGLVertexBuffer->getBuffer();
cl_kernel outputKernel = outputToVertexArrayWithNormalsKernel;
if( !vertexBuffer->hasNormals() )
outputKernel = outputToVertexArrayWithoutNormalsKernel;
ciErrNum = clEnqueueAcquireGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
}
int numVertices = currentCloth->getNumVertices();
ciErrNum = clSetKernelArg(outputKernel, 0, sizeof(int), &firstVertex );
ciErrNum = clSetKernelArg(outputKernel, 1, sizeof(int), &numVertices );
ciErrNum = clSetKernelArg(outputKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
if( vertexBuffer->hasVertexPositions() )
{
int vertexOffset = vertexBuffer->getVertexOffset();
int vertexStride = vertexBuffer->getVertexStride();
ciErrNum = clSetKernelArg(outputKernel, 3, sizeof(int), &vertexOffset );
ciErrNum = clSetKernelArg(outputKernel, 4, sizeof(int), &vertexStride );
ciErrNum = clSetKernelArg(outputKernel, 5, sizeof(cl_mem), (void*)&vertexData.m_clVertexPosition.m_buffer );
}
if( vertexBuffer->hasNormals() )
{
int normalOffset = vertexBuffer->getNormalOffset();
int normalStride = vertexBuffer->getNormalStride();
ciErrNum = clSetKernelArg(outputKernel, 6, sizeof(int), &normalOffset );
ciErrNum = clSetKernelArg(outputKernel, 7, sizeof(int), &normalStride );
ciErrNum = clSetKernelArg(outputKernel, 8, sizeof(cl_mem), (void*)&vertexData.m_clVertexNormal.m_buffer );
}
size_t numWorkItems = workGroupSize*((vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, outputKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(copySoftBodyToVertexBuffer)");
}
ciErrNum = clEnqueueReleaseGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, 0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueReleaseGLObjects(copySoftBodyToVertexBuffer)");
}
} else {
btAssert( "Undefined output for this solver output" == false );
}
// clFinish in here may not be the best thing. It's possible that we should have a waitForFrameComplete function.
clFinish(m_cqCommandQue);
} // btSoftBodySolverOutputCLtoGL::outputToVertexBuffers
bool btSoftBodySolverOutputCLtoGL::buildShaders()
{
// Ensure current kernels are released first
releaseKernels();
bool returnVal = true;
if( m_shadersInitialized )
return true;
outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
if( returnVal )
m_shadersInitialized = true;
return returnVal;
} // btSoftBodySolverOutputCLtoGL::buildShaders
void btSoftBodySolverOutputCLtoGL::releaseKernels()
{
RELEASE_CL_KERNEL( outputToVertexArrayWithNormalsKernel );
RELEASE_CL_KERNEL( outputToVertexArrayWithoutNormalsKernel );
m_shadersInitialized = false;
} // btSoftBodySolverOutputCLtoGL::releaseKernels
bool btSoftBodySolverOutputCLtoGL::checkInitialized()
{
if( !m_shadersInitialized )
if( buildShaders() )
m_shadersInitialized = true;
return m_shadersInitialized;
}

View file

@ -0,0 +1,62 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
#define BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
#include "btSoftBodySolver_OpenCL.h"
/**
* Class to manage movement of data from a solver to a given target.
* This version is the CL to GL interop version.
*/
class btSoftBodySolverOutputCLtoGL : public btSoftBodySolverOutput
{
protected:
cl_command_queue m_cqCommandQue;
cl_context m_cxMainContext;
CLFunctions clFunctions;
cl_kernel outputToVertexArrayWithNormalsKernel;
cl_kernel outputToVertexArrayWithoutNormalsKernel;
bool m_shadersInitialized;
virtual bool checkInitialized();
virtual bool buildShaders();
void releaseKernels();
public:
btSoftBodySolverOutputCLtoGL(cl_command_queue cqCommandQue, cl_context cxMainContext) :
m_cqCommandQue( cqCommandQue ),
m_cxMainContext( cxMainContext ),
clFunctions(cqCommandQue, cxMainContext),
outputToVertexArrayWithNormalsKernel( 0 ),
outputToVertexArrayWithoutNormalsKernel( 0 ),
m_shadersInitialized( false )
{
}
virtual ~btSoftBodySolverOutputCLtoGL()
{
releaseKernels();
}
/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H

View file

@ -0,0 +1,84 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
class btSoftBodyTriangleDataOpenCL : public btSoftBodyTriangleData
{
public:
bool m_onGPU;
cl_command_queue m_queue;
btOpenCLBuffer<btSoftBodyTriangleData::TriangleNodeSet> m_clVertexIndices;
btOpenCLBuffer<float> m_clArea;
btOpenCLBuffer<Vectormath::Aos::Vector3> m_clNormal;
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_triangleAddresses;
/**
* Start and length values for computation batches over link data.
*/
struct btSomePair
{
btSomePair() {}
btSomePair(int f,int s)
:first(f),second(s)
{
}
int first;
int second;
};
btAlignedObjectArray< btSomePair > m_batchStartLengths;
public:
btSoftBodyTriangleDataOpenCL( cl_command_queue queue, cl_context ctx );
virtual ~btSoftBodyTriangleDataOpenCL();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createTriangles( int numTriangles );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire triangle set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
}; // class btSoftBodyTriangleDataOpenCL
#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H

View file

@ -0,0 +1,166 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#ifdef __APPLE__
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#include <CL/cl_gl.h>
#endif //__APPLE__
#endif//USE_MINICL
#ifdef _WIN32//for glut.h
#include <windows.h>
#endif
//think different
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/OpenGL.h>
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#include <GLUT/glut.h>
#else
#ifdef _WINDOWS
#include <windows.h>
#include <GL/gl.h>
#include <GL/glu.h>
#else
#include <GL/glut.h>
#endif //_WINDOWS
#endif //APPLE
class btOpenGLInteropVertexBufferDescriptor : public btVertexBufferDescriptor
{
protected:
/** OpenCL context */
cl_context m_context;
/** OpenCL command queue */
cl_command_queue m_commandQueue;
/** OpenCL interop buffer */
cl_mem m_buffer;
/** VBO in GL that is the basis of the interop buffer */
GLuint m_openGLVBO;
public:
/**
* context is the OpenCL context this interop buffer will work in.
* queue is the command queue that kernels and data movement will be enqueued into.
* openGLVBO is the OpenGL vertex buffer data will be copied into.
* vertexOffset is the offset in floats to the first vertex.
* vertexStride is the stride in floats between vertices.
*/
btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride )
{
#ifndef USE_MINICL
cl_int ciErrNum = CL_SUCCESS;
m_context = context;
m_commandQueue = cqCommandQue;
m_vertexOffset = vertexOffset;
m_vertexStride = vertexStride;
m_openGLVBO = openGLVBO;
m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
}
m_hasVertexPositions = true;
#else
btAssert(0);//MiniCL shouldn't get here
#endif
}
/**
* context is the OpenCL context this interop buffer will work in.
* queue is the command queue that kernels and data movement will be enqueued into.
* openGLVBO is the OpenGL vertex buffer data will be copied into.
* vertexOffset is the offset in floats to the first vertex.
* vertexStride is the stride in floats between vertices.
* normalOffset is the offset in floats to the first normal.
* normalStride is the stride in floats between normals.
*/
btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride, int normalOffset, int normalStride )
{
#ifndef USE_MINICL
cl_int ciErrNum = CL_SUCCESS;
m_context = context;
m_commandQueue = cqCommandQue;
m_openGLVBO = openGLVBO;
m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
}
m_vertexOffset = vertexOffset;
m_vertexStride = vertexStride;
m_hasVertexPositions = true;
m_normalOffset = normalOffset;
m_normalStride = normalStride;
m_hasNormals = true;
#else
btAssert(0);
#endif //USE_MINICL
}
virtual ~btOpenGLInteropVertexBufferDescriptor()
{
clReleaseMemObject( m_buffer );
}
/**
* Return the type of the vertex buffer descriptor.
*/
virtual BufferTypes getBufferType() const
{
return OPENGL_BUFFER;
}
virtual cl_context getContext() const
{
return m_context;
}
virtual cl_mem getBuffer() const
{
return m_buffer;
}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H

View file

@ -0,0 +1,52 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
#define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
class btSoftBodyVertexDataOpenCL : public btSoftBodyVertexData
{
protected:
bool m_onGPU;
cl_command_queue m_queue;
public:
btOpenCLBuffer<int> m_clClothIdentifier;
btOpenCLBuffer<Vectormath::Aos::Point3> m_clVertexPosition;
btOpenCLBuffer<Vectormath::Aos::Point3> m_clVertexPreviousPosition;
btOpenCLBuffer<Vectormath::Aos::Vector3> m_clVertexVelocity;
btOpenCLBuffer<Vectormath::Aos::Vector3> m_clVertexForceAccumulator;
btOpenCLBuffer<Vectormath::Aos::Vector3> m_clVertexNormal;
btOpenCLBuffer<float> m_clVertexInverseMass;
btOpenCLBuffer<float> m_clVertexArea;
btOpenCLBuffer<int> m_clVertexTriangleCount;
public:
btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx);
virtual ~btSoftBodyVertexDataOpenCL();
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H

View file

@ -0,0 +1,527 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
#define BT_SOFT_BODY_SOLVER_OPENCL_H
#include "stddef.h" //for size_t
#include "vectormath/vmInclude.h"
#include "BulletSoftBody/btSoftBodySolvers.h"
#include "BulletSoftBody/btSoftBody.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#include "btSoftBodySolverLinkData_OpenCL.h"
#include "btSoftBodySolverVertexData_OpenCL.h"
#include "btSoftBodySolverTriangleData_OpenCL.h"
class CLFunctions
{
protected:
cl_command_queue m_cqCommandQue;
cl_context m_cxMainContext;
int m_kernelCompilationFailures;
public:
CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) :
m_cqCommandQue( cqCommandQue ),
m_cxMainContext( cxMainContext ),
m_kernelCompilationFailures(0)
{
}
int getKernelCompilationFailures() const
{
return m_kernelCompilationFailures;
}
/**
* Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
*/
virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros, const char* srcFileNameForCaching);
void clearKernelCompilationFailures()
{
m_kernelCompilationFailures=0;
}
};
/**
* Entry in the collision shape array.
* Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
*/
struct CollisionShapeDescription
{
Vectormath::Aos::Transform3 shapeTransform;
Vectormath::Aos::Vector3 linearVelocity;
Vectormath::Aos::Vector3 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Both needed for capsule
float radius;
float halfHeight;
int upAxis;
float margin;
float friction;
CollisionShapeDescription()
{
collisionShapeType = 0;
margin = 0;
friction = 0;
}
};
/**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
class btOpenCLAcceleratedSoftBodyInterface
{
protected:
/** Current number of vertices that are part of this cloth */
int m_numVertices;
/** Maximum number of vertices allocated to be part of this cloth */
int m_maxVertices;
/** Current number of triangles that are part of this cloth */
int m_numTriangles;
/** Maximum number of triangles allocated to be part of this cloth */
int m_maxTriangles;
/** Index of first vertex in the world allocated to this cloth */
int m_firstVertex;
/** Index of first triangle in the world allocated to this cloth */
int m_firstTriangle;
/** Index of first link in the world allocated to this cloth */
int m_firstLink;
/** Maximum number of links allocated to this cloth */
int m_maxLinks;
/** Current number of links allocated to this cloth */
int m_numLinks;
/** The actual soft body this data represents */
btSoftBody *m_softBody;
public:
btOpenCLAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
m_softBody( softBody )
{
m_numVertices = 0;
m_maxVertices = 0;
m_numTriangles = 0;
m_maxTriangles = 0;
m_firstVertex = 0;
m_firstTriangle = 0;
m_firstLink = 0;
m_maxLinks = 0;
m_numLinks = 0;
}
int getNumVertices()
{
return m_numVertices;
}
int getNumTriangles()
{
return m_numTriangles;
}
int getMaxVertices()
{
return m_maxVertices;
}
int getMaxTriangles()
{
return m_maxTriangles;
}
int getFirstVertex()
{
return m_firstVertex;
}
int getFirstTriangle()
{
return m_firstTriangle;
}
/**
* Update the bounds in the btSoftBody object
*/
void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
// TODO: All of these set functions will have to do checks and
// update the world because restructuring of the arrays will be necessary
// Reasonable use of "friend"?
void setNumVertices( int numVertices )
{
m_numVertices = numVertices;
}
void setNumTriangles( int numTriangles )
{
m_numTriangles = numTriangles;
}
void setMaxVertices( int maxVertices )
{
m_maxVertices = maxVertices;
}
void setMaxTriangles( int maxTriangles )
{
m_maxTriangles = maxTriangles;
}
void setFirstVertex( int firstVertex )
{
m_firstVertex = firstVertex;
}
void setFirstTriangle( int firstTriangle )
{
m_firstTriangle = firstTriangle;
}
void setMaxLinks( int maxLinks )
{
m_maxLinks = maxLinks;
}
void setNumLinks( int numLinks )
{
m_numLinks = numLinks;
}
void setFirstLink( int firstLink )
{
m_firstLink = firstLink;
}
int getMaxLinks()
{
return m_maxLinks;
}
int getNumLinks()
{
return m_numLinks;
}
int getFirstLink()
{
return m_firstLink;
}
btSoftBody* getSoftBody()
{
return m_softBody;
}
};
class btOpenCLSoftBodySolver : public btSoftBodySolver
{
public:
struct UIntVector3
{
UIntVector3()
{
x = 0;
y = 0;
z = 0;
_padding = 0;
}
UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ )
{
x = x_;
y = y_;
z = z_;
_padding = 0;
}
unsigned int x;
unsigned int y;
unsigned int z;
unsigned int _padding;
};
struct CollisionObjectIndices
{
CollisionObjectIndices( int f, int e )
{
firstObject = f;
endObject = e;
}
int firstObject;
int endObject;
};
btSoftBodyLinkDataOpenCL m_linkData;
btSoftBodyVertexDataOpenCL m_vertexData;
btSoftBodyTriangleDataOpenCL m_triangleData;
protected:
CLFunctions m_defaultCLFunctions;
CLFunctions* m_currentCLFunctions;
/** Variable to define whether we need to update solver constants on the next iteration */
bool m_updateSolverConstants;
bool m_shadersInitialized;
/**
* Cloths owned by this solver.
* Only our cloths are in this array.
*/
btAlignedObjectArray< btOpenCLAcceleratedSoftBodyInterface * > m_softBodySet;
/** Acceleration value to be applied to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver.
*/
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothAcceleration;
btOpenCLBuffer<Vectormath::Aos::Vector3> m_clPerClothAcceleration;
/** Wind velocity to be applied normal to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver.
*/
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothWindVelocity;
btOpenCLBuffer<Vectormath::Aos::Vector3> m_clPerClothWindVelocity;
/** Velocity damping factor */
btAlignedObjectArray< float > m_perClothDampingFactor;
btOpenCLBuffer<float> m_clPerClothDampingFactor;
/** Velocity correction coefficient */
btAlignedObjectArray< float > m_perClothVelocityCorrectionCoefficient;
btOpenCLBuffer<float> m_clPerClothVelocityCorrectionCoefficient;
/** Lift parameter for wind effect on cloth. */
btAlignedObjectArray< float > m_perClothLiftFactor;
btOpenCLBuffer<float> m_clPerClothLiftFactor;
/** Drag parameter for wind effect on cloth. */
btAlignedObjectArray< float > m_perClothDragFactor;
btOpenCLBuffer<float> m_clPerClothDragFactor;
/** Density of the medium in which each cloth sits */
btAlignedObjectArray< float > m_perClothMediumDensity;
btOpenCLBuffer<float> m_clPerClothMediumDensity;
/**
* Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
*/
btAlignedObjectArray< CollisionObjectIndices > m_perClothCollisionObjects;
btOpenCLBuffer<CollisionObjectIndices> m_clPerClothCollisionObjects;
/**
* Collision shapes being passed across to the cloths in this solver.
*/
btAlignedObjectArray< CollisionShapeDescription > m_collisionObjectDetails;
btOpenCLBuffer< CollisionShapeDescription > m_clCollisionObjectDetails;
/**
* Friction coefficient for each cloth
*/
btAlignedObjectArray< float > m_perClothFriction;
btOpenCLBuffer< float > m_clPerClothFriction;
// anchor node info
struct AnchorNodeInfoCL
{
int clVertexIndex;
btSoftBody::Node* pNode;
};
btAlignedObjectArray<AnchorNodeInfoCL> m_anchorNodeInfoArray;
btAlignedObjectArray<Vectormath::Aos::Point3> m_anchorPosition;
btOpenCLBuffer<Vectormath::Aos::Point3> m_clAnchorPosition;
btAlignedObjectArray<int> m_anchorIndex;
btOpenCLBuffer<int> m_clAnchorIndex;
bool m_bUpdateAnchoredNodePos;
cl_kernel m_prepareLinksKernel;
cl_kernel m_solvePositionsFromLinksKernel;
cl_kernel m_updateConstantsKernel;
cl_kernel m_integrateKernel;
cl_kernel m_addVelocityKernel;
cl_kernel m_updatePositionsFromVelocitiesKernel;
cl_kernel m_updateVelocitiesFromPositionsWithoutVelocitiesKernel;
cl_kernel m_updateVelocitiesFromPositionsWithVelocitiesKernel;
cl_kernel m_vSolveLinksKernel;
cl_kernel m_solveCollisionsAndUpdateVelocitiesKernel;
cl_kernel m_resetNormalsAndAreasKernel;
cl_kernel m_normalizeNormalsAndAreasKernel;
cl_kernel m_updateSoftBodiesKernel;
cl_kernel m_outputToVertexArrayKernel;
cl_kernel m_applyForcesKernel;
cl_kernel m_updateFixedVertexPositionsKernel;
cl_command_queue m_cqCommandQue;
cl_context m_cxMainContext;
size_t m_defaultWorkGroupSize;
virtual bool buildShaders();
void resetNormalsAndAreas( int numVertices );
void normalizeNormalsAndAreas( int numVertices );
void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
void prepareCollisionConstraints();
Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
int findSoftBodyIndex( const btSoftBody* const softBody );
virtual void applyForces( float solverdt );
void updateFixedVertexPositions();
/**
* Integrate motion on the solver.
*/
virtual void integrate( float solverdt );
virtual void updateConstants( float timeStep );
float computeTriangleArea(
const Vectormath::Aos::Point3 &vertex0,
const Vectormath::Aos::Point3 &vertex1,
const Vectormath::Aos::Point3 &vertex2 );
//////////////////////////////////////
// Kernel dispatches
void prepareLinks();
void solveLinksForVelocity( int startLink, int numLinks, float kst );
void updatePositionsFromVelocities( float solverdt );
virtual void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
virtual void solveCollisionsAndUpdateVelocities( float isolverdt );
// End kernel dispatches
/////////////////////////////////////
void updateBounds();
void releaseKernels();
public:
btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false);
virtual ~btOpenCLSoftBodySolver();
btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
virtual btSoftBodyLinkData &getLinkData();
virtual btSoftBodyVertexData &getVertexData();
virtual btSoftBodyTriangleData &getTriangleData();
virtual SolverTypes getSolverType() const
{
return CL_SOLVER;
}
virtual bool checkInitialized();
virtual void updateSoftBodies( );
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void copyBackToSoftBodies(bool bMove = true);
virtual void solveConstraints( float solverdt );
virtual void predictMotion( float solverdt );
virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );
virtual void processCollision( btSoftBody*, btSoftBody* );
virtual void setDefaultWorkgroupSize(size_t workGroupSize)
{
m_defaultWorkGroupSize = workGroupSize;
}
virtual size_t getDefaultWorkGroupSize() const
{
return m_defaultWorkGroupSize;
}
void setCLFunctions(CLFunctions* funcs)
{
if (funcs)
m_currentCLFunctions = funcs;
else
m_currentCLFunctions = &m_defaultCLFunctions;
}
}; // btOpenCLSoftBodySolver
/**
* Class to manage movement of data from a solver to a given target.
* This version is the CL to CPU version.
*/
class btSoftBodySolverOutputCLtoCPU : public btSoftBodySolverOutput
{
protected:
public:
btSoftBodySolverOutputCLtoCPU()
{
}
/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H

View file

@ -0,0 +1,81 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
#define BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
#include "stddef.h" //for size_t
#include "vectormath/vmInclude.h"
#include "btSoftBodySolver_OpenCL.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#include "btSoftBodySolverLinkData_OpenCLSIMDAware.h"
#include "btSoftBodySolverVertexData_OpenCL.h"
#include "btSoftBodySolverTriangleData_OpenCL.h"
class btOpenCLSoftBodySolverSIMDAware : public btOpenCLSoftBodySolver
{
protected:
btSoftBodyLinkDataOpenCLSIMDAware m_linkData;
virtual bool buildShaders();
void updateConstants( float timeStep );
float computeTriangleArea(
const Vectormath::Aos::Point3 &vertex0,
const Vectormath::Aos::Point3 &vertex1,
const Vectormath::Aos::Point3 &vertex2 );
//////////////////////////////////////
// Kernel dispatches
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
void solveCollisionsAndUpdateVelocities( float isolverdt );
// End kernel dispatches
/////////////////////////////////////
public:
btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false);
virtual ~btOpenCLSoftBodySolverSIMDAware();
virtual SolverTypes getSolverType() const
{
return CL_SIMD_SOLVER;
}
virtual btSoftBodyLinkData &getLinkData();
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void solveConstraints( float solverdt );
}; // btOpenCLSoftBodySolverSIMDAware
#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H

View file

@ -0,0 +1,748 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_DATA_H
#define BT_SOFT_BODY_SOLVER_DATA_H
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
#include "vectormath/vmInclude.h"
class btSoftBodyLinkData
{
public:
/**
* Class representing a link as a set of three indices into the vertex array.
*/
class LinkNodePair
{
public:
int vertex0;
int vertex1;
LinkNodePair()
{
vertex0 = 0;
vertex1 = 0;
}
LinkNodePair( int v0, int v1 )
{
vertex0 = v0;
vertex1 = v1;
}
};
/**
* Class describing a link for input into the system.
*/
class LinkDescription
{
protected:
int m_vertex0;
int m_vertex1;
float m_linkLinearStiffness;
float m_linkStrength;
public:
LinkDescription()
{
m_vertex0 = 0;
m_vertex1 = 0;
m_linkLinearStiffness = 1.0;
m_linkStrength = 1.0;
}
LinkDescription( int newVertex0, int newVertex1, float linkLinearStiffness )
{
m_vertex0 = newVertex0;
m_vertex1 = newVertex1;
m_linkLinearStiffness = linkLinearStiffness;
m_linkStrength = 1.0;
}
LinkNodePair getVertexPair() const
{
LinkNodePair nodes;
nodes.vertex0 = m_vertex0;
nodes.vertex1 = m_vertex1;
return nodes;
}
void setVertex0( int vertex )
{
m_vertex0 = vertex;
}
void setVertex1( int vertex )
{
m_vertex1 = vertex;
}
void setLinkLinearStiffness( float linearStiffness )
{
m_linkLinearStiffness = linearStiffness;
}
void setLinkStrength( float strength )
{
m_linkStrength = strength;
}
int getVertex0() const
{
return m_vertex0;
}
int getVertex1() const
{
return m_vertex1;
}
float getLinkStrength() const
{
return m_linkStrength;
}
float getLinkLinearStiffness() const
{
return m_linkLinearStiffness;
}
};
protected:
// NOTE:
// Vertex reference data is stored relative to global array, not relative to individual cloth.
// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
// to another.
btAlignedObjectArray< LinkNodePair > m_links; // Vertex pair for the link
btAlignedObjectArray< float > m_linkStrength; // Strength of each link
// (inverseMassA + inverseMassB)/ linear stiffness coefficient
btAlignedObjectArray< float > m_linksMassLSC;
btAlignedObjectArray< float > m_linksRestLengthSquared;
// Current vector length of link
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_linksCLength;
// 1/(current length * current length * massLSC)
btAlignedObjectArray< float > m_linksLengthRatio;
btAlignedObjectArray< float > m_linksRestLength;
btAlignedObjectArray< float > m_linksMaterialLinearStiffnessCoefficient;
public:
btSoftBodyLinkData()
{
}
virtual ~btSoftBodyLinkData()
{
}
virtual void clear()
{
m_links.resize(0);
m_linkStrength.resize(0);
m_linksMassLSC.resize(0);
m_linksRestLengthSquared.resize(0);
m_linksLengthRatio.resize(0);
m_linksRestLength.resize(0);
m_linksMaterialLinearStiffnessCoefficient.resize(0);
}
int getNumLinks()
{
return m_links.size();
}
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createLinks( int numLinks )
{
int previousSize = m_links.size();
int newSize = previousSize + numLinks;
// Resize all the arrays that store link data
m_links.resize( newSize );
m_linkStrength.resize( newSize );
m_linksMassLSC.resize( newSize );
m_linksRestLengthSquared.resize( newSize );
m_linksCLength.resize( newSize );
m_linksLengthRatio.resize( newSize );
m_linksRestLength.resize( newSize );
m_linksMaterialLinearStiffnessCoefficient.resize( newSize );
}
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setLinkAt( const LinkDescription &link, int linkIndex )
{
m_links[linkIndex] = link.getVertexPair();
m_linkStrength[linkIndex] = link.getLinkStrength();
m_linksMassLSC[linkIndex] = 0.f;
m_linksRestLengthSquared[linkIndex] = 0.f;
m_linksCLength[linkIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
m_linksLengthRatio[linkIndex] = 0.f;
m_linksRestLength[linkIndex] = 0.f;
m_linksMaterialLinearStiffnessCoefficient[linkIndex] = link.getLinkLinearStiffness();
}
/**
* Return true if data is on the accelerator.
* The CPU version of this class will return true here because
* the CPU is the same as the accelerator.
*/
virtual bool onAccelerator()
{
return true;
}
/**
* Move data from host memory to the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveToAccelerator()
{
return true;
}
/**
* Move data from host memory from the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveFromAccelerator()
{
return true;
}
/**
* Return reference to the vertex index pair for link linkIndex as stored on the host.
*/
LinkNodePair &getVertexPair( int linkIndex )
{
return m_links[linkIndex];
}
/**
* Return reference to strength of link linkIndex as stored on the host.
*/
float &getStrength( int linkIndex )
{
return m_linkStrength[linkIndex];
}
/**
* Return a reference to the strength of the link corrected for link sorting.
* This is important if we are using data on an accelerator which has the data sorted in some fashion.
*/
virtual float &getStrengthCorrected( int linkIndex )
{
return getStrength( linkIndex );
}
/**
* Return reference to the rest length of link linkIndex as stored on the host.
*/
float &getRestLength( int linkIndex )
{
return m_linksRestLength[linkIndex];
}
/**
* Return reference to linear stiffness coefficient for link linkIndex as stored on the host.
*/
float &getLinearStiffnessCoefficient( int linkIndex )
{
return m_linksMaterialLinearStiffnessCoefficient[linkIndex];
}
/**
* Return reference to the MassLSC value for link linkIndex as stored on the host.
*/
float &getMassLSC( int linkIndex )
{
return m_linksMassLSC[linkIndex];
}
/**
* Return reference to rest length squared for link linkIndex as stored on the host.
*/
float &getRestLengthSquared( int linkIndex )
{
return m_linksRestLengthSquared[linkIndex];
}
/**
* Return reference to current length of link linkIndex as stored on the host.
*/
Vectormath::Aos::Vector3 &getCurrentLength( int linkIndex )
{
return m_linksCLength[linkIndex];
}
/**
* Return the link length ratio from for link linkIndex as stored on the host.
*/
float &getLinkLengthRatio( int linkIndex )
{
return m_linksLengthRatio[linkIndex];
}
};
/**
* Wrapper for vertex data information.
* By wrapping it like this we stand a good chance of being able to optimise for storage format easily.
* It should also help us make sure all the data structures remain consistent.
*/
class btSoftBodyVertexData
{
public:
/**
* Class describing a vertex for input into the system.
*/
class VertexDescription
{
private:
Vectormath::Aos::Point3 m_position;
/** Inverse mass. If this is 0f then the mass was 0 because that simplifies calculations. */
float m_inverseMass;
public:
VertexDescription()
{
m_position = Vectormath::Aos::Point3( 0.f, 0.f, 0.f );
m_inverseMass = 0.f;
}
VertexDescription( const Vectormath::Aos::Point3 &position, float mass )
{
m_position = position;
if( mass > 0.f )
m_inverseMass = 1.0f/mass;
else
m_inverseMass = 0.f;
}
void setPosition( const Vectormath::Aos::Point3 &position )
{
m_position = position;
}
void setInverseMass( float inverseMass )
{
m_inverseMass = inverseMass;
}
void setMass( float mass )
{
if( mass > 0.f )
m_inverseMass = 1.0f/mass;
else
m_inverseMass = 0.f;
}
Vectormath::Aos::Point3 getPosition() const
{
return m_position;
}
float getInverseMass() const
{
return m_inverseMass;
}
float getMass() const
{
if( m_inverseMass == 0.f )
return 0.f;
else
return 1.0f/m_inverseMass;
}
};
protected:
// identifier for the individual cloth
// For the CPU we don't really need this as we can grab the cloths and iterate over only their vertices
// For a parallel accelerator knowing on a per-vertex basis which cloth we're part of will help for obtaining
// per-cloth data
// For sorting etc it might also be helpful to be able to use in-array data such as this.
btAlignedObjectArray< int > m_clothIdentifier;
btAlignedObjectArray< Vectormath::Aos::Point3 > m_vertexPosition; // vertex positions
btAlignedObjectArray< Vectormath::Aos::Point3 > m_vertexPreviousPosition; // vertex positions
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_vertexVelocity; // Velocity
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_vertexForceAccumulator; // Force accumulator
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_vertexNormal; // Normals
btAlignedObjectArray< float > m_vertexInverseMass; // Inverse mass
btAlignedObjectArray< float > m_vertexArea; // Area controlled by the vertex
btAlignedObjectArray< int > m_vertexTriangleCount; // Number of triangles touching this vertex
public:
btSoftBodyVertexData()
{
}
virtual ~btSoftBodyVertexData()
{
}
virtual void clear()
{
m_clothIdentifier.resize(0);
m_vertexPosition.resize(0);
m_vertexPreviousPosition.resize(0);
m_vertexVelocity.resize(0);
m_vertexForceAccumulator.resize(0);
m_vertexNormal.resize(0);
m_vertexInverseMass.resize(0);
m_vertexArea.resize(0);
m_vertexTriangleCount.resize(0);
}
int getNumVertices()
{
return m_vertexPosition.size();
}
int getClothIdentifier( int vertexIndex )
{
return m_clothIdentifier[vertexIndex];
}
void setVertexAt( const VertexDescription &vertex, int vertexIndex )
{
m_vertexPosition[vertexIndex] = vertex.getPosition();
m_vertexPreviousPosition[vertexIndex] = vertex.getPosition();
m_vertexVelocity[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
m_vertexForceAccumulator[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
m_vertexNormal[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
m_vertexInverseMass[vertexIndex] = vertex.getInverseMass();
m_vertexArea[vertexIndex] = 0.f;
m_vertexTriangleCount[vertexIndex] = 0;
}
/**
* Create numVertices new vertices for cloth clothIdentifier
* maxVertices allows a buffer zone of extra vertices for alignment or tearing reasons.
*/
void createVertices( int numVertices, int clothIdentifier, int maxVertices = 0 )
{
int previousSize = m_vertexPosition.size();
if( maxVertices == 0 )
maxVertices = numVertices;
int newSize = previousSize + maxVertices;
// Resize all the arrays that store vertex data
m_clothIdentifier.resize( newSize );
m_vertexPosition.resize( newSize );
m_vertexPreviousPosition.resize( newSize );
m_vertexVelocity.resize( newSize );
m_vertexForceAccumulator.resize( newSize );
m_vertexNormal.resize( newSize );
m_vertexInverseMass.resize( newSize );
m_vertexArea.resize( newSize );
m_vertexTriangleCount.resize( newSize );
for( int vertexIndex = previousSize; vertexIndex < newSize; ++vertexIndex )
m_clothIdentifier[vertexIndex] = clothIdentifier;
for( int vertexIndex = (previousSize + numVertices); vertexIndex < newSize; ++vertexIndex )
m_clothIdentifier[vertexIndex] = -1;
}
// Get and set methods in header so they can be inlined
/**
* Return a reference to the position of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Point3 &getPosition( int vertexIndex )
{
return m_vertexPosition[vertexIndex];
}
Vectormath::Aos::Point3 getPosition( int vertexIndex ) const
{
return m_vertexPosition[vertexIndex];
}
/**
* Return a reference to the previous position of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Point3 &getPreviousPosition( int vertexIndex )
{
return m_vertexPreviousPosition[vertexIndex];
}
/**
* Return a reference to the velocity of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Vector3 &getVelocity( int vertexIndex )
{
return m_vertexVelocity[vertexIndex];
}
/**
* Return a reference to the force accumulator of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Vector3 &getForceAccumulator( int vertexIndex )
{
return m_vertexForceAccumulator[vertexIndex];
}
/**
* Return a reference to the normal of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Vector3 &getNormal( int vertexIndex )
{
return m_vertexNormal[vertexIndex];
}
Vectormath::Aos::Vector3 getNormal( int vertexIndex ) const
{
return m_vertexNormal[vertexIndex];
}
/**
* Return a reference to the inverse mass of vertex vertexIndex as stored on the host.
*/
float &getInverseMass( int vertexIndex )
{
return m_vertexInverseMass[vertexIndex];
}
/**
* Get access to the area controlled by this vertex.
*/
float &getArea( int vertexIndex )
{
return m_vertexArea[vertexIndex];
}
/**
* Get access to the array of how many triangles touch each vertex.
*/
int &getTriangleCount( int vertexIndex )
{
return m_vertexTriangleCount[vertexIndex];
}
/**
* Return true if data is on the accelerator.
* The CPU version of this class will return true here because
* the CPU is the same as the accelerator.
*/
virtual bool onAccelerator()
{
return true;
}
/**
* Move data from host memory to the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveToAccelerator()
{
return true;
}
/**
* Move data to host memory from the accelerator if bCopy is false.
* If bCopy is true, copy data to host memory from the accelerator so that data
* won't be moved to accelerator when moveToAccelerator() is called next time.
* If bCopyMinimum is true, only vertex position and normal are copied.
* bCopyMinimum will be meaningful only if bCopy is true.
* The CPU version will always return that it has moved it.
*/
virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true)
{
return true;
}
btAlignedObjectArray< Vectormath::Aos::Point3 > &getVertexPositions()
{
return m_vertexPosition;
}
};
class btSoftBodyTriangleData
{
public:
/**
* Class representing a triangle as a set of three indices into the
* vertex array.
*/
class TriangleNodeSet
{
public:
int vertex0;
int vertex1;
int vertex2;
int _padding;
TriangleNodeSet( )
{
vertex0 = 0;
vertex1 = 0;
vertex2 = 0;
_padding = -1;
}
TriangleNodeSet( int newVertex0, int newVertex1, int newVertex2 )
{
vertex0 = newVertex0;
vertex1 = newVertex1;
vertex2 = newVertex2;
}
};
class TriangleDescription
{
protected:
int m_vertex0;
int m_vertex1;
int m_vertex2;
public:
TriangleDescription()
{
m_vertex0 = 0;
m_vertex1 = 0;
m_vertex2 = 0;
}
TriangleDescription( int newVertex0, int newVertex1, int newVertex2 )
{
m_vertex0 = newVertex0;
m_vertex1 = newVertex1;
m_vertex2 = newVertex2;
}
TriangleNodeSet getVertexSet() const
{
btSoftBodyTriangleData::TriangleNodeSet nodes;
nodes.vertex0 = m_vertex0;
nodes.vertex1 = m_vertex1;
nodes.vertex2 = m_vertex2;
return nodes;
}
};
protected:
// NOTE:
// Vertex reference data is stored relative to global array, not relative to individual cloth.
// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
// to another.
btAlignedObjectArray< TriangleNodeSet > m_vertexIndices;
btAlignedObjectArray< float > m_area;
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_normal;
public:
btSoftBodyTriangleData()
{
}
virtual ~btSoftBodyTriangleData()
{
}
virtual void clear()
{
m_vertexIndices.resize(0);
m_area.resize(0);
m_normal.resize(0);
}
int getNumTriangles()
{
return m_vertexIndices.size();
}
virtual void setTriangleAt( const TriangleDescription &triangle, int triangleIndex )
{
m_vertexIndices[triangleIndex] = triangle.getVertexSet();
}
virtual void createTriangles( int numTriangles )
{
int previousSize = m_vertexIndices.size();
int newSize = previousSize + numTriangles;
// Resize all the arrays that store triangle data
m_vertexIndices.resize( newSize );
m_area.resize( newSize );
m_normal.resize( newSize );
}
/**
* Return the vertex index set for triangle triangleIndex as stored on the host.
*/
const TriangleNodeSet &getVertexSet( int triangleIndex )
{
return m_vertexIndices[triangleIndex];
}
/**
* Get access to the triangle area.
*/
float &getTriangleArea( int triangleIndex )
{
return m_area[triangleIndex];
}
/**
* Get access to the normal vector for this triangle.
*/
Vectormath::Aos::Vector3 &getNormal( int triangleIndex )
{
return m_normal[triangleIndex];
}
/**
* Return true if data is on the accelerator.
* The CPU version of this class will return true here because
* the CPU is the same as the accelerator.
*/
virtual bool onAccelerator()
{
return true;
}
/**
* Move data from host memory to the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveToAccelerator()
{
return true;
}
/**
* Move data from host memory from the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveFromAccelerator()
{
return true;
}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_DATA_H