Bullet Library v2.81

This commit is contained in:
LuisAntonRebollo 2013-07-04 20:50:16 +02:00
parent 64fef8b2ad
commit 1eb94f4828
462 changed files with 59613 additions and 8036 deletions

View file

@ -1,71 +1,123 @@
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src
${BULLET_PHYSICS_SOURCE_DIR}/src/BulletMultiThreaded/vectormath/scalar/cpp
${VECTOR_MATH_INCLUDE}
)
ADD_LIBRARY(BulletMultiThreaded
PlatformDefinitions.h
SpuFakeDma.cpp
SpuFakeDma.h
SpuSync.h
SpuDoubleBuffer.h
SpuLibspe2Support.cpp
SpuLibspe2Support.h
btThreadSupportInterface.cpp
btThreadSupportInterface.h
Win32ThreadSupport.cpp
Win32ThreadSupport.h
PosixThreadSupport.cpp
PosixThreadSupport.h
SequentialThreadSupport.cpp
SequentialThreadSupport.h
SpuSampleTaskProcess.h
SpuSampleTaskProcess.cpp
SET(BulletMultiThreaded_SRCS
SpuFakeDma.cpp
SpuLibspe2Support.cpp
btThreadSupportInterface.cpp
Win32ThreadSupport.cpp
PosixThreadSupport.cpp
SequentialThreadSupport.cpp
SpuSampleTaskProcess.cpp
SpuCollisionObjectWrapper.cpp
SpuCollisionTaskProcess.cpp
SpuGatheringCollisionDispatcher.cpp
SpuContactManifoldCollisionAlgorithm.cpp
btParallelConstraintSolver.cpp
#SPURS_PEGatherScatterTask/SpuPEGatherScatterTask.cpp
#SpuPEGatherScatterTaskProcess.cpp
SpuCollisionObjectWrapper.cpp
SpuCollisionObjectWrapper.h
SpuCollisionTaskProcess.h
SpuCollisionTaskProcess.cpp
SpuGatheringCollisionDispatcher.h
SpuGatheringCollisionDispatcher.cpp
SpuContactManifoldCollisionAlgorithm.cpp
SpuContactManifoldCollisionAlgorithm.h
SpuNarrowPhaseCollisionTask/Box.h
SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
SpuNarrowPhaseCollisionTask/boxBoxDistance.h
SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
SpuNarrowPhaseCollisionTask/SpuContactResult.h
SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
#Some GPU related stuff, mainly CUDA and perhaps OpenCL
btGpu3DGridBroadphase.cpp
btGpu3DGridBroadphase.h
btGpu3DGridBroadphaseSharedCode.h
btGpu3DGridBroadphaseSharedDefs.h
btGpu3DGridBroadphaseSharedTypes.h
btGpuDefines.h
btGpuUtilsSharedCode.h
btGpuUtilsSharedDefs.h
#MiniCL provides a small subset of OpenCL
MiniCLTaskScheduler.cpp
MiniCLTaskScheduler.h
MiniCLTask/MiniCLTask.cpp
MiniCLTask/MiniCLTask.h
../MiniCL/cl.h
../MiniCL/cl_gl.h
../MiniCL/cl_platform.h
#Some GPU related stuff, mainly CUDA and perhaps OpenCL
btGpu3DGridBroadphase.cpp
)
# Headers that live directly in this directory.
# Only header files belong in this list: it is passed to PUBLIC_HEADER for the
# framework build, so a source file listed here would be shipped as a header.
# SpuCollisionObjectWrapper.cpp therefore stays in BulletMultiThreaded_SRCS only.
SET(Root_HDRS
	PlatformDefinitions.h
	PpuAddressSpace.h
	SpuFakeDma.h
	SpuDoubleBuffer.h
	SpuLibspe2Support.h
	btThreadSupportInterface.h
	Win32ThreadSupport.h
	PosixThreadSupport.h
	SequentialThreadSupport.h
	SpuSampleTaskProcess.h
	SpuCollisionObjectWrapper.h
	SpuCollisionTaskProcess.h
	SpuGatheringCollisionDispatcher.h
	SpuContactManifoldCollisionAlgorithm.h
	btParallelConstraintSolver.h
	#SPURS_PEGatherScatterTask/SpuPEGatherScatterTask.h
	#SpuPEGatherScatterTaskProcess.h
	#Some GPU related stuff, mainly CUDA and perhaps OpenCL
	btGpu3DGridBroadphase.h
	btGpu3DGridBroadphaseSharedCode.h
	btGpu3DGridBroadphaseSharedDefs.h
	btGpu3DGridBroadphaseSharedTypes.h
	btGpuDefines.h
	btGpuUtilsSharedCode.h
	btGpuUtilsSharedDefs.h
)
# Headers of the SpuNarrowPhaseCollisionTask sub-directory. They are kept in
# their own list so the framework build can place them in a matching
# sub-folder of the bundle headers.
SET(SpuNarrowPhaseCollisionTask_HDRS
SpuNarrowPhaseCollisionTask/Box.h
SpuNarrowPhaseCollisionTask/boxBoxDistance.h
SpuNarrowPhaseCollisionTask/SpuContactResult.h
SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
)
# Every header of the library; handed to ADD_LIBRARY together with the sources.
SET(BulletMultiThreaded_HDRS
${Root_HDRS}
${SpuNarrowPhaseCollisionTask_HDRS}
)
# Build the library from the source list plus the header lists.
ADD_LIBRARY(BulletMultiThreaded ${BulletMultiThreaded_SRCS} ${BulletMultiThreaded_HDRS})
SET_TARGET_PROPERTIES(BulletMultiThreaded PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletMultiThreaded PROPERTIES SOVERSION ${BULLET_VERSION})
# SUBDIRS is deprecated; ADD_SUBDIRECTORY is its replacement and configures
# the GPU soft body solvers right here instead of after this file finishes.
ADD_SUBDIRECTORY(GpuSoftBodySolvers)
# Dependencies are only named for shared builds. Both branches already list
# BulletCollision, so no separate unconditional link call is needed.
IF (BUILD_SHARED_LIBS)
	IF (UNIX)
		# The POSIX thread support sources need the pthread library.
		TARGET_LINK_LIBRARIES(BulletMultiThreaded BulletDynamics BulletCollision pthread)
	ELSE ()
		TARGET_LINK_LIBRARIES(BulletMultiThreaded BulletDynamics BulletCollision)
	ENDIF ()
ENDIF ()
# Install rules for the library and its headers.
IF (INSTALL_LIBS)
	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
		#INSTALL of other files requires CMake 2.6
		# VERSION_GREATER compares version components one by one; plain GREATER
		# would read the "major.minor" text as a single decimal number.
		IF ("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" VERSION_GREATER 2.5)
			# IF(INSTALL_EXTRA_LIBS)
			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletMultiThreaded DESTINATION .)
			ELSE ()
				INSTALL(TARGETS BulletMultiThreaded DESTINATION lib${LIB_SUFFIX})
				# Copy every header below this directory, skipping version
				# control and CMake bookkeeping folders.
				INSTALL(DIRECTORY
					${CMAKE_CURRENT_SOURCE_DIR} DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING
					PATTERN "*.h" PATTERN ".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
			ENDIF ()
			# ENDIF (INSTALL_EXTRA_LIBS)
		ENDIF ()

		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			SET_TARGET_PROPERTIES(BulletMultiThreaded PROPERTIES FRAMEWORK true)
			SET_TARGET_PROPERTIES(BulletMultiThreaded PROPERTIES PUBLIC_HEADER "${Root_HDRS}")
			# Have to list out sub-directories manually:
			SET_PROPERTY(SOURCE ${SpuNarrowPhaseCollisionTask_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/SpuNarrowPhaseCollisionTask)
		ENDIF ()
	ENDIF ()
ENDIF ()

View file

@ -0,0 +1,13 @@
# Entry point for the GPU soft body solver back ends.
INCLUDE_DIRECTORIES(
	${BULLET_PHYSICS_SOURCE_DIR}/src
)

# SUBDIRS is deprecated; ADD_SUBDIRECTORY is its replacement.
# The OpenCL back end is always configured.
ADD_SUBDIRECTORY(OpenCL)

# The DirectX 11 back end is only configured when USE_DX11 is set.
IF (USE_DX11)
	ADD_SUBDIRECTORY(DX11)
ENDIF ()

View file

@ -0,0 +1,83 @@
INCLUDE_DIRECTORIES(
	${BULLET_PHYSICS_SOURCE_DIR}/src
)

# NOTE(review): DXSDK_DIR is captured from the environment but nothing in this
# file reads it; the include path below is derived from DIRECTX_SDK_BASE_DIR,
# which has to be provided from outside this file.
SET(DXSDK_DIR $ENV{DXSDK_DIR})

# PATH is the cache entry type for a directory. The former "DOCSTRING" is not
# a cache entry type at all, so the entry did not get a directory chooser.
SET(DX11_INCLUDE_PATH "${DIRECTX_SDK_BASE_DIR}/Include" CACHE PATH "Microsoft directX SDK include path")

INCLUDE_DIRECTORIES(
	${DX11_INCLUDE_PATH} "../Shared/"
	${VECTOR_MATH_INCLUDE}
)
SET(BulletSoftBodyDX11Solvers_SRCS
	btSoftBodySolver_DX11.cpp
	btSoftBodySolver_DX11SIMDAware.cpp
)

SET(BulletSoftBodyDX11Solvers_HDRS
	btSoftBodySolver_DX11.h
	btSoftBodySolver_DX11SIMDAware.h
	../Shared/btSoftBodySolverData.h
	btSoftBodySolverVertexData_DX11.h
	btSoftBodySolverTriangleData_DX11.h
	btSoftBodySolverLinkData_DX11.h
	btSoftBodySolverLinkData_DX11SIMDAware.h
	btSoftBodySolverBuffer_DX11.h
	btSoftBodySolverVertexBuffer_DX11.h
)

# HLSL shaders, named without directory or extension.
# No build rule is created for them in this file: they are only attached to
# the library target so that they show up next to the sources.
SET(BulletSoftBodyDX11Solvers_Shaders
	OutputToVertexArray
	UpdateNormals
	Integrate
	UpdatePositions
	UpdateNodes
	ComputeBounds
	SolvePositions
	SolvePositionsSIMDBatched
	SolveCollisionsAndUpdateVelocities
	SolveCollisionsAndUpdateVelocitiesSIMDBatched
	UpdatePositionsFromVelocities
	ApplyForces
	PrepareLinks
	VSolveLinks
)

# Start from an empty list so a value inherited from a parent directory
# cannot leak extra entries into the target.
SET(BulletSoftBodyDX11Solvers_HLSL)
FOREACH (f ${BulletSoftBodyDX11Solvers_Shaders})
	LIST(APPEND BulletSoftBodyDX11Solvers_HLSL "HLSL/${f}.hlsl")
ENDFOREACH ()

# The .hlsl files are MSTRINGIFY-wrapped text fragments and not stand-alone
# shaders, so generators must list them without trying to compile them.
SET_SOURCE_FILES_PROPERTIES(${BulletSoftBodyDX11Solvers_HLSL} PROPERTIES HEADER_FILE_ONLY TRUE)

ADD_LIBRARY(BulletSoftBodySolvers_DX11 ${BulletSoftBodyDX11Solvers_SRCS} ${BulletSoftBodyDX11Solvers_HDRS} ${BulletSoftBodyDX11Solvers_HLSL})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES SOVERSION ${BULLET_VERSION})

# Dependencies are only named for shared builds.
IF (BUILD_SHARED_LIBS)
	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_DX11 BulletSoftBody BulletDynamics)
ENDIF ()
# Install rules for the DX11 solver library.
IF (INSTALL_LIBS)
	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
		# VERSION_GREATER compares version components one by one; plain GREATER
		# would read the "major.minor" text as a single decimal number.
		IF ("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" VERSION_GREATER 2.5)
			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_DX11 DESTINATION .)
			ELSE ()
				INSTALL(TARGETS BulletSoftBodySolvers_DX11 DESTINATION lib${LIB_SUFFIX})
				#headers are already installed by BulletMultiThreaded library
			ENDIF ()
		ENDIF ()

		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES FRAMEWORK true)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES PUBLIC_HEADER "${BulletSoftBodyDX11Solvers_HDRS}")
		ENDIF ()
	ENDIF ()
ENDIF ()

View file

@ -0,0 +1,95 @@
MSTRINGIFY(
// Constants for the aerodynamic force pass.
cbuffer ApplyForcesCB : register( b0 )
{
// Threads whose dispatch index is not below this value do nothing
unsigned int numNodes;
// Multiplies the cloth acceleration and the inverse mass in the kernel
float solverdt;
// Lower limit for the squared relative wind speed before any force is computed
float epsilon;
// Not read by the kernel
int padding3;
};
// Per-vertex inputs; g_vertexClothIdentifier is used as the index into the per-cloth buffers
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexNormal : register( t1 );
StructuredBuffer<float> g_vertexArea : register( t2 );
StructuredBuffer<float> g_vertexInverseMass : register( t3 );
// Per-cloth inputs
// TODO: These could be combined into a lift/drag factor array along with medium density
StructuredBuffer<float> g_clothLiftFactor : register( t4 );
StructuredBuffer<float> g_clothDragFactor : register( t5 );
StructuredBuffer<float4> g_clothWindVelocity : register( t6 );
StructuredBuffer<float4> g_clothAcceleration : register( t7 );
StructuredBuffer<float> g_clothMediumDensity : register( t8 );
// Per-vertex state that the kernel both reads and writes
RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u0 );
RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
// Returns a scaled by dot(v a). When a has unit length this is the
// component of v that points along a.
float3 projectOnAxis( float3 v, float3 a )
{
	const float alongAxis = dot(v, a);
	return (a*alongAxis);
}
// One thread per vertex.
// Adds the per-cloth acceleration to the vertex velocity and then, for vertices
// moving relative to the wind, accumulates a lift and drag force. The force is
// replaced by a velocity-cancelling term when it would be larger than the
// current velocity allows.
[numthreads(128, 1, 1)]
void
ApplyForcesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	const unsigned int vertexIndex = DTid.x;
	if( vertexIndex >= numNodes )
		return;

	// Vertices without a positive inverse mass are left untouched
	const float inverseMass = g_vertexInverseMass[vertexIndex];
	if( !(inverseMass > 0.0f) )
		return;

	const int clothIndex = g_vertexClothIdentifier[vertexIndex];

	float3 velocity = g_vertexVelocity[vertexIndex].xyz;
	float3 surfaceNormal = g_vertexNormal[vertexIndex].xyz;
	const float vertexArea = g_vertexArea[vertexIndex];
	const float3 accumulatedForce = g_vertexForceAccumulator[vertexIndex].xyz;

	// Per-cloth values
	const float3 acceleration = g_clothAcceleration[clothIndex].xyz;
	const float3 windVelocity = g_clothWindVelocity[clothIndex].xyz;
	const float lift = g_clothLiftFactor[clothIndex];
	const float drag = g_clothDragFactor[clothIndex];
	const float density = g_clothMediumDensity[clothIndex];

	// The acceleration changes the velocity directly instead of going through a force
	velocity += (acceleration*solverdt);
	g_vertexVelocity[vertexIndex] = float4(velocity, 0.f);

	const float3 windRelative = velocity - windVelocity;
	const float speedSquared = dot(windRelative, windRelative);
	if( !(speedSquared > epsilon) )
		return;

	// Flip the normal when it faces against the relative wind
	if( dot(surfaceNormal, windRelative) < 0 )
		surfaceNormal = surfaceNormal * -1.f;

	const float speedAlongNormal = dot(surfaceNormal, windRelative);
	if( !(speedAlongNormal > 0) )
		return;

	const float halfFlux = vertexArea * speedAlongNormal * speedSquared / 2.f;
	const float pressure = halfFlux * density;

	float3 aeroForce = float3(0.f, 0.f, 0.f);
	aeroForce += surfaceNormal * (-pressure * lift);
	aeroForce += normalize(windRelative)*(-pressure * drag);

	const float dtTimesInverseMass = solverdt * inverseMass;
	const float3 velocityChange = aeroForce * dtTimesInverseMass;

	float3 updatedForce;
	if( dot(velocityChange, velocityChange) > dot(velocity, velocity) )
	{
		// Clamp: remove the velocity component along the force direction instead
		updatedForce = accumulatedForce - (projectOnAxis(velocity, normalize(aeroForce))/dtTimesInverseMass);
	}
	else
	{
		updatedForce = accumulatedForce + aeroForce;
	}

	g_vertexForceAccumulator[vertexIndex] = float4(updatedForce, 0.0f);
}
);

View file

@ -0,0 +1,83 @@
MSTRINGIFY(
// Constants for the bounding box reduction.
cbuffer ComputeBoundsCB : register( b0 )
{
// Threads whose dispatch index is not below this value read no vertex
int numNodes;
// Number of bounds slots that are initialised and written back
int numSoftBodies;
// Not read by the kernel
int padding1;
int padding2;
};
// Cloth index of each vertex; negative values are skipped by the kernel
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
StructuredBuffer<float4> g_vertexPositions : register( t1 );
// Per-cloth bounds stored as order-preserving unsigned encodings of the float coordinates
RWStructuredBuffer<uint4> g_clothMinBounds : register( u0 );
RWStructuredBuffer<uint4> g_clothMaxBounds : register( u1 );
// Group-local copies of the bounds with one slot per soft body.
groupshared uint4 clothMinBounds[256];
groupshared uint4 clothMaxBounds[256];

// Reduces vertex positions to per-cloth minimum and maximum bounds.
// Each group first reduces into group-shared memory with atomics and then
// merges its result into the global buffers.
// The group has 128 threads but 256 slots, so slot initialisation and the
// final merge walk the slots with a stride of the group size. With a single
// slot per thread the slots from 128 upwards would never be initialised or
// written back.
[numthreads(128, 1, 1)]
void
ComputeBoundsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	const unsigned int UINT_MAX = 0xffffffff;
	const uint groupSize = 128;
	const uint slotCapacity = 256;

	// Never touch more slots than the group-shared arrays hold
	uint trackedBodies = uint(numSoftBodies);
	if( trackedBodies > slotCapacity )
		trackedBodies = slotCapacity;

	// Init min and max bounds arrays
	for( uint initSlot = GTid.x; initSlot < trackedBodies; initSlot += groupSize )
	{
		clothMinBounds[initSlot] = uint4(UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX);
		clothMaxBounds[initSlot] = uint4(0,0,0,0);
	}

	AllMemoryBarrierWithGroupSync();

	int nodeID = DTid.x;
	if( nodeID < numNodes )
	{
		int clothIdentifier = g_vertexClothIdentifier[nodeID];
		// Skip unassigned vertices and ids that have no slot in group-shared memory
		if( clothIdentifier >= 0 && uint(clothIdentifier) < trackedBodies )
		{
			float3 position = g_vertexPositions[nodeID].xyz;

			// Reinterpret position as uint
			uint3 positionUInt = uint3(asuint(position.x), asuint(position.y), asuint(position.z));

			// Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints.
			// 1+~s is the two-complement negation of the sign bit s: all ones for negatives and zero otherwise.
			positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
			positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
			positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);

			// Min/max with the LDS values
			InterlockedMin(clothMinBounds[clothIdentifier].x, positionUInt.x);
			InterlockedMin(clothMinBounds[clothIdentifier].y, positionUInt.y);
			InterlockedMin(clothMinBounds[clothIdentifier].z, positionUInt.z);

			InterlockedMax(clothMaxBounds[clothIdentifier].x, positionUInt.x);
			InterlockedMax(clothMaxBounds[clothIdentifier].y, positionUInt.y);
			InterlockedMax(clothMaxBounds[clothIdentifier].z, positionUInt.z);
		}
	}

	AllMemoryBarrierWithGroupSync();

	// Use global atomics to update the global versions of the data
	for( uint flushSlot = GTid.x; flushSlot < trackedBodies; flushSlot += groupSize )
	{
		InterlockedMin(g_clothMinBounds[flushSlot].x, clothMinBounds[flushSlot].x);
		InterlockedMin(g_clothMinBounds[flushSlot].y, clothMinBounds[flushSlot].y);
		InterlockedMin(g_clothMinBounds[flushSlot].z, clothMinBounds[flushSlot].z);

		InterlockedMax(g_clothMaxBounds[flushSlot].x, clothMaxBounds[flushSlot].x);
		InterlockedMax(g_clothMaxBounds[flushSlot].y, clothMaxBounds[flushSlot].y);
		InterlockedMax(g_clothMaxBounds[flushSlot].z, clothMaxBounds[flushSlot].z);
	}
}
);

View file

@ -0,0 +1,41 @@
MSTRINGIFY(

// Constants for the integration pass; the padding fields are not read.
cbuffer IntegrateCB : register( b0 )
{
	int numNodes;
	float solverdt;
	int padding1;
	int padding2;
};

// Per-vertex inverse mass
StructuredBuffer<float> g_vertexInverseMasses : register( t0 );

// Per-vertex state updated by the kernel
RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u2 );
RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u3 );

// One thread per vertex.
// Stores the current position as the previous one then applies the accumulated
// force to the velocity and the new velocity to the position. The force
// accumulator is cleared afterwards.
[numthreads(128, 1, 1)]
void
IntegrateKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	const int vertexIndex = DTid.x;
	if( vertexIndex >= numNodes )
		return;

	const float3 startPosition = g_vertexPositions[vertexIndex].xyz;
	const float3 startVelocity = g_vertexVelocity[vertexIndex].xyz;
	const float3 accumulatedForce = g_vertexForceAccumulator[vertexIndex].xyz;
	const float invMass = g_vertexInverseMasses[vertexIndex];

	// Remember where the vertex was before this step
	g_vertexPreviousPositions[vertexIndex] = float4(startPosition, 0.f);

	const float3 endVelocity = startVelocity + accumulatedForce * invMass * solverdt;
	const float3 endPosition = startPosition + endVelocity * solverdt;

	g_vertexForceAccumulator[vertexIndex] = float4(0.f, 0.f, 0.f, 0.0f);
	g_vertexPositions[vertexIndex] = float4(endPosition, 0.f);
	g_vertexVelocity[vertexIndex] = float4(endVelocity, 0.f);
}

);

View file

@ -0,0 +1,63 @@
MSTRINGIFY(
// Constants describing which vertices to copy and where they go in the output buffer.
cbuffer OutputToVertexArrayCB : register( b0 )
{
// Added to the thread index to find the source vertex
int startNode;
// Number of vertices copied by one dispatch
int numNodes;
// Destination of a position is threadIndex * positionStride + positionOffset in float elements
int positionOffset;
int positionStride;
// Destination of a normal is threadIndex * normalStride + normalOffset in float elements
int normalOffset;
int normalStride;
// Not read by the kernels
int padding1;
int padding2;
};
StructuredBuffer<float4> g_vertexPositions : register( t0 );
StructuredBuffer<float4> g_vertexNormals : register( t1 );
// Output buffer addressed one float at a time
RWBuffer<float> g_vertexBuffer : register( u0 );
// One thread per vertex.
// Copies the xyz of a position and of a normal into the float output buffer
// at the locations given by the stride and offset constants.
[numthreads(128, 1, 1)]
void
OutputToVertexArrayWithNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	const int localNode = DTid.x;
	if( localNode >= numNodes )
		return;

	const int sourceNode = localNode + startNode;
	const float4 sourcePosition = g_vertexPositions[sourceNode];
	const float4 sourceNormal = g_vertexNormals[sourceNode];

	// Strides and offsets are counted in floats and not in float4 elements
	const int positionBase = localNode * positionStride + positionOffset;
	g_vertexBuffer[positionBase] = sourcePosition.x;
	g_vertexBuffer[positionBase+1] = sourcePosition.y;
	g_vertexBuffer[positionBase+2] = sourcePosition.z;

	const int normalBase = localNode * normalStride + normalOffset;
	g_vertexBuffer[normalBase] = sourceNormal.x;
	g_vertexBuffer[normalBase+1] = sourceNormal.y;
	g_vertexBuffer[normalBase+2] = sourceNormal.z;
}
// One thread per vertex.
// Copies only the xyz of a position into the float output buffer. The normal
// buffer is not read at all by this variant.
[numthreads(128, 1, 1)]
void
OutputToVertexArrayWithoutNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	int nodeID = DTid.x;
	if( nodeID < numNodes )
	{
		float4 position = g_vertexPositions[nodeID + startNode];

		// Stride should account for the float->float4 conversion
		int positionDestination = nodeID * positionStride + positionOffset;
		g_vertexBuffer[positionDestination] = position.x;
		g_vertexBuffer[positionDestination+1] = position.y;
		g_vertexBuffer[positionDestination+2] = position.z;
	}
}
);

View file

@ -0,0 +1,44 @@
MSTRINGIFY(

// Constants for the link preparation pass; the padding fields are not read.
cbuffer PrepareLinksCB : register( b0 )
{
	int numLinks;
	int padding0;
	int padding1;
	int padding2;
};

// The two vertex indices joined by each link
StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
StructuredBuffer<float> g_linksMassLSC : register( t1 );
StructuredBuffer<float4> g_nodesPreviousPosition : register( t2 );

// Per-link outputs
RWStructuredBuffer<float> g_linksLengthRatio : register( u0 );
RWStructuredBuffer<float4> g_linksCurrentLength : register( u1 );

// One thread per link.
// Stores the vector between the previous positions of the two link ends and
// the reciprocal of its squared length scaled by the link massLSC value.
[numthreads(128, 1, 1)]
void
PrepareLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	const int linkIndex = DTid.x;
	if( linkIndex >= numLinks )
		return;

	const int2 endpoints = g_linksVertexIndices[linkIndex];
	const float4 firstEnd = g_nodesPreviousPosition[endpoints.x];
	const float4 secondEnd = g_nodesPreviousPosition[endpoints.y];

	const float4 span = secondEnd - firstEnd;
	const float scaledSquaredLength = dot(span, span)*g_linksMassLSC[linkIndex];

	g_linksCurrentLength[linkIndex] = span;
	g_linksLengthRatio[linkIndex] = 1./scaledSquaredLength;
}

);

View file

@ -0,0 +1,55 @@
MSTRINGIFY(

// Constants for one batch of links; ti is not read by the kernel.
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
	int startLink;
	int numLinks;
	float kst;
	float ti;
};

// The two vertex indices joined by each link
StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
StructuredBuffer<float> g_linksMassLSC : register( t1 );
StructuredBuffer<float> g_linksRestLengthSquared : register( t2 );
StructuredBuffer<float> g_verticesInverseMass : register( t3 );

// Vertex positions corrected in place
RWStructuredBuffer<float4> g_vertexPositions : register( u0 );

// One thread per link of the batch that starts at startLink.
// Moves the two end vertices of the link along the link direction so that the
// squared length approaches the squared rest length. The positions are
// written without atomics.
[numthreads(128, 1, 1)]
void
SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	if( DTid.x >= numLinks )
		return;

	const int linkIndex = DTid.x + startLink;

	// Links without a positive massLSC value are not solved
	const float massLSC = g_linksMassLSC[linkIndex];
	if( !(massLSC > 0.0f) )
		return;

	const float restSquared = g_linksRestLengthSquared[linkIndex];
	const int2 endpoints = g_linksVertexIndices[linkIndex];

	const float3 firstEnd = g_vertexPositions[endpoints.x].xyz;
	const float3 secondEnd = g_vertexPositions[endpoints.y].xyz;
	const float firstInverseMass = g_verticesInverseMass[endpoints.x];
	const float secondInverseMass = g_verticesInverseMass[endpoints.y];

	const float3 span = secondEnd - firstEnd;
	const float currentSquared = dot(span, span);
	const float correction = ((restSquared - currentSquared)/(massLSC*(restSquared+currentSquared)))*kst;

	g_vertexPositions[endpoints.x] = float4(firstEnd - span*(correction*firstInverseMass), 0.f);
	g_vertexPositions[endpoints.y] = float4(secondEnd + span*(correction*secondInverseMass), 0.f);
}

);

View file

@ -0,0 +1,147 @@
MSTRINGIFY(
// Constants for one batch of logical wavefronts; ti is not read by the kernel.
// WAVEFRONT_SIZE and WAVEFRONT_BLOCK_MULTIPLIER and MAX_NUM_VERTICES_PER_WAVE and
// MAX_BATCHES_PER_WAVE and BLOCK_SIZE are not defined in this text; presumably
// they are supplied as compile-time defines - TODO confirm.
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
int startWaveInBatch;
int numWaves;
float kst;
float ti;
};
// Number of batches per wavefront stored one element per logical wavefront
// x holds the batch count and y the vertex count of the wavefront
StructuredBuffer<int2> g_wavefrontBatchCountsVertexCounts : register( t0 );
// Set of up to maxNumVertices vertex addresses per wavefront
StructuredBuffer<int> g_vertexAddressesPerWavefront : register( t1 );
StructuredBuffer<float> g_verticesInverseMass : register( t2 );
// Per-link data laid out structured in terms of sub batches within wavefronts
StructuredBuffer<int2> g_linksVertexIndices : register( t3 );
StructuredBuffer<float> g_linksMassLSC : register( t4 );
StructuredBuffer<float> g_linksRestLengthSquared : register( t5 );
RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
// Data loaded on a per-wave basis
// NOTE(review): wavefrontBatchCountsVertexCounts is declared but not used by the kernel below.
groupshared int2 wavefrontBatchCountsVertexCounts[WAVEFRONT_BLOCK_MULTIPLIER];
groupshared float4 vertexPositionSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
groupshared float vertexInverseMassSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
// Storing the vertex addresses actually slowed things down a little
//groupshared int vertexAddressSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
// Solves link constraints for logical wavefronts inside group-shared memory.
// Phase 1 copies the vertices used by each wavefront into group-shared arrays.
// Phase 2 runs the batches of the wavefront one after another on those copies.
// Phase 3 writes the copies back to the global position buffer.
[numthreads(BLOCK_SIZE, 1, 1)]
void
SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
// The mask equals a modulo only when WAVEFRONT_SIZE is a power of two - presumably it is; confirm
const int laneInWavefront = (DTid.x & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (DTid.x / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER;
// Index of this wavefront within its thread group; selects its slice of the group-shared arrays
const int localWavefront = wavefront - firstWavefrontInBlock;
int batchesWithinWavefront = 0;
int verticesUsedByWave = 0;
int cond = wavefront < (startWaveInBatch + numWaves);
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( cond)
{
// Load the batch counts for the wavefronts
int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
// Each lane handles every WAVEFRONT_SIZE-th vertex of the wavefront
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
//vertexAddressSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = vertexAddress;
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
}
// Ensure compiler does not re-order memory operations
// The barrier sits outside the conditional so every thread of the group reaches it
//AllMemoryBarrier();
AllMemoryBarrierWithGroupSync ();
if( cond)
{
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
// NOTE(review): as a do/while the body runs once even when batchesWithinWavefront is 0
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float3 position0 = vertexPositionSharedData[vertexAddress0].xyz;
float3 position1 = vertexPositionSharedData[vertexAddress1].xyz;
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float3 del = position1 - position0;
float len = dot(del, del);
// k stays 0 for links without a positive massLSC so their positions are rewritten unchanged
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
// Ensure compiler does not re-order memory operations
AllMemoryBarrier();
vertexPositionSharedData[vertexAddress0] = float4(position0, 0.f);
vertexPositionSharedData[vertexAddress1] = float4(position1, 0.f);
// Ensure compiler does not re-order memory operations
AllMemoryBarrier();
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
// NOTE(review): no group sync separates the batch loop from this write-back; this presumably
// relies on a logical wavefront executing in lock step on the hardware - confirm
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
}
}
}
);

View file

@ -0,0 +1,48 @@
MSTRINGIFY(

// Constants for the link constant update; the padding fields are not read.
cbuffer UpdateConstantsCB : register( b0 )
{
	int numLinks;
	int padding0;
	int padding1;
	int padding2;
};

// The two vertex indices joined by each link
StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
StructuredBuffer<float4> g_vertexPositions : register( t1 );
StructuredBuffer<float> g_vertexInverseMasses : register( t2 );
StructuredBuffer<float> g_linksMaterialLSC : register( t3 );

// Per-link outputs
RWStructuredBuffer<float> g_linksMassLSC : register( u0 );
RWStructuredBuffer<float> g_linksRestLengthSquared : register( u1 );
RWStructuredBuffer<float> g_linksRestLengths : register( u2 );

// One thread per link.
// Takes the current distance between the link ends as the rest length and
// derives the massLSC value from the summed inverse masses and the material
// stiffness coefficient.
[numthreads(128, 1, 1)]
void
UpdateConstantsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	const int linkIndex = DTid.x;
	if( linkIndex >= numLinks )
		return;

	const int2 endpoints = g_linksVertexIndices[linkIndex];
	const float stiffness = g_linksMaterialLSC[ linkIndex ];

	const float3 firstEnd = g_vertexPositions[endpoints.x].xyz;
	const float3 secondEnd = g_vertexPositions[endpoints.y].xyz;
	const float firstInverseMass = g_vertexInverseMasses[endpoints.x];
	const float secondInverseMass = g_vertexInverseMasses[endpoints.y];

	const float3 separation = firstEnd - secondEnd;
	const float restLength = sqrt(dot(separation, separation));

	g_linksRestLengths[linkIndex] = restLength;
	g_linksMassLSC[linkIndex] = (firstInverseMass + secondInverseMass)/stiffness;
	// Squared again from the rounded length and not taken from the dot product
	g_linksRestLengthSquared[linkIndex] = restLength*restLength;
}

);
View file

@ -0,0 +1,49 @@
MSTRINGIFY(

// Constants for the velocity update; the padding fields are not read.
cbuffer UpdateVelocitiesFromPositionsWithVelocitiesCB : register( b0 )
{
	int numNodes;
	float isolverdt;
	int padding1;
	int padding2;
};

// Per-vertex inputs; g_vertexClothIndices selects the per-cloth entries
StructuredBuffer<float4> g_vertexPositions : register( t0 );
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
StructuredBuffer<int> g_vertexClothIndices : register( t2 );
// Per-cloth inputs
StructuredBuffer<float> g_clothVelocityCorrectionCoefficients : register( t3 );
StructuredBuffer<float> g_clothDampingFactor : register( t4 );

// Per-vertex outputs
RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
RWStructuredBuffer<float4> g_vertexForces : register( u1 );

// One thread per vertex.
// Adds the scaled position change of the step to the existing velocity then
// damps the result and clears the vertex force.
[numthreads(128, 1, 1)]
void
updateVelocitiesFromPositionsWithVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	const int vertexIndex = DTid.x;
	if( vertexIndex >= numNodes )
		return;

	const float3 currentPosition = g_vertexPositions[vertexIndex].xyz;
	const float3 earlierPosition = g_vertexPreviousPositions[vertexIndex].xyz;
	const float3 oldVelocity = g_vertexVelocities[vertexIndex].xyz;

	const int cloth = g_vertexClothIndices[vertexIndex];
	const float correctionCoefficient = g_clothVelocityCorrectionCoefficients[cloth];
	const float keptFraction = (1.f - g_clothDampingFactor[cloth]);

	const float3 displacement = currentPosition - earlierPosition;
	const float3 correctedVelocity = oldVelocity + displacement*correctionCoefficient*isolverdt;

	// Damp the velocity
	const float3 dampedVelocity = correctedVelocity * keptFraction;

	g_vertexVelocities[vertexIndex] = float4(dampedVelocity, 0.f);
	g_vertexForces[vertexIndex] = float4(0.f, 0.f, 0.f, 0.f);
}

);

View file

@ -0,0 +1,98 @@
MSTRINGIFY(
// Constants shared by the three normal and area kernels of this file.
cbuffer UpdateSoftBodiesCB : register( b0 )
{
// Vertex count used by the reset and normalize kernels
unsigned int numNodes;
// First face and face count of the batch handled by UpdateSoftBodiesKernel
unsigned int startFace;
unsigned int numFaces;
// Not read by any kernel in this file
float epsilon;
};
// Vertex indices of each triangle; only x y z are read
StructuredBuffer<int4> g_triangleVertexIndexSet : register( t0 );
StructuredBuffer<float4> g_vertexPositions : register( t1 );
// Divisor applied to the accumulated vertex area
StructuredBuffer<int> g_vertexTriangleCount : register( t2 );
// Per-vertex accumulators
RWStructuredBuffer<float4> g_vertexNormals : register( u0 );
RWStructuredBuffer<float> g_vertexArea : register( u1 );
// Per-triangle outputs
RWStructuredBuffer<float4> g_triangleNormals : register( u2 );
RWStructuredBuffer<float> g_triangleArea : register( u3 );
// One thread per vertex: clears the normal and area accumulators.
[numthreads(128, 1, 1)]
void
ResetNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	const uint vertexIndex = DTid.x;
	if( vertexIndex >= numNodes )
		return;

	g_vertexNormals[vertexIndex] = float4(0.0f, 0.0f, 0.0f, 0.0f);
	g_vertexArea[vertexIndex] = 0.0f;
}
// One thread per face of the batch that starts at startFace.
// Computes the face normal and its length and adds both to the accumulators
// of the three face vertices.
// NOTE(review): the vertex accumulators are updated with a plain read and a
// later write and no atomics; presumably the host batches faces so that no two
// faces of one dispatch share a vertex - confirm.
[numthreads(128, 1, 1)]
void
UpdateSoftBodiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
int faceID = DTid.x + startFace;
if( DTid.x < numFaces )
{
int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
int nodeIndex0 = triangleIndexSet.x;
int nodeIndex1 = triangleIndexSet.y;
int nodeIndex2 = triangleIndexSet.z;
float3 node0 = g_vertexPositions[nodeIndex0].xyz;
float3 node1 = g_vertexPositions[nodeIndex1].xyz;
float3 node2 = g_vertexPositions[nodeIndex2].xyz;
// Current accumulator values of the three vertices
float3 nodeNormal0 = g_vertexNormals[nodeIndex0].xyz;
float3 nodeNormal1 = g_vertexNormals[nodeIndex1].xyz;
float3 nodeNormal2 = g_vertexNormals[nodeIndex2].xyz;
float vertexArea0 = g_vertexArea[nodeIndex0];
float vertexArea1 = g_vertexArea[nodeIndex1];
float vertexArea2 = g_vertexArea[nodeIndex2];
float3 vector0 = node1 - node0;
float3 vector1 = node2 - node0;
// Unnormalized face normal from the cross product of two edges
float3 faceNormal = cross(vector0.xyz, vector1.xyz);
// The length of the cross product is stored as the area without halving it
float triangleArea = length(faceNormal);
nodeNormal0 = nodeNormal0 + faceNormal;
nodeNormal1 = nodeNormal1 + faceNormal;
nodeNormal2 = nodeNormal2 + faceNormal;
vertexArea0 = vertexArea0 + triangleArea;
vertexArea1 = vertexArea1 + triangleArea;
vertexArea2 = vertexArea2 + triangleArea;
g_triangleNormals[faceID] = float4(normalize(faceNormal), 0.f);
g_vertexNormals[nodeIndex0] = float4(nodeNormal0, 0.f);
g_vertexNormals[nodeIndex1] = float4(nodeNormal1, 0.f);
g_vertexNormals[nodeIndex2] = float4(nodeNormal2, 0.f);
g_triangleArea[faceID] = triangleArea;
g_vertexArea[nodeIndex0] = vertexArea0;
g_vertexArea[nodeIndex1] = vertexArea1;
g_vertexArea[nodeIndex2] = vertexArea2;
}
}
// One thread per vertex.
// Normalizes the accumulated vertex normal and averages the accumulated area
// over the number of triangles that use the vertex.
// A vertex with a zero-length normal or without triangles gets a zero normal
// or a zero area instead of the NaN that a blind normalize or division gives.
[numthreads(128, 1, 1)]
void
NormalizeNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	if( DTid.x < numNodes )
	{
		float4 normal = g_vertexNormals[DTid.x];
		float area = g_vertexArea[DTid.x];
		int numTriangles = g_vertexTriangleCount[DTid.x];

		float vectorLength = length(normal);

		// Only normalize when there is a direction to keep
		float4 unitNormal = float4(0.0f, 0.0f, 0.0f, 0.0f);
		if( vectorLength > 0.0f )
		{
			unitNormal = normalize(normal);
		}

		// Only average when at least one triangle contributed
		float averagedArea = 0.0f;
		if( numTriangles > 0 )
		{
			averagedArea = area/float(numTriangles);
		}

		g_vertexNormals[DTid.x] = unitNormal;
		g_vertexArea[DTid.x] = averagedArea;
	}
}
);

View file

@ -0,0 +1,44 @@
MSTRINGIFY(

// Constants for the velocity update; the padding fields are not read.
cbuffer UpdateVelocitiesFromPositionsWithoutVelocitiesCB : register( b0 )
{
	int numNodes;
	float isolverdt;
	int padding1;
	int padding2;
};

// Per-vertex inputs; g_vertexClothIndices selects the per-cloth damping factor
StructuredBuffer<float4> g_vertexPositions : register( t0 );
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
StructuredBuffer<int> g_vertexClothIndices : register( t2 );
StructuredBuffer<float> g_clothDampingFactor : register( t3 );

// Per-vertex outputs
RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
RWStructuredBuffer<float4> g_vertexForces : register( u1 );

// One thread per vertex.
// Replaces the velocity with the damped position change of the step scaled by
// isolverdt and clears the vertex force. The previous velocity is overwritten
// and not used so it is not fetched.
[numthreads(128, 1, 1)]
void
updateVelocitiesFromPositionsWithoutVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	int nodeID = DTid.x;
	if( nodeID < numNodes )
	{
		float3 position = g_vertexPositions[nodeID].xyz;
		float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
		int clothIndex = g_vertexClothIndices[nodeID];
		float dampingFactor = g_clothDampingFactor[clothIndex];
		float velocityCoefficient = (1.f - dampingFactor);

		float3 difference = position - previousPosition;

		float3 velocity = difference*velocityCoefficient*isolverdt;

		g_vertexVelocities[nodeID] = float4(velocity, 0.f);
		g_vertexForces[nodeID] = float4(0.f, 0.f, 0.f, 0.f);
	}
}

);

View file

@ -0,0 +1,35 @@
MSTRINGIFY(
// Constants for UpdatePositionsFromVelocitiesKernel.
// Four 4-byte scalars, 16 bytes in total.
cbuffer UpdatePositionsFromVelocitiesCB : register( b0 )
{
// Number of vertices to process; threads with an index at or above this value do nothing.
int numNodes;
// Factor the velocity is multiplied by before it is added to the position.
// NOTE(review): presumably the solver time step - confirm against the host code.
float solverSDT;
// Not read by the kernel.
int padding1;
int padding2;
};
// Velocity per vertex (only xyz is read).
StructuredBuffer<float4> g_vertexVelocities : register( t0 );
// Read as the starting position and overwritten with the new position.
RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u0 );
// Output only: receives the new position.
RWStructuredBuffer<float4> g_vertexCurrentPosition : register( u1 );
[numthreads(128, 1, 1)]
void
UpdatePositionsFromVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	// One thread per vertex: advance the stored previous position by velocity*solverSDT
	// and store the result as both the current and the previous position.
	int vertexID = DTid.x;
	if( vertexID < numNodes )
	{
		float3 startPosition = g_vertexPreviousPositions[vertexID].xyz;
		float3 step = g_vertexVelocities[vertexID].xyz*solverSDT;

		// Both buffers receive the same value, with w set to zero.
		float4 advanced = float4(startPosition + step, 0.f);
		g_vertexCurrentPosition[vertexID] = advanced;
		g_vertexPreviousPositions[vertexID] = advanced;
	}
}
);

View file

@ -0,0 +1,55 @@
MSTRINGIFY(
// Constants for VSolveLinksKernel.
// Four 4-byte scalars, 16 bytes in total.
cbuffer VSolveLinksCB : register( b0 )
{
// Index of the first link handled by this dispatch; added to the thread index.
int startLink;
// Number of links handled by this dispatch; threads at or above this index do nothing.
int numLinks;
// Scalar factor applied to the velocity correction.
// NOTE(review): presumably a stiffness coefficient - confirm against the host code.
float kst;
// Not read by the kernel.
int padding;
};
// Node indices for each link
StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
// Per-link scalar multiplied into the correction.
StructuredBuffer<float> g_linksLengthRatio : register( t1 );
// Per-link vector (only xyz is read) that gives the direction of the correction.
StructuredBuffer<float4> g_linksCurrentLength : register( t2 );
// Inverse mass per vertex; scales the correction applied to that vertex.
StructuredBuffer<float> g_vertexInverseMass : register( t3 );
// Velocity per vertex; read and updated in place for both ends of each link.
RWStructuredBuffer<float4> g_vertexVelocity : register( u0 );
[numthreads(128, 1, 1)]
void
VSolveLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	// One thread per link in the range starting at startLink and spanning numLinks links.
	// NOTE(review): two links that share a vertex would race on g_vertexVelocity if they ran
	// in the same dispatch - presumably the host batches links to prevent that; confirm.
	if( DTid.x < numLinks )
	{
		int linkID = DTid.x + startLink;

		// Gather everything first; the velocity buffer is only written at the very end.
		int2 endPoints = g_linksVertexIndices[linkID];
		float lengthRatio = g_linksLengthRatio[linkID];
		float3 linkVector = g_linksCurrentLength[linkID].xyz;

		float3 velocityA = g_vertexVelocity[endPoints.x].xyz;
		float3 velocityB = g_vertexVelocity[endPoints.y].xyz;
		float inverseMassA = g_vertexInverseMass[endPoints.x];
		float inverseMassB = g_vertexInverseMass[endPoints.y];

		// Relative velocity of the two ends projected onto the link vector gives the
		// magnitude of the correction.
		float projected = dot(linkVector, velocityA - velocityB);
		float impulse = -projected*lengthRatio*kst;

		// Equal and opposite corrections, each weighted by the inverse mass of its vertex.
		float3 correctedA = velocityA + linkVector*(impulse*inverseMassA);
		float3 correctedB = velocityB - linkVector*(impulse*inverseMassB);

		g_vertexVelocity[endPoints.x] = float4(correctedA, 0.f);
		g_vertexVelocity[endPoints.y] = float4(correctedB, 0.f);
	}
}
);

View file

@ -0,0 +1,170 @@
MSTRINGIFY(
// Constants for SolveCollisionsAndUpdateVelocitiesKernel.
// Four 4-byte scalars, 16 bytes in total.
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
// Number of vertices to process; threads with an index at or above this value do nothing.
unsigned int numNodes;
// Factor the damped position difference is multiplied by to obtain a velocity.
// NOTE(review): presumably the inverse of the solver time step - confirm against the host code.
float isolverdt;
// Not read by the kernel.
int padding0;
int padding1;
};
// Range of entries in g_collisionObjectDetails that belong to one cloth.
// The kernel iterates from firstObject up to, but not including, endObject.
struct CollisionObjectIndices
{
int firstObject;
int endObject;
};
// Description of one collision object as read by the kernel.
struct CollisionShapeDescription
{
// Applied to the capsule end points with mul(shapeTransform, point); the kernel also
// reads elements _m03, _m13 and _m23 as the position of the shape.
float4x4 shapeTransform;
// Linear and angular velocity of the collider (only xyz is read).
float4 linearVelocity;
float4 angularVelocity;
// Not read by the kernel.
int softBodyIdentifier;
// Compared against CAPSULE_SHAPE_PROXYTYPE; any other value is ignored by the kernel.
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
// Added to radius to give the minimum allowed distance from the capsule axis.
float margin;
// Multiplied with the per-cloth friction.
float friction;
// Not read by the kernel.
int padding0;
int padding1;
};
// From btBroadphaseProxy.h
static const int CAPSULE_SHAPE_PROXYTYPE = 10;
// For each vertex, the index of the cloth it belongs to; used to index the per-cloth buffers.
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
// Vertex positions the current ones are compared against (only xyz is read).
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
// Friction coefficient per cloth.
StructuredBuffer<float> g_perClothFriction : register( t2 );
// Damping factor per cloth; the kernel scales velocities by (1 - factor).
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
// Per cloth, the range of collision objects to test against.
StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
// All collision objects, addressed through the ranges above.
StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
// Output: friction force per vertex.
RWStructuredBuffer<float4> g_vertexForces : register( u0 );
// Output: velocity per vertex.
RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
// Read and written: positions are pushed out of colliders in place.
RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
[numthreads(128, 1, 1)]
void
SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	// One thread per vertex: push the vertex out of every capsule collider of its cloth,
	// derive the velocity from the change in position and accumulate a friction force.
	int nodeID = DTid.x;
	float3 forceOnVertex = float3(0.f, 0.f, 0.f);
	if( DTid.x < numNodes )
	{
		int clothIdentifier = g_vertexClothIdentifier[nodeID];
		// w = 1 so that the 4-component differences below have w = 0
		float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
		float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
		float clothFriction = g_perClothFriction[clothIdentifier];
		float dampingFactor = g_clothDampingFactor[clothIdentifier];
		float velocityCoefficient = (1.f - dampingFactor);
		CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];

		// Velocity when nothing touches the vertex. It is also the result when the cloth has
		// collision objects but none of them is a capsule; that case used to leave a
		// hard-coded placeholder value in the velocity buffer.
		float3 velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;

		// The loop body is skipped entirely when the cloth has no collision objects
		for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
		{
			CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
			float colliderFriction = shapeDescription.friction;

			// Only capsules are handled; every other shape type is ignored
			if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
			{
				// Colliding with a capsule
				float capsuleHalfHeight = shapeDescription.halfHeight;
				float capsuleRadius = shapeDescription.radius;
				float capsuleMargin = shapeDescription.margin;
				float4x4 worldTransform = shapeDescription.shapeTransform;

				// End points of the capsule axis: on the local y axis, then transformed
				float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f);
				float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
				float4 worldC1 = mul(worldTransform, c1);
				float4 worldC2 = mul(worldTransform, c2);
				float3 segment = (worldC2 - worldC1).xyz;

				// compute distance of tangent to vertex along line segment in capsule
				// (0 at worldC1, 1 at worldC2)
				float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
				float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
				float distanceFromLine = length(position - closestPoint);
				float distanceFromC1 = length(worldC1 - position);
				float distanceFromC2 = length(worldC2 - position);

				// Final distance from collision, point to push from, direction to push in
				// for impulse force
				float dist;
				float3 normalVector;
				if( distanceAlongSegment < 0 )
				{
					// Beyond the first end point: measure from that end point
					dist = distanceFromC1;
					normalVector = normalize(position - worldC1).xyz;
				} else if( distanceAlongSegment > 1.f ) {
					// Beyond the second end point: measure from that end point
					dist = distanceFromC2;
					normalVector = normalize(position - worldC2).xyz;
				} else {
					// Alongside the axis: measure from the closest point on it
					dist = distanceFromLine;
					normalVector = normalize(position - closestPoint).xyz;
				}

				float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
				float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
				float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
				float minDistance = capsuleRadius + capsuleMargin;

				// In case of no collision, this is the value of velocity.
				// Recomputed because an earlier collider may have moved the position.
				velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;

				// Check for a collision
				if( dist < minDistance )
				{
					// Project back to surface along normal (90 percent of the penetration depth)
					position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
					velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
					float3 relativeVelocity = velocity - velocityOfSurfacePoint;

					// Two directions perpendicular to the contact normal
					float3 p1 = normalize(cross(normalVector, segment));
					float3 p2 = normalize(cross(p1, normalVector));

					// Full friction is sum of velocities in each direction of plane
					float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);

					// Real friction is peak friction corrected by friction coefficients
					frictionVector = frictionVector * (colliderFriction*clothFriction);

					// Friction is only applied while the relative velocity along the normal is not positive
					float approachSpeed = dot(relativeVelocity, normalVector);
					if( approachSpeed <= 0.0 )
						forceOnVertex -= frictionVector;
				}
			}
		}

		g_vertexVelocities[nodeID] = float4(velocity, 0.f);
		// Update external force
		g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
		g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
	}
}
);

View file

@ -0,0 +1,191 @@
MSTRINGIFY(
// Constants for SolveCollisionsAndUpdateVelocitiesKernel.
// Four 4-byte scalars, 16 bytes in total.
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
// Declared but not read by the kernel in this file, which does not bounds-check the thread index.
unsigned int numNodes;
// Factor the damped position difference is multiplied by to obtain a velocity.
// NOTE(review): presumably the inverse of the solver time step - confirm against the host code.
float isolverdt;
// Not read by the kernel.
int padding0;
int padding1;
};
// Range of entries in g_collisionObjectDetails that belong to one cloth.
// endObject - firstObject is used as the number of collision objects.
struct CollisionObjectIndices
{
int firstObject;
int endObject;
};
// Description of one collision object as read by the kernel.
struct CollisionShapeDescription
{
// Applied to the capsule end points with mul(shapeTransform, point); the kernel also
// reads elements _m03, _m13 and _m23 as the position of the shape.
float4x4 shapeTransform;
// Linear and angular velocity of the collider (only xyz is read).
float4 linearVelocity;
float4 angularVelocity;
// Not read by the kernel.
int softBodyIdentifier;
// Compared against CAPSULE_SHAPE_PROXYTYPE; any other value is ignored by the kernel.
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
// Added to radius to give the minimum allowed distance from the capsule axis.
float margin;
// Multiplied with the per-cloth friction.
float friction;
// Not read by the kernel.
int padding0;
int padding1;
};
// From btBroadphaseProxy.h
static const int CAPSULE_SHAPE_PROXYTYPE = 10;
// For each vertex, the index of the cloth it belongs to; used to index the per-cloth buffers.
StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
// Vertex positions the current ones are compared against (only xyz is read).
StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
// Friction coefficient per cloth.
StructuredBuffer<float> g_perClothFriction : register( t2 );
// Damping factor per cloth; the kernel scales velocities by (1 - factor).
StructuredBuffer<float> g_clothDampingFactor : register( t3 );
// Per cloth, the range of collision objects to test against.
StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
// All collision objects, addressed through the ranges above.
StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
// Output: friction force per vertex.
RWStructuredBuffer<float4> g_vertexForces : register( u0 );
// Output: velocity per vertex.
RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
// Read and written: positions are pushed out of colliders in place.
RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
// A buffer of local collision shapes
// TODO: Iterate to support more than MAX_LOCAL_COLLISION_SHAPES
static const int MAX_LOCAL_COLLISION_SHAPES = 16;
groupshared CollisionShapeDescription localCollisionShapes[MAX_LOCAL_COLLISION_SHAPES];
[numthreads(128, 1, 1)]
void
SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
	// One thread per vertex. The collision objects of the cloth are first copied into
	// group shared memory, then every vertex is pushed out of the capsule colliders, its
	// velocity is derived from the change in position and a friction force is accumulated.
	// NOTE(review): the thread index is not checked against numNodes; this relies on the
	// vertex buffers being padded to a whole number of thread groups - confirm on the host side.
	int nodeID = DTid.x;
	float3 forceOnVertex = float3(0.f, 0.f, 0.f);
	int clothIdentifier = g_vertexClothIdentifier[nodeID];
	// w = 1 so that the 4-component differences below have w = 0
	float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
	float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
	float clothFriction = g_perClothFriction[clothIdentifier];
	float dampingFactor = g_clothDampingFactor[clothIdentifier];
	float velocityCoefficient = (1.f - dampingFactor);
	CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];

	// Never use more shapes than the group shared array can hold: without the clamp a
	// cloth with more collision objects would index localCollisionShapes out of bounds.
	// Objects beyond the limit are ignored until the TODO above is addressed.
	int numObjects = min( collisionObjectIndices.endObject - collisionObjectIndices.firstObject, MAX_LOCAL_COLLISION_SHAPES );

	// First load all of the collision objects into LDS, one object per thread.
	// The signed comparison is false for every thread when numObjects is not positive.
	if( (int)GTid.x < numObjects )
	{
		localCollisionShapes[GTid.x] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + GTid.x ];
	}

	// Safe as the vertices are padded so that not more than one soft body is in a group
	AllMemoryBarrierWithGroupSync();

	// Velocity when nothing touches the vertex. It is also the result when the cloth has
	// collision objects but none of them is a capsule; that case used to produce a zero velocity.
	float3 velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;

	// The loop body is skipped entirely when the cloth has no collision objects
	for( int collision = 0; collision < numObjects; ++collision )
	{
		CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
		float colliderFriction = shapeDescription.friction;

		// Only capsules are handled; every other shape type is ignored
		if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
		{
			// Colliding with a capsule
			float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
			float capsuleRadius = localCollisionShapes[collision].radius;
			float capsuleMargin = localCollisionShapes[collision].margin;
			float4x4 worldTransform = localCollisionShapes[collision].shapeTransform;

			// End points of the capsule axis: on the local y axis, then transformed
			float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f);
			float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
			float4 worldC1 = mul(worldTransform, c1);
			float4 worldC2 = mul(worldTransform, c2);
			float3 segment = (worldC2 - worldC1).xyz;

			// compute distance of tangent to vertex along line segment in capsule
			// (0 at worldC1, 1 at worldC2)
			float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
			float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
			float distanceFromLine = length(position - closestPoint);
			float distanceFromC1 = length(worldC1 - position);
			float distanceFromC2 = length(worldC2 - position);

			// Final distance from collision, point to push from, direction to push in
			// for impulse force
			float dist;
			float3 normalVector;
			if( distanceAlongSegment < 0 )
			{
				// Beyond the first end point: measure from that end point
				dist = distanceFromC1;
				normalVector = normalize(position - worldC1).xyz;
			} else if( distanceAlongSegment > 1.f ) {
				// Beyond the second end point: measure from that end point
				dist = distanceFromC2;
				normalVector = normalize(position - worldC2).xyz;
			} else {
				// Alongside the axis: measure from the closest point on it
				dist = distanceFromLine;
				normalVector = normalize(position - closestPoint).xyz;
			}

			float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
			float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
			float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
			float minDistance = capsuleRadius + capsuleMargin;

			// In case of no collision, this is the value of velocity.
			// Recomputed because an earlier collider may have moved the position.
			velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;

			// Check for a collision
			if( dist < minDistance )
			{
				// Project back to surface along normal (90 percent of the penetration depth)
				position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
				velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
				float3 relativeVelocity = velocity - velocityOfSurfacePoint;

				// Two directions perpendicular to the contact normal
				float3 p1 = normalize(cross(normalVector, segment));
				float3 p2 = normalize(cross(p1, normalVector));

				// Full friction is sum of velocities in each direction of plane
				float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);

				// Real friction is peak friction corrected by friction coefficients
				frictionVector = frictionVector * (colliderFriction*clothFriction);

				// Friction is only applied while the relative velocity along the normal is not positive
				float approachSpeed = dot(relativeVelocity, normalVector);
				if( approachSpeed <= 0.0 )
					forceOnVertex -= frictionVector;
			}
		}
	}

	g_vertexVelocities[nodeID] = float4(velocity, 0.f);
	// Update external force
	g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
	g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
}
);

View file

@ -0,0 +1,323 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
#define BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
// DX11 support
#include <windows.h>
#include <crtdbg.h>
#include <d3d11.h>
#include <d3dx11.h>
#include <d3dcompiler.h>
#ifndef SAFE_RELEASE
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
#endif
/**
* DX11 Buffer that tracks a host buffer on use to ensure size-correctness.
*/
template <typename ElementType> class btDX11Buffer
{
protected:
ID3D11Device* m_d3dDevice;
ID3D11DeviceContext* m_d3dDeviceContext;
ID3D11Buffer* m_Buffer;
ID3D11ShaderResourceView* m_SRV;
ID3D11UnorderedAccessView* m_UAV;
btAlignedObjectArray< ElementType >* m_CPUBuffer;
// TODO: Separate this from the main class
// as read back buffers can be shared between buffers
ID3D11Buffer* m_readBackBuffer;
int m_gpuSize;
bool m_onGPU;
bool m_readOnlyOnGPU;
bool createBuffer( ID3D11Buffer *preexistingBuffer = 0)
{
HRESULT hr = S_OK;
// Create all CS buffers
if( preexistingBuffer )
{
m_Buffer = preexistingBuffer;
} else {
D3D11_BUFFER_DESC buffer_desc;
ZeroMemory(&buffer_desc, sizeof(buffer_desc));
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
if( m_readOnlyOnGPU )
buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
else
buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
buffer_desc.ByteWidth = m_CPUBuffer->size() * sizeof(ElementType);
// At a minimum the buffer must exist
if( buffer_desc.ByteWidth == 0 )
buffer_desc.ByteWidth = sizeof(ElementType);
buffer_desc.StructureByteStride = sizeof(ElementType);
hr = m_d3dDevice->CreateBuffer(&buffer_desc, NULL, &m_Buffer);
if( FAILED( hr ) )
return (hr==S_OK);
}
if( m_readOnlyOnGPU )
{
D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srvbuffer_desc.Buffer.ElementWidth = m_CPUBuffer->size();
if( srvbuffer_desc.Buffer.ElementWidth == 0 )
srvbuffer_desc.Buffer.ElementWidth = 1;
hr = m_d3dDevice->CreateShaderResourceView(m_Buffer, &srvbuffer_desc, &m_SRV);
if( FAILED( hr ) )
return (hr==S_OK);
} else {
// Create SRV
D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srvbuffer_desc.Buffer.ElementWidth = m_CPUBuffer->size();
if( srvbuffer_desc.Buffer.ElementWidth == 0 )
srvbuffer_desc.Buffer.ElementWidth = 1;
hr = m_d3dDevice->CreateShaderResourceView(m_Buffer, &srvbuffer_desc, &m_SRV);
if( FAILED( hr ) )
return (hr==S_OK);
// Create UAV
D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc;
ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc));
uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
uavbuffer_desc.Buffer.NumElements = m_CPUBuffer->size();
if( uavbuffer_desc.Buffer.NumElements == 0 )
uavbuffer_desc.Buffer.NumElements = 1;
hr = m_d3dDevice->CreateUnorderedAccessView(m_Buffer, &uavbuffer_desc, &m_UAV);
if( FAILED( hr ) )
return (hr==S_OK);
// Create read back buffer
D3D11_BUFFER_DESC readback_buffer_desc;
ZeroMemory(&readback_buffer_desc, sizeof(readback_buffer_desc));
readback_buffer_desc.ByteWidth = m_CPUBuffer->size() * sizeof(ElementType);
readback_buffer_desc.Usage = D3D11_USAGE_STAGING;
readback_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
readback_buffer_desc.StructureByteStride = sizeof(ElementType);
hr = m_d3dDevice->CreateBuffer(&readback_buffer_desc, NULL, &m_readBackBuffer);
if( FAILED( hr ) )
return (hr==S_OK);
}
m_gpuSize = m_CPUBuffer->size();
return true;
}
public:
btDX11Buffer( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext, btAlignedObjectArray< ElementType > *CPUBuffer, bool readOnly )
{
m_d3dDevice = d3dDevice;
m_d3dDeviceContext = d3dDeviceContext;
m_Buffer = 0;
m_SRV = 0;
m_UAV = 0;
m_readBackBuffer = 0;
m_CPUBuffer = CPUBuffer;
m_gpuSize = 0;
m_onGPU = false;
m_readOnlyOnGPU = readOnly;
}
virtual ~btDX11Buffer()
{
SAFE_RELEASE(m_Buffer);
SAFE_RELEASE(m_SRV);
SAFE_RELEASE(m_UAV);
SAFE_RELEASE(m_readBackBuffer);
}
ID3D11ShaderResourceView* &getSRV()
{
return m_SRV;
}
ID3D11UnorderedAccessView* &getUAV()
{
return m_UAV;
}
ID3D11Buffer* &getBuffer()
{
return m_Buffer;
}
/**
* Move the data to the GPU if it is not there already.
*/
bool moveToGPU()
{
// Reallocate if GPU size is too small
if( (m_CPUBuffer->size() > m_gpuSize ) )
m_onGPU = false;
if( !m_onGPU && m_CPUBuffer->size() > 0 )
{
// If the buffer doesn't exist or the CPU-side buffer has changed size, create
// We should really delete the old one, too, but let's leave that for later
if( !m_Buffer || (m_CPUBuffer->size() != m_gpuSize) )
{
SAFE_RELEASE(m_Buffer);
SAFE_RELEASE(m_SRV);
SAFE_RELEASE(m_UAV);
SAFE_RELEASE(m_readBackBuffer);
if( !createBuffer() )
{
btAssert("Buffer creation failed.");
return false;
}
}
if( m_gpuSize > 0 )
{
D3D11_BOX destRegion;
destRegion.left = 0;
destRegion.front = 0;
destRegion.top = 0;
destRegion.bottom = 1;
destRegion.back = 1;
destRegion.right = (m_CPUBuffer->size())*sizeof(ElementType);
m_d3dDeviceContext->UpdateSubresource(m_Buffer, 0, &destRegion, &((*m_CPUBuffer)[0]), 0, 0);
m_onGPU = true;
}
}
return true;
}
/**
* Move the data back from the GPU if it is on there and isn't read only.
*/
bool moveFromGPU()
{
if( m_CPUBuffer->size() > 0 )
{
if( m_onGPU && !m_readOnlyOnGPU )
{
// Copy back
D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
//m_pd3dImmediateContext->CopyResource(m_phAngVelReadBackBuffer, m_phAngVel);
D3D11_BOX destRegion;
destRegion.left = 0;
destRegion.front = 0;
destRegion.top = 0;
destRegion.bottom = 1;
destRegion.back = 1;
destRegion.right = (m_CPUBuffer->size())*sizeof(ElementType);
m_d3dDeviceContext->CopySubresourceRegion(
m_readBackBuffer,
0,
0,
0,
0 ,
m_Buffer,
0,
&destRegion
);
m_d3dDeviceContext->Map(m_readBackBuffer, 0, D3D11_MAP_READ, 0, &MappedResource);
//memcpy(m_hAngVel, MappedResource.pData, (m_maxObjs * sizeof(float) ));
memcpy(&((*m_CPUBuffer)[0]), MappedResource.pData, ((m_CPUBuffer->size()) * sizeof(ElementType) ));
m_d3dDeviceContext->Unmap(m_readBackBuffer, 0);
m_onGPU = false;
}
}
return true;
}
/**
* Copy the data back from the GPU without changing its state to be CPU-side.
* Useful if we just want to view it on the host for visualization.
*/
bool copyFromGPU()
{
if( m_CPUBuffer->size() > 0 )
{
if( m_onGPU && !m_readOnlyOnGPU )
{
// Copy back
D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
D3D11_BOX destRegion;
destRegion.left = 0;
destRegion.front = 0;
destRegion.top = 0;
destRegion.bottom = 1;
destRegion.back = 1;
destRegion.right = (m_CPUBuffer->size())*sizeof(ElementType);
m_d3dDeviceContext->CopySubresourceRegion(
m_readBackBuffer,
0,
0,
0,
0 ,
m_Buffer,
0,
&destRegion
);
m_d3dDeviceContext->Map(m_readBackBuffer, 0, D3D11_MAP_READ, 0, &MappedResource);
//memcpy(m_hAngVel, MappedResource.pData, (m_maxObjs * sizeof(float) ));
memcpy(&((*m_CPUBuffer)[0]), MappedResource.pData, ((m_CPUBuffer->size()) * sizeof(ElementType) ));
m_d3dDeviceContext->Unmap(m_readBackBuffer, 0);
}
}
return true;
}
/**
* Call if data has changed on the CPU.
* Can then trigger a move to the GPU as necessary.
*/
virtual void changedOnCPU()
{
m_onGPU = false;
}
}; // class btDX11Buffer
#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H

View file

@ -0,0 +1,103 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
#define BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
struct ID3D11Device;
struct ID3D11DeviceContext;
/**
 * DirectX 11 variant of btSoftBodyLinkData: adds one btDX11Buffer per link attribute
 * together with the batching information for the link set.
 * Only the declaration is visible here; the member functions are defined elsewhere.
 */
class btSoftBodyLinkDataDX11 : public btSoftBodyLinkData
{
public:
// NOTE(review): presumably true while the link data is resident on the GPU - confirm in the implementation.
bool m_onGPU;
// Device and immediate context handed to the constructor.
ID3D11Device *m_d3dDevice;
ID3D11DeviceContext *m_d3dDeviceContext;
// GPU buffers, one per link attribute.
// NOTE(review): which host-side array each buffer mirrors is decided in the constructor,
// which is not part of this header - confirm there.
btDX11Buffer<LinkNodePair> m_dx11Links;
btDX11Buffer<float> m_dx11LinkStrength;
btDX11Buffer<float> m_dx11LinksMassLSC;
btDX11Buffer<float> m_dx11LinksRestLengthSquared;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11LinksCLength;
btDX11Buffer<float> m_dx11LinksLengthRatio;
btDX11Buffer<float> m_dx11LinksRestLength;
btDX11Buffer<float> m_dx11LinksMaterialLinearStiffnessCoefficient;
/** Start index and number of links of one computation batch; both default to zero. */
struct BatchPair
{
int start;
int length;
BatchPair() :
start(0),
length(0)
{
}
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_linkAddresses;
/**
* Start and length values for computation batches over link data.
*/
btAlignedObjectArray< BatchPair > m_batchStartLengths;
//ID3D11Buffer* readBackBuffer;
btSoftBodyLinkDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
virtual ~btSoftBodyLinkDataDX11();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createLinks( int numLinks );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setLinkAt( const LinkDescription &link, int linkIndex );
// NOTE(review): the three functions below presumably report and change whether the data
// lives on the GPU; their definitions are not visible here - confirm in the implementation.
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire link set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H

View file

@ -0,0 +1,173 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
#define BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
struct ID3D11Device;
struct ID3D11DeviceContext;
/**
 * DirectX 11 variant of btSoftBodyLinkData whose links are organised per wavefront:
 * besides one btDX11Buffer per link attribute it stores, for every wavefront, the
 * vertices it uses and the link end points as wavefront-local vertex indices.
 * Only the declaration and the inline accessors are visible here; the remaining
 * member functions are defined elsewhere.
 */
class btSoftBodyLinkDataDX11SIMDAware : public btSoftBodyLinkData
{
public:
	// NOTE(review): presumably true while the link data is resident on the GPU - confirm in the implementation.
	bool m_onGPU;
	// Device and immediate context handed to the constructor.
	ID3D11Device *m_d3dDevice;
	ID3D11DeviceContext *m_d3dDeviceContext;

	// Fixed layout parameters; being const they can only be set in the constructor's
	// initializer list, which is not part of this header.
	const int m_wavefrontSize;
	const int m_linksPerWorkItem;
	const int m_maxLinksPerWavefront;
	// NOTE(review): the four values below are presumably filled in by generateBatches() - confirm.
	int m_maxBatchesWithinWave;
	int m_maxVerticesWithinWave;
	int m_numWavefronts;

	int m_maxVertex;

	/** Number of batches and number of vertices belonging to one wavefront. */
	struct NumBatchesVerticesPair
	{
		int numBatches;
		int numVertices;
	};

	// Array storing number of links in each wavefront
	btAlignedObjectArray<int> m_linksPerWavefront;
	btAlignedObjectArray<NumBatchesVerticesPair> m_numBatchesAndVerticesWithinWaves;
	btDX11Buffer< NumBatchesVerticesPair > m_dx11NumBatchesAndVerticesWithinWaves;

	// All arrays here will contain batches of m_maxLinksPerWavefront links
	// ordered by wavefront.
	// with either global vertex pairs or local vertex pairs
	btAlignedObjectArray< int > m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
	btDX11Buffer<int> m_dx11WavefrontVerticesGlobalAddresses;
	btAlignedObjectArray< LinkNodePair > m_linkVerticesLocalAddresses; // Vertex pair for the link
	btDX11Buffer<LinkNodePair> m_dx11LinkVerticesLocalAddresses;
	// GPU buffers, one per remaining link attribute.
	// NOTE(review): which host-side array each buffer mirrors is decided in the constructor - confirm there.
	btDX11Buffer<float> m_dx11LinkStrength;
	btDX11Buffer<float> m_dx11LinksMassLSC;
	btDX11Buffer<float> m_dx11LinksRestLengthSquared;
	btDX11Buffer<float> m_dx11LinksRestLength;
	btDX11Buffer<float> m_dx11LinksMaterialLinearStiffnessCoefficient;

	/** Start index and number of links of one computation batch; both default to zero. */
	struct BatchPair
	{
		int start;
		int length;

		BatchPair() :
			start(0),
			length(0)
		{
		}

		BatchPair( int s, int l ) :
			start( s ),
			length( l )
		{
		}
	};

	/**
	 * Link addressing information for each cloth.
	 * Allows link locations to be computed independently of data batching.
	 */
	btAlignedObjectArray< int > m_linkAddresses;

	/**
	 * Start and length values for computation batches over link data.
	 */
	btAlignedObjectArray< BatchPair > m_wavefrontBatchStartLengths;

	//ID3D11Buffer* readBackBuffer;

	btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );

	virtual ~btSoftBodyLinkDataDX11SIMDAware();

	/** Allocate enough space in all link-related arrays to fit numLinks links */
	virtual void createLinks( int numLinks );

	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
	virtual void setLinkAt( const LinkDescription &link, int linkIndex );

	// NOTE(review): the three functions below presumably report and change whether the data
	// lives on the GPU; their definitions are not visible here - confirm in the implementation.
	virtual bool onAccelerator();

	virtual bool moveToAccelerator();

	virtual bool moveFromAccelerator();

	/**
	 * Generate (and later update) the batching for the entire link set.
	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
	 * In theory we could delay it until just before we need the cloth.
	 * It's a one-off overhead, though, so that is a later optimisation.
	 */
	void generateBatches();

	// The accessors below only read members, so they are const and can also be
	// called through a const reference or pointer.

	/** Returns m_maxVerticesWithinWave. */
	int getMaxVerticesPerWavefront() const
	{
		return m_maxVerticesWithinWave;
	}

	/** Returns m_wavefrontSize. */
	int getWavefrontSize() const
	{
		return m_wavefrontSize;
	}

	/** Returns m_linksPerWorkItem. */
	int getLinksPerWorkItem() const
	{
		return m_linksPerWorkItem;
	}

	/** Returns m_maxLinksPerWavefront. */
	int getMaxLinksPerWavefront() const
	{
		return m_maxLinksPerWavefront;
	}

	/** Returns m_maxBatchesWithinWave. */
	int getMaxBatchesPerWavefront() const
	{
		return m_maxBatchesWithinWave;
	}

	/** Returns m_numWavefronts. */
	int getNumWavefronts() const
	{
		return m_numWavefronts;
	}

	/** Returns a copy of the batch and vertex counts stored for the given wavefront; the index is not range-checked here. */
	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront ) const
	{
		return m_numBatchesAndVerticesWithinWaves[wavefront];
	}

	/** Returns the entry of m_wavefrontVerticesGlobalAddresses at vertexIndex; the index is not range-checked here. */
	int getVertexGlobalAddresses( int vertexIndex ) const
	{
		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
	}

	/**
	 * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
	 */
	LinkNodePair getVertexPairLocalAddresses( int linkIndex ) const
	{
		return m_linkVerticesLocalAddresses[linkIndex];
	}
};
#endif // #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H

View file

@ -0,0 +1,96 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
struct ID3D11Device;
struct ID3D11DeviceContext;
/**
 * DX11 specialisation of btSoftBodyTriangleData.
 * Adds the D3D11 device/context handles and one btDX11Buffer per triangle
 * attribute declared here (vertex indices, area, normal), plus the batching
 * tables used to schedule work over the triangle set.
 */
class btSoftBodyTriangleDataDX11 : public btSoftBodyTriangleData
{
public:
// NOTE(review): presumably tracks whether the data currently lives on the GPU
// (see onAccelerator()); the implementation is not visible in this header.
bool m_onGPU;
/** D3D11 device handle supplied to the constructor. */
ID3D11Device *m_d3dDevice;
/** D3D11 device context handle supplied to the constructor. */
ID3D11DeviceContext *m_d3dDeviceContext;
/** DX11 buffer of per-triangle vertex index sets. */
btDX11Buffer<btSoftBodyTriangleData::TriangleNodeSet> m_dx11VertexIndices;
/** DX11 buffer of per-triangle areas. */
btDX11Buffer<float> m_dx11Area;
/** DX11 buffer of per-triangle normals. */
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11Normal;
/** Start index and length describing one computation batch. */
struct BatchPair
{
int start;
int length;
/** Creates an empty batch (start 0, length 0). */
BatchPair() :
start(0),
length(0)
{
}
/** Creates a batch beginning at s and spanning l entries. */
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
 * Triangle addressing information for each cloth.
 * Allows triangle locations to be computed independently of data batching.
 */
btAlignedObjectArray< int > m_triangleAddresses;
/**
 * Start and length values for computation batches over triangle data.
 */
btAlignedObjectArray< BatchPair > m_batchStartLengths;
//ID3D11Buffer* readBackBuffer;
public:
/** Stores nothing visible here beyond its declaration; takes the D3D11 device and context to use. */
btSoftBodyTriangleDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
virtual ~btSoftBodyTriangleDataDX11();
/** Allocate enough space in all triangle-related arrays to fit numTriangles triangles */
virtual void createTriangles( int numTriangles );
/** Insert the triangle described into the correct data structures assuming space has already been allocated by a call to createTriangles */
virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
// NOTE(review): the three accelerator methods below are only declared here;
// their return-value semantics should be confirmed against the .cpp file.
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
 * Generate (and later update) the batching for the entire triangle set.
 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
 * In theory we could delay it until just before we need the cloth.
 * It's a one-off overhead, though, so that is a later optimisation.
 */
void generateBatches();
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H

View file

@ -0,0 +1,107 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include <windows.h>
#include <crtdbg.h>
#include <d3d11.h>
#include <d3dx11.h>
#include <d3dcompiler.h>
/**
 * Vertex buffer descriptor whose target is a DirectX 11 buffer.
 * Holds the device context, the buffer and its unordered access view, and fills
 * in the offset/stride fields inherited from btVertexBufferDescriptor.
 */
class btDX11VertexBufferDescriptor : public btVertexBufferDescriptor
{
protected:
	/** Context of the DX11 device on which the vertex buffer is stored. */
	ID3D11DeviceContext* m_context;

	/** DX11 vertex buffer */
	ID3D11Buffer* m_vertexBuffer;

	/** UAV for DX11 buffer */
	ID3D11UnorderedAccessView* m_vertexBufferUAV;

public:
	/**
	 * Describe a buffer that carries vertex positions only.
	 * context      - device context owning the buffer.
	 * buffer       - DX11 buffer that receives the vertex data.
	 * UAV          - UAV representation of the buffer laid out in floats.
	 * vertexOffset - offset in floats to the first vertex.
	 * vertexStride - stride in floats between vertices.
	 */
	btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride ) :
		m_context( context ),
		m_vertexBuffer( buffer ),
		m_vertexBufferUAV( UAV )
	{
		// Inherited fields cannot appear in the initializer list.
		m_vertexOffset = vertexOffset;
		m_vertexStride = vertexStride;
		m_hasVertexPositions = true;
	}

	/**
	 * Describe a buffer that carries vertex positions and normals.
	 * context      - device context owning the buffer.
	 * buffer       - DX11 buffer that receives the vertex data.
	 * UAV          - UAV representation of the buffer laid out in floats.
	 * vertexOffset - offset in floats to the first vertex.
	 * vertexStride - stride in floats between vertices.
	 * normalOffset - offset in floats to the first normal.
	 * normalStride - stride in floats between normals.
	 */
	btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
		m_context( context ),
		m_vertexBuffer( buffer ),
		m_vertexBufferUAV( UAV )
	{
		// Inherited fields cannot appear in the initializer list.
		m_vertexOffset = vertexOffset;
		m_vertexStride = vertexStride;
		m_hasVertexPositions = true;

		m_normalOffset = normalOffset;
		m_normalStride = normalStride;
		m_hasNormals = true;
	}

	virtual ~btDX11VertexBufferDescriptor() {}

	/** Identifies this descriptor as a DX11 buffer descriptor. */
	virtual BufferTypes getBufferType() const { return DX11_BUFFER; }

	/** Device context passed at construction. */
	virtual ID3D11DeviceContext* getContext() const { return m_context; }

	/** DX11 buffer passed at construction. */
	virtual ID3D11Buffer* getbtDX11Buffer() const { return m_vertexBuffer; }

	/** Unordered access view passed at construction. */
	virtual ID3D11UnorderedAccessView* getDX11UAV() const { return m_vertexBufferUAV; }
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H

View file

@ -0,0 +1,63 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
#define BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
// Forward declarations used by this header.
// The former "class btSoftBodyLinkData::LinkDescription;" line was removed:
// a qualified name is not allowed in a stand-alone forward declaration, so it
// was rejected by conforming compilers, and nothing in this header refers to
// LinkDescription.
class btSoftBodyLinkData;
struct ID3D11Device;
struct ID3D11DeviceContext;
/**
 * DX11 specialisation of btSoftBodyVertexData.
 * Declares one btDX11Buffer per vertex attribute alongside the D3D11
 * device/context handles.
 */
class btSoftBodyVertexDataDX11 : public btSoftBodyVertexData
{
protected:
// NOTE(review): presumably tracks whether the data currently lives on the GPU
// (see onAccelerator()); the implementation is not visible in this header.
bool m_onGPU;
/** D3D11 device handle supplied to the constructor. */
ID3D11Device *m_d3dDevice;
/** D3D11 device context handle supplied to the constructor. */
ID3D11DeviceContext *m_d3dDeviceContext;
public:
/** Per-vertex cloth identifier buffer. */
btDX11Buffer<int> m_dx11ClothIdentifier;
/** Per-vertex position buffer. */
btDX11Buffer<Vectormath::Aos::Point3> m_dx11VertexPosition;
/** Per-vertex previous-position buffer. */
btDX11Buffer<Vectormath::Aos::Point3> m_dx11VertexPreviousPosition;
/** Per-vertex velocity buffer. */
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11VertexVelocity;
/** Per-vertex force accumulator buffer. */
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11VertexForceAccumulator;
/** Per-vertex normal buffer. */
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11VertexNormal;
/** Per-vertex inverse mass buffer. */
btDX11Buffer<float> m_dx11VertexInverseMass;
/** Per-vertex area buffer. */
btDX11Buffer<float> m_dx11VertexArea;
/** Per-vertex triangle count buffer. */
btDX11Buffer<int> m_dx11VertexTriangleCount;
//ID3D11Buffer* readBackBuffer;
public:
btSoftBodyVertexDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
virtual ~btSoftBodyVertexDataDX11();
// NOTE(review): the accelerator methods are only declared here; the meaning of
// their return values and of bCopy / bCopyMinimum must be confirmed in the .cpp.
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
};
#endif // #ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H

View file

@ -0,0 +1,691 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
#include "vectormath/vmInclude.h"
#include "BulletSoftBody/btSoftBodySolvers.h"
#include "btSoftBodySolverVertexBuffer_DX11.h"
#include "btSoftBodySolverLinkData_DX11.h"
#include "btSoftBodySolverVertexData_DX11.h"
#include "btSoftBodySolverTriangleData_DX11.h"
class DXFunctions
{
public:
typedef HRESULT (WINAPI * CompileFromMemoryFunc)(LPCSTR,SIZE_T,LPCSTR,const D3D10_SHADER_MACRO*,LPD3D10INCLUDE,LPCSTR,LPCSTR,UINT,UINT,ID3DX11ThreadPump*,ID3D10Blob**,ID3D10Blob**,HRESULT*);
ID3D11Device * m_dx11Device;
ID3D11DeviceContext* m_dx11Context;
CompileFromMemoryFunc m_dx11CompileFromMemory;
DXFunctions(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, CompileFromMemoryFunc dx11CompileFromMemory) :
m_dx11Device( dx11Device ),
m_dx11Context( dx11Context ),
m_dx11CompileFromMemory( dx11CompileFromMemory )
{
}
class KernelDesc
{
protected:
public:
ID3D11ComputeShader* kernel;
ID3D11Buffer* constBuffer;
KernelDesc()
{
kernel = 0;
constBuffer = 0;
}
virtual ~KernelDesc()
{
// TODO: this should probably destroy its kernel but we need to be careful
// in case KernelDescs are copied
}
};
/**
* Compile a compute shader kernel from a string and return the appropriate KernelDesc object.
*/
KernelDesc compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros = 0 );
};
class btDX11SoftBodySolver : public btSoftBodySolver
{
protected:
/**
* Entry in the collision shape array.
* Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
*/
/**
 * Entry in the collision shape array: shape type, transform, velocities and
 * the scalar shape parameters.
 */
struct CollisionShapeDescription
{
	Vectormath::Aos::Transform3 shapeTransform;
	Vectormath::Aos::Vector3 linearVelocity;
	Vectormath::Aos::Vector3 angularVelocity;
	int softBodyIdentifier;
	int collisionShapeType;
	// Both needed for capsule
	float radius;
	float halfHeight;
	float margin;
	float friction;

	/**
	 * Zero every scalar field. Previously softBodyIdentifier, radius and
	 * halfHeight were left indeterminate, so a default-constructed entry
	 * carried garbage in those fields until every one was assigned.
	 * The Vectormath members keep their own default construction.
	 */
	CollisionShapeDescription() :
		softBodyIdentifier( 0 ),
		collisionShapeType( 0 ),
		radius( 0.f ),
		halfHeight( 0.f ),
		margin( 0.f ),
		friction( 0.f )
	{
	}
};
/**
 * Three unsigned components followed by one padding word, so the struct holds
 * four unsigned ints in total.
 */
struct UIntVector3
{
	unsigned int x;
	unsigned int y;
	unsigned int z;
	unsigned int _padding;

	/** All components, including the padding word, start at zero. */
	UIntVector3() :
		x( 0 ),
		y( 0 ),
		z( 0 ),
		_padding( 0 )
	{
	}

	/** Component-wise construction; the padding word is zeroed. */
	UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ ) :
		x( x_ ),
		y( y_ ),
		z( z_ ),
		_padding( 0 )
	{
	}
};
public:
/**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
/**
 * Bookkeeping for one soft body inside the solver: counts, capacities and
 * first-element indices for its vertices, triangles and links, plus the
 * btSoftBody pointer it was created for.
 * All getters are const so they can be used through a const interface pointer.
 */
class btAcceleratedSoftBodyInterface
{
protected:
	/** Current number of vertices that are part of this cloth */
	int m_numVertices;
	/** Maximum number of vertices allocated to be part of this cloth */
	int m_maxVertices;
	/** Current number of triangles that are part of this cloth */
	int m_numTriangles;
	/** Maximum number of triangles allocated to be part of this cloth */
	int m_maxTriangles;
	/** Index of first vertex in the world allocated to this cloth */
	int m_firstVertex;
	/** Index of first triangle in the world allocated to this cloth */
	int m_firstTriangle;
	/** Index of first link in the world allocated to this cloth */
	int m_firstLink;
	/** Maximum number of links allocated to this cloth */
	int m_maxLinks;
	/** Current number of links allocated to this cloth */
	int m_numLinks;
	/** The actual soft body this data represents */
	btSoftBody *m_softBody;

public:
	/** Starts with every count and index at zero and remembers softBody. */
	btAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
		m_numVertices( 0 ),
		m_maxVertices( 0 ),
		m_numTriangles( 0 ),
		m_maxTriangles( 0 ),
		m_firstVertex( 0 ),
		m_firstTriangle( 0 ),
		m_firstLink( 0 ),
		m_maxLinks( 0 ),
		m_numLinks( 0 ),
		m_softBody( softBody )
	{
	}

	int getNumVertices() const
	{
		return m_numVertices;
	}

	int getNumTriangles() const
	{
		return m_numTriangles;
	}

	int getMaxVertices() const
	{
		return m_maxVertices;
	}

	int getMaxTriangles() const
	{
		return m_maxTriangles;
	}

	int getFirstVertex() const
	{
		return m_firstVertex;
	}

	int getFirstTriangle() const
	{
		return m_firstTriangle;
	}

	/**
	 * Update the bounds in the btSoftBody object
	 */
	void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );

	// TODO: All of these set functions will have to do checks and
	// update the world because restructuring of the arrays will be necessary
	// Reasonable use of "friend"?
	void setNumVertices( int numVertices )
	{
		m_numVertices = numVertices;
	}

	void setNumTriangles( int numTriangles )
	{
		m_numTriangles = numTriangles;
	}

	void setMaxVertices( int maxVertices )
	{
		m_maxVertices = maxVertices;
	}

	void setMaxTriangles( int maxTriangles )
	{
		m_maxTriangles = maxTriangles;
	}

	void setFirstVertex( int firstVertex )
	{
		m_firstVertex = firstVertex;
	}

	void setFirstTriangle( int firstTriangle )
	{
		m_firstTriangle = firstTriangle;
	}

	void setMaxLinks( int maxLinks )
	{
		m_maxLinks = maxLinks;
	}

	void setNumLinks( int numLinks )
	{
		m_numLinks = numLinks;
	}

	void setFirstLink( int firstLink )
	{
		m_firstLink = firstLink;
	}

	// The link getters and getSoftBody() were previously non-const, unlike the
	// vertex/triangle getters above; they only read members, so they are const now.
	int getMaxLinks() const
	{
		return m_maxLinks;
	}

	int getNumLinks() const
	{
		return m_numLinks;
	}

	int getFirstLink() const
	{
		return m_firstLink;
	}

	/** Returns the soft body pointer given at construction. */
	btSoftBody* getSoftBody() const
	{
		return m_softBody;
	}
};
/** Pair of indices (first, end) into the collision object array. */
struct CollisionObjectIndices
{
	int firstObject;
	int endObject;

	/** f becomes firstObject, e becomes endObject. */
	CollisionObjectIndices( int f, int e ) :
		firstObject( f ),
		endObject( e )
	{
	}
};
// Parameter blocks for the solver kernels.
// NOTE(review): the "CB" suffix and the per-kernel constBuffer in
// DXFunctions::KernelDesc suggest each struct mirrors a shader constant buffer;
// if so, field order and the padding members must stay in sync with the shader
// source - confirm before changing any of them.
// Every struct holds a multiple of four int/float members; the paddingN fields
// exist only to fill the struct up to that size.

/** Parameters for the prepare-links kernel. */
struct PrepareLinksCB
{
int numLinks;
int padding0;
int padding1;
int padding2;
};
/** Parameters for solving positions from links. */
struct SolvePositionsFromLinksKernelCB
{
int startLink;
int numLinks;
float kst;
float ti;
};
/** Parameters for the integrate kernel. */
struct IntegrateCB
{
int numNodes;
float solverdt;
int padding1;
int padding2;
};
/** Parameters for updating positions from velocities. */
struct UpdatePositionsFromVelocitiesCB
{
int numNodes;
float solverSDT;
int padding1;
int padding2;
};
/** Parameters for updating velocities from positions (without-velocities variant). */
struct UpdateVelocitiesFromPositionsWithoutVelocitiesCB
{
int numNodes;
float isolverdt;
int padding1;
int padding2;
};
/** Parameters for updating velocities from positions (with-velocities variant). */
struct UpdateVelocitiesFromPositionsWithVelocitiesCB
{
int numNodes;
float isolverdt;
int padding1;
int padding2;
};
/** Parameters for the update-soft-bodies kernel. */
struct UpdateSoftBodiesCB
{
int numNodes;
int startFace;
int numFaces;
float epsilon;
};
/** Parameters for the apply-forces kernel. */
struct ApplyForcesCB
{
unsigned int numNodes;
float solverdt;
float epsilon;
int padding3;
};
/** Parameters for the add-velocity kernel; eight members in total. */
struct AddVelocityCB
{
int startNode;
int lastNode;
float velocityX;
float velocityY;
float velocityZ;
int padding1;
int padding2;
int padding3;
};
/** Parameters for solving links for velocity. */
struct VSolveLinksCB
{
int startLink;
int numLinks;
float kst;
int padding;
};
/** Parameters for the compute-bounds kernel. */
struct ComputeBoundsCB
{
int numNodes;
int numSoftBodies;
int padding1;
int padding2;
};
/** Parameters for solving collisions and updating velocities. */
struct SolveCollisionsAndUpdateVelocitiesCB
{
unsigned int numNodes;
float isolverdt;
int padding0;
int padding1;
};
protected:
ID3D11Device * m_dx11Device;
ID3D11DeviceContext* m_dx11Context;
DXFunctions dxFunctions;
public:
/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
btSoftBodyLinkDataDX11 m_linkData;
btSoftBodyVertexDataDX11 m_vertexData;
btSoftBodyTriangleDataDX11 m_triangleData;
protected:
/** Variable to define whether we need to update solver constants on the next iteration */
bool m_updateSolverConstants;
bool m_shadersInitialized;
/**
* Cloths owned by this solver.
* Only our cloths are in this array.
*/
btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet;
/** Acceleration value to be applied to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver.
*/
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothAcceleration;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11PerClothAcceleration;
/** Wind velocity to be applied normal to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver.
*/
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothWindVelocity;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11PerClothWindVelocity;
/** Velocity damping factor */
btAlignedObjectArray< float > m_perClothDampingFactor;
btDX11Buffer<float> m_dx11PerClothDampingFactor;
/** Velocity correction coefficient */
btAlignedObjectArray< float > m_perClothVelocityCorrectionCoefficient;
btDX11Buffer<float> m_dx11PerClothVelocityCorrectionCoefficient;
/** Lift parameter for wind effect on cloth. */
btAlignedObjectArray< float > m_perClothLiftFactor;
btDX11Buffer<float> m_dx11PerClothLiftFactor;
/** Drag parameter for wind effect on cloth. */
btAlignedObjectArray< float > m_perClothDragFactor;
btDX11Buffer<float> m_dx11PerClothDragFactor;
/** Density of the medium in which each cloth sits */
btAlignedObjectArray< float > m_perClothMediumDensity;
btDX11Buffer<float> m_dx11PerClothMediumDensity;
/**
* Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
*/
btAlignedObjectArray< CollisionObjectIndices > m_perClothCollisionObjects;
btDX11Buffer<CollisionObjectIndices> m_dx11PerClothCollisionObjects;
/**
* Collision shapes being passed across to the cloths in this solver.
*/
btAlignedObjectArray< CollisionShapeDescription > m_collisionObjectDetails;
btDX11Buffer< CollisionShapeDescription > m_dx11CollisionObjectDetails;
/**
* Minimum bounds for each cloth.
* Updated by GPU and returned for use by broad phase.
* These are int vectors as a reminder that they store the int representation of a float, not a float.
* Bit 31 is inverted - i.e. floats are stored with int-sortable values.
*/
btAlignedObjectArray< UIntVector3 > m_perClothMinBounds;
btDX11Buffer< UIntVector3 > m_dx11PerClothMinBounds;
/**
* Maximum bounds for each cloth.
* Updated by GPU and returned for use by broad phase.
* These are int vectors as a reminder that they store the int representation of a float, not a float.
* Bit 31 is inverted - i.e. floats are stored with int-sortable values.
*/
btAlignedObjectArray< UIntVector3 > m_perClothMaxBounds;
btDX11Buffer< UIntVector3 > m_dx11PerClothMaxBounds;
/**
* Friction coefficient for each cloth
*/
btAlignedObjectArray< float > m_perClothFriction;
btDX11Buffer< float > m_dx11PerClothFriction;
DXFunctions::KernelDesc prepareLinksKernel;
DXFunctions::KernelDesc solvePositionsFromLinksKernel;
DXFunctions::KernelDesc vSolveLinksKernel;
DXFunctions::KernelDesc integrateKernel;
DXFunctions::KernelDesc addVelocityKernel;
DXFunctions::KernelDesc updatePositionsFromVelocitiesKernel;
DXFunctions::KernelDesc updateVelocitiesFromPositionsWithoutVelocitiesKernel;
DXFunctions::KernelDesc updateVelocitiesFromPositionsWithVelocitiesKernel;
DXFunctions::KernelDesc solveCollisionsAndUpdateVelocitiesKernel;
DXFunctions::KernelDesc resetNormalsAndAreasKernel;
DXFunctions::KernelDesc normalizeNormalsAndAreasKernel;
DXFunctions::KernelDesc computeBoundsKernel;
DXFunctions::KernelDesc updateSoftBodiesKernel;
DXFunctions::KernelDesc applyForcesKernel;
bool m_enableUpdateBounds;
/**
* Integrate motion on the solver.
*/
virtual void integrate( float solverdt );
float computeTriangleArea(
const Vectormath::Aos::Point3 &vertex0,
const Vectormath::Aos::Point3 &vertex1,
const Vectormath::Aos::Point3 &vertex2 );
virtual bool buildShaders();
void resetNormalsAndAreas( int numVertices );
void normalizeNormalsAndAreas( int numVertices );
void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
void prepareCollisionConstraints();
Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
virtual void applyForces( float solverdt );
virtual void updateConstants( float timeStep );
int findSoftBodyIndex( const btSoftBody* const softBody );
//////////////////////////////////////
// Kernel dispatches
virtual void prepareLinks();
void updatePositionsFromVelocities( float solverdt );
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
void solveLinksForVelocity( int startLink, int numLinks, float kst );
void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
void computeBounds( );
void solveCollisionsAndUpdateVelocities( float isolverdt );
// End kernel dispatches
/////////////////////////////////////
void updateBounds();
void releaseKernels();
public:
btDX11SoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
virtual ~btDX11SoftBodySolver();
/** Identifies this solver as the DX solver (always returns DX_SOLVER). */
virtual SolverTypes getSolverType() const
{
return DX_SOLVER;
}
/** Stores enableBounds in m_enableUpdateBounds; has no other effect here. */
void setEnableUpdateBounds(bool enableBounds)
{
m_enableUpdateBounds = enableBounds;
}
/** Returns the flag last stored by setEnableUpdateBounds (m_enableUpdateBounds). */
bool getEnableUpdateBounds() const
{
return m_enableUpdateBounds;
}
virtual btSoftBodyLinkData &getLinkData();
virtual btSoftBodyVertexData &getVertexData();
virtual btSoftBodyTriangleData &getTriangleData();
btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
const btAcceleratedSoftBodyInterface * const findSoftBodyInterface( const btSoftBody* const softBody ) const;
virtual bool checkInitialized();
virtual void updateSoftBodies( );
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void copyBackToSoftBodies(bool bMove = true);
virtual void solveConstraints( float solverdt );
virtual void predictMotion( float solverdt );
virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );
virtual void processCollision( btSoftBody*, btSoftBody* );
};
/**
* Class to manage movement of data from a solver to a given target.
* This version is the DX to CPU version.
*/
class btSoftBodySolverOutputDXtoCPU : public btSoftBodySolverOutput
{
public:
	/** Nothing to set up: this class declares no data members. */
	btSoftBodySolverOutputDXtoCPU() {}

	/**
	 * Output current computed vertex data for softBody to the given vertex buffer.
	 * Declared here only; the implementation lives outside this header.
	 */
	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
/**
* Class to manage movement of data from a solver to a given target.
* This version is the DX to DX version and subclasses DX to CPU so that it works for that too.
*/
class btSoftBodySolverOutputDXtoDX : public btSoftBodySolverOutputDXtoCPU
{
protected:
	/** Parameter block for the output-to-vertex-array kernels (eight ints). */
	struct OutputToVertexArrayCB
	{
		int startNode;
		int numNodes;
		int positionOffset;
		int positionStride;
		int normalOffset;
		int normalStride;
		int padding1;
		int padding2;
	};

	/** Device, context and shader compiler used by this output object. */
	DXFunctions dxFunctions;

	DXFunctions::KernelDesc outputToVertexArrayWithNormalsKernel;
	DXFunctions::KernelDesc outputToVertexArrayWithoutNormalsKernel;

	/** False until construction has been followed by shader setup. */
	bool m_shadersInitialized;

	bool checkInitialized();
	bool buildShaders();
	void releaseKernels();

public:
	/** Stores the DX11 handles; the shaders are not marked initialized here. */
	btSoftBodySolverOutputDXtoDX(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory)
		: dxFunctions( dx11Device, dx11Context, dx11CompileFromMemory )
		, m_shadersInitialized( false )
	{
	}

	/** Calls releaseKernels() on destruction. */
	~btSoftBodySolverOutputDXtoDX()
	{
		releaseKernels();
	}

	/**
	 * Output current computed vertex data for softBody to the given vertex buffer.
	 * Declared here only; the implementation lives outside this header.
	 */
	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
#endif // #ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H

View file

@ -0,0 +1,81 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "vectormath/vmInclude.h"
#include "btSoftBodySolver_DX11.h"
#include "btSoftBodySolverVertexBuffer_DX11.h"
#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
#include "btSoftBodySolverVertexData_DX11.h"
#include "btSoftBodySolverTriangleData_DX11.h"
#ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
#define BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
/**
 * SIMD-aware variant of btDX11SoftBodySolver.
 * It keeps its own link data of type btSoftBodyLinkDataDX11SIMDAware and
 * reports DX_SIMD_SOLVER as its solver type.
 */
class btDX11SIMDAwareSoftBodySolver : public btDX11SoftBodySolver
{
protected:
/** Parameter block for solving positions from links, expressed in waves rather than links. */
struct SolvePositionsFromLinksKernelCB
{
int startWave;
int numWaves;
float kst;
float ti;
};
/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
// NOTE(review): this re-declares the name m_linkData, which the base class
// btDX11SoftBodySolver also declares (as btSoftBodyLinkDataDX11). The two are
// separate objects; code in the base class keeps using its own member.
btSoftBodyLinkDataDX11SIMDAware m_linkData;
/** Variable to define whether we need to update solver constants on the next iteration */
// NOTE(review): also re-declares a base-class member name; base-class code
// reads and writes the base copy, not this one.
bool m_updateSolverConstants;
virtual bool buildShaders();
void updateConstants( float timeStep );
//////////////////////////////////////
// Kernel dispatches
// NOTE(review): the base-class solveLinksForPosition is not declared virtual,
// so this declaration hides it rather than overriding it; calls made from
// base-class code resolve to the base version.
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
// End kernel dispatches
/////////////////////////////////////
public:
btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
virtual ~btDX11SIMDAwareSoftBodySolver();
virtual btSoftBodyLinkData &getLinkData();
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void solveConstraints( float solverdt );
/** Identifies this solver as the SIMD-aware DX solver (always returns DX_SIMD_SOLVER). */
virtual SolverTypes getSolverType() const
{
return DX_SIMD_SOLVER;
}
};
#endif // #ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H

View file

@ -0,0 +1,23 @@
-- Premake script for the DX11 soft body solver static library.
-- findDirectX11() and initDirectX11() are helpers defined outside this file;
-- the project is only declared when findDirectX11() returns a truthy value.
-- Note: hasDX11 is assigned without 'local', so it is a global variable.
hasDX11 = findDirectX11()
if (hasDX11) then
project "BulletSoftBodyDX11Solvers"
initDirectX11()
kind "StaticLib"
-- Output directory, relative to this script's location.
targetdir "../../../../lib"
includedirs {
".",
"../../.."
}
-- Every .cpp and .h file below this directory is added to the project.
files {
"**.cpp",
"**.h"
}
end

View file

@ -0,0 +1,62 @@
# Builds BulletSoftBodySolvers_OpenCL_AMD: the OpenCL soft body solvers
# compiled against the AMD OpenCL headers (${AMD_OPENCL_INCLUDES}).
include_directories(
  ${BULLET_PHYSICS_SOURCE_DIR}/src
  ${AMD_OPENCL_INCLUDES}
)

add_definitions(-DUSE_AMD_OPENCL)
add_definitions(-DCL_PLATFORM_AMD)

set(BulletSoftBodyOpenCLSolvers_SRCS
  ../btSoftBodySolver_OpenCL.cpp
  ../btSoftBodySolver_OpenCLSIMDAware.cpp
  ../btSoftBodySolverOutputCLtoGL.cpp
)

set(BulletSoftBodyOpenCLSolvers_HDRS
  ../btSoftBodySolver_OpenCL.h
  ../btSoftBodySolver_OpenCLSIMDAware.h
  ../../Shared/btSoftBodySolverData.h
  ../btSoftBodySolverVertexData_OpenCL.h
  ../btSoftBodySolverTriangleData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCLSIMDAware.h
  ../btSoftBodySolverBuffer_OpenCL.h
  ../btSoftBodySolverVertexBuffer_OpenGL.h
  ../btSoftBodySolverOutputCLtoGL.h
)

add_library(BulletSoftBodySolvers_OpenCL_AMD
  ${BulletSoftBodyOpenCLSolvers_SRCS}
  ${BulletSoftBodyOpenCLSolvers_HDRS}
)

set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION})
set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES SOVERSION ${BULLET_VERSION})

# A shared build has to resolve the BulletSoftBody symbols at link time.
if(BUILD_SHARED_LIBS)
  target_link_libraries(BulletSoftBodySolvers_OpenCL_AMD BulletSoftBody)
endif()

if(INSTALL_LIBS)
  if(NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
    # Compare as a version, not as a number: the numeric GREATER operator reads
    # "major.minor" as a decimal fraction, so e.g. "2.10" would rank below "2.5".
    if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" VERSION_GREATER 2.5)
      if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
        install(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION .)
      else()
        install(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION lib${LIB_SUFFIX})
        #headers are already installed by BulletMultiThreaded library
      endif()
    endif()

    # Framework builds publish the header list through the target itself.
    if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
      set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES FRAMEWORK true)
      set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
    endif()
  endif()
endif()

View file

@ -0,0 +1,27 @@
-- Premake script for the AMD OpenCL soft body solver static library.
-- findOpenCL_AMD() and initOpenCL_AMD() are helpers defined outside this file;
-- the project is only declared when findOpenCL_AMD() returns a truthy value.
-- Note: hasCL is assigned without 'local', so it is a global variable.
hasCL = findOpenCL_AMD()
if (hasCL) then
project "BulletSoftBodySolvers_OpenCL_AMD"
defines { "USE_AMD_OPENCL","CL_PLATFORM_AMD"}
initOpenCL_AMD()
kind "StaticLib"
-- Output directory, relative to this script's location.
targetdir "../../../../../lib"
includedirs {
".",
"../../../..",
"../../../../../Glut"
}
-- The solver sources live one directory above this script.
files {
"../btSoftBodySolver_OpenCL.cpp",
"../btSoftBodySolver_OpenCLSIMDAware.cpp",
"../btSoftBodySolverOutputCLtoGL.cpp"
}
end

View file

@ -0,0 +1,77 @@
# Builds BulletSoftBodySolvers_OpenCL_Apple: the OpenCL soft body solvers
# linked against Apple's OpenCL framework.
include_directories(
  ${BULLET_PHYSICS_SOURCE_DIR}/src
)

set(BulletSoftBodyOpenCLSolvers_SRCS
  ../btSoftBodySolver_OpenCL.cpp
  ../btSoftBodySolver_OpenCLSIMDAware.cpp
)

# The SIMD-aware headers are listed because btSoftBodySolver_OpenCLSIMDAware.cpp
# is compiled above; this matches the header lists of the AMD and Intel variants.
set(BulletSoftBodyOpenCLSolvers_HDRS
  ../btSoftBodySolver_OpenCL.h
  ../btSoftBodySolver_OpenCLSIMDAware.h
  ../../Shared/btSoftBodySolverData.h
  ../btSoftBodySolverVertexData_OpenCL.h
  ../btSoftBodySolverTriangleData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCLSIMDAware.h
  ../btSoftBodySolverBuffer_OpenCL.h
)

# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
set(BulletSoftBodyOpenCLSolvers_Shaders
  # OutputToVertexArray
  UpdateNormals
  Integrate
  UpdatePositions
  UpdateNodes
  SolvePositions
  UpdatePositionsFromVelocities
  ApplyForces
  PrepareLinks
  VSolveLinks
)

# Turn each shader name into the path of its OpenCL C source file.
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
  list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach()

add_library(BulletSoftBodySolvers_OpenCL_Apple
  ${BulletSoftBodyOpenCLSolvers_SRCS}
  ${BulletSoftBodyOpenCLSolvers_HDRS}
  ${BulletSoftBodyOpenCLSolvers_OpenCLC}
)

set_target_properties(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES VERSION ${BULLET_VERSION})
set_target_properties(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES SOVERSION ${BULLET_VERSION})

if(BUILD_SHARED_LIBS)
  # Inside this branch BUILD_SHARED_LIBS is already true, so the former
  # "APPLE AND (BUILD_SHARED_LIBS OR FRAMEWORK)" test reduces to APPLE.
  if(APPLE)
    set_target_properties(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES LINK_FLAGS "-framework OpenCL")
  endif()
  target_link_libraries(BulletSoftBodySolvers_OpenCL_Apple BulletSoftBody)
endif()

if(INSTALL_LIBS)
  if(NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
    # Compare as a version, not as a number: the numeric GREATER operator reads
    # "major.minor" as a decimal fraction, so e.g. "2.10" would rank below "2.5".
    if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" VERSION_GREATER 2.5)
      if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
        install(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION .)
      else()
        install(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION lib${LIB_SUFFIX})
        #headers are already installed by BulletMultiThreaded library
      endif()
    endif()

    # Framework builds publish the header list through the target itself.
    if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
      set_target_properties(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES FRAMEWORK true)
      set_target_properties(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
    endif()
  endif()
endif()

View file

@ -0,0 +1,17 @@
# Selects which OpenCL soft body solver variants are built.
# add_subdirectory() replaces the deprecated subdirs() command; the
# directories are still processed in the order listed here.

# MiniCL is always built.
add_subdirectory(MiniCL)

# Vendor-specific variants are opt-in through the BUILD_*_OPENCL_DEMOS switches.
if(BUILD_INTEL_OPENCL_DEMOS)
  add_subdirectory(Intel)
endif()

if(BUILD_AMD_OPENCL_DEMOS)
  add_subdirectory(AMD)
endif()

if(BUILD_NVIDIA_OPENCL_DEMOS)
  add_subdirectory(NVidia)
endif()

# The Apple variant needs both an Apple platform and a located OpenCL library.
if(APPLE AND OPENCL_LIBRARY)
  add_subdirectory(Apple)
endif()

View file

@ -0,0 +1,82 @@
# Build description for the Intel OpenCL variant of the soft body solvers.
include_directories(
  ${BULLET_PHYSICS_SOURCE_DIR}/src
  ${INTEL_OPENCL_INCLUDES}
)
add_definitions(-DUSE_INTEL_OPENCL -DCL_PLATFORM_INTEL)

# C++ sources shared with the other vendor builds (they live one level up).
set(BulletSoftBodyOpenCLSolvers_SRCS
  ../btSoftBodySolver_OpenCL.cpp
  ../btSoftBodySolver_OpenCLSIMDAware.cpp
  ../btSoftBodySolverOutputCLtoGL.cpp
)

# Headers listed with the target; also used as PUBLIC_HEADER for framework builds.
set(BulletSoftBodyOpenCLSolvers_HDRS
  ../btSoftBodySolver_OpenCL.h
  ../btSoftBodySolver_OpenCLSIMDAware.h
  ../../Shared/btSoftBodySolverData.h
  ../btSoftBodySolverVertexData_OpenCL.h
  ../btSoftBodySolverTriangleData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCLSIMDAware.h
  ../btSoftBodySolverBuffer_OpenCL.h
  ../btSoftBodySolverVertexBuffer_OpenGL.h
  ../btSoftBodySolverOutputCLtoGL.h
)

# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
set(BulletSoftBodyOpenCLSolvers_Shaders
  # (OutputToVertexArray is left out of this list)
  UpdateNormals
  Integrate
  UpdatePositions
  UpdateNodes
  SolvePositions
  UpdatePositionsFromVelocities
  ApplyForces
  PrepareLinks
  VSolveLinks
)

# Map every shader name to its .cl file in the shared OpenCLC10 directory.
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
  list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach()

add_library(BulletSoftBodySolvers_OpenCL_Intel
  ${BulletSoftBodyOpenCLSolvers_SRCS}
  ${BulletSoftBodyOpenCLSolvers_HDRS}
  ${BulletSoftBodyOpenCLSolvers_OpenCLC}
)
set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES VERSION ${BULLET_VERSION})
set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES SOVERSION ${BULLET_VERSION})

# Link dependencies are only declared for shared builds.
if(BUILD_SHARED_LIBS)
  target_link_libraries(BulletSoftBodySolvers_OpenCL_Intel BulletSoftBody)
endif()

# Install rules; skipped when generating distributable MSVC project files.
if(INSTALL_LIBS AND NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
  if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
    if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
      install(TARGETS BulletSoftBodySolvers_OpenCL_Intel DESTINATION .)
    else()
      install(TARGETS BulletSoftBodySolvers_OpenCL_Intel DESTINATION lib${LIB_SUFFIX})
      # No header install rule here. The original note says the headers are
      # already installed by the BulletMultiThreaded library (not verifiable
      # from this file).
    endif()
  endif()
  # Framework builds publish the solver headers as the framework's public headers.
  if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
    set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES FRAMEWORK true)
    set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
  endif()
endif()

View file

@ -0,0 +1,27 @@
-- Premake project for the Intel OpenCL build of the soft body solvers.
-- The project is only declared when findOpenCL_Intel() returns a truthy value.
hasCL = findOpenCL_Intel()

if hasCL then
	project("BulletSoftBodySolvers_OpenCL_Intel")

	defines({ "USE_INTEL_OPENCL", "CL_PLATFORM_INTEL" })

	-- Vendor-specific setup provided by the surrounding premake scripts.
	initOpenCL_Intel()

	kind("StaticLib")
	targetdir("../../../../../lib")

	includedirs({
		".",
		"../../../..",
		"../../../../../Glut"
	})

	-- Solver sources live one directory up and are shared between vendor builds.
	files({
		"../btSoftBodySolver_OpenCL.cpp",
		"../btSoftBodySolver_OpenCLSIMDAware.cpp",
		"../btSoftBodySolverOutputCLtoGL.cpp"
	})
end

View file

@ -0,0 +1,75 @@
# Build description for the MiniCL variant of the soft body solvers.
include_directories(
  ${BULLET_PHYSICS_SOURCE_DIR}/src
)
add_definitions(-DUSE_MINICL)

# Only the plain OpenCL solver is compiled for this variant.
set(BulletSoftBodyOpenCLSolvers_SRCS
  ../btSoftBodySolver_OpenCL.cpp
)

# Headers listed with the target; also used as PUBLIC_HEADER for framework builds.
set(BulletSoftBodyOpenCLSolvers_HDRS
  ../btSoftBodySolver_OpenCL.h
  ../../Shared/btSoftBodySolverData.h
  ../btSoftBodySolverVertexData_OpenCL.h
  ../btSoftBodySolverTriangleData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCL.h
  ../btSoftBodySolverBuffer_OpenCL.h
)

# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
set(BulletSoftBodyOpenCLSolvers_Shaders
  # (OutputToVertexArray is left out of this list)
  UpdateNormals
  Integrate
  UpdatePositions
  UpdateNodes
  SolvePositions
  UpdatePositionsFromVelocities
  ApplyForces
  PrepareLinks
  VSolveLinks
)

# Map every shader name to its .cl file in the shared OpenCLC10 directory.
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
  list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach()

add_library(BulletSoftBodySolvers_OpenCL_Mini
  ${BulletSoftBodyOpenCLSolvers_SRCS}
  ${BulletSoftBodyOpenCLSolvers_HDRS}
  ${BulletSoftBodyOpenCLSolvers_OpenCLC}
)
set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES VERSION ${BULLET_VERSION})
set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES SOVERSION ${BULLET_VERSION})

# Link dependencies are only declared for shared builds.
if(BUILD_SHARED_LIBS)
  target_link_libraries(BulletSoftBodySolvers_OpenCL_Mini MiniCL BulletMultiThreaded BulletSoftBody)
endif()

# Install rules; skipped when generating distributable MSVC project files.
if(INSTALL_LIBS AND NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
  if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
    if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
      install(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION .)
    else()
      install(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION lib${LIB_SUFFIX})
      # No header install rule here. The original note says the headers are
      # already installed by the BulletMultiThreaded library (not verifiable
      # from this file).
    endif()
  endif()
  # Framework builds publish the solver headers as the framework's public headers.
  if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
    set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES FRAMEWORK true)
    set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
  endif()
endif()

View file

@ -0,0 +1,249 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// MiniCL build of the soft body kernels: the OpenCL C kernel files are pulled
// in as ordinary source text and each kernel is then handed to MINICL_REGISTER.
#include <MiniCL/cl_MiniCL_Defs.h>
// The .cl files wrap their contents in MSTRINGIFY(...). Defining the macro as
// its own argument makes the included kernel text expand to plain code here.
#define MSTRINGIFY(A) A
#include "../OpenCLC10/ApplyForces.cl"
#include "../OpenCLC10/Integrate.cl"
#include "../OpenCLC10/PrepareLinks.cl"
#include "../OpenCLC10/SolvePositions.cl"
#include "../OpenCLC10/UpdateNodes.cl"
#include "../OpenCLC10/UpdateNormals.cl"
#include "../OpenCLC10/UpdatePositions.cl"
#include "../OpenCLC10/UpdatePositionsFromVelocities.cl"
#include "../OpenCLC10/VSolveLinks.cl"
#include "../OpenCLC10/UpdateFixedVertexPositions.cl"
// Not included: a copy of this kernel is defined inline further down in this file.
//#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
// One registration per kernel entry point.
// NOTE(review): MINICL_REGISTER is declared outside this file (presumably in
// the MiniCL header above); presumably it makes the kernel discoverable by
// name - confirm against its definition.
MINICL_REGISTER(PrepareLinksKernel)
MINICL_REGISTER(VSolveLinksKernel)
MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel)
MINICL_REGISTER(SolvePositionsFromLinksKernel)
MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel)
MINICL_REGISTER(updateVelocitiesFromPositionsWithoutVelocitiesKernel)
MINICL_REGISTER(IntegrateKernel)
MINICL_REGISTER(ApplyForcesKernel)
MINICL_REGISTER(ResetNormalsAndAreasKernel)
MINICL_REGISTER(NormalizeNormalsAndAreasKernel)
MINICL_REGISTER(UpdateSoftBodiesKernel)
MINICL_REGISTER(UpdateFixedVertexPositions)
// Three-component dot product: the w lanes of both operands are ignored.
float mydot3a(float4 a, float4 b)
{
    float sum = a.x*b.x;
    sum += a.y*b.y;
    sum += a.z*b.z;
    return sum;
}
// Range of collision objects assigned to one cloth. The kernel below iterates
// collision = firstObject while collision < endObject, so endObject is exclusive.
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
// Description of one collider as read by the collision kernel.
// NOTE(review): the field order presumably has to match the structure the host
// code uploads - confirm before reordering or resizing any member.
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
// radius, halfHeight, upAxis and margin are read by the capsule branch of the
// kernel; upAxis 0, 1 or 2 selects the x, y or z axis for the capsule centerline.
float radius;
float halfHeight;
int upAxis;
float margin;
// Multiplied with the per-cloth friction when the friction force is computed.
float friction;
int padding0;
} CollisionShapeDescription;
// From btBroadphaseProxy.h
// Value compared against collisionShapeType to detect capsule colliders.
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
// Transform a vector by a 4x4 matrix supplied as four column vectors.
// Component i of the result is the dot product of matrix row i with the vector.
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
    // Gather the rows out of the column storage.
    float4 rowX = float4(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
    float4 rowY = float4(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
    float4 rowZ = float4(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
    float4 rowW = float4(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);

    float4 product;
    product.x = dot(rowX, vector);
    product.y = dot(rowY, vector);
    product.z = dot(rowZ, vector);
    product.w = dot(rowW, vector);
    return product;
}
// Collision response for one soft body vertex per work-item (MiniCL build).
//
// For every capsule collider assigned to the vertex's cloth the vertex is
// pushed back towards the capsule surface and a friction force is accumulated.
// Afterwards velocity, force and position of the vertex are written back.
//
//   numNodes                         number of vertices; work-items with a
//                                    global id at or beyond it do nothing
//   isolverdt                        factor applied to the position delta to
//                                    obtain a velocity (presumably 1/dt - confirm)
//   g_vertexClothIdentifier          cloth index per vertex; negative = skip
//   g_vertexPreviousPositions        positions before the current step
//   g_perClothFriction               friction coefficient per cloth
//   g_clothDampingFactor             damping per cloth; velocity is scaled by
//                                    (1 - damping)
//   g_perClothCollisionObjectIndices collider range [firstObject, endObject)
//                                    per cloth
//   g_collisionObjectDetails         collider descriptions
//   g_vertexForces                   out: overwritten with the friction force
//   g_vertexVelocities               out: velocity derived from the positions
//   g_vertexPositions                in/out: vertex positions
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
    const int numNodes,
    const float isolverdt,
    __global int *g_vertexClothIdentifier,
    __global float4 *g_vertexPreviousPositions,
    __global float * g_perClothFriction,
    __global float * g_clothDampingFactor,
    __global CollisionObjectIndices * g_perClothCollisionObjectIndices,
    __global CollisionShapeDescription * g_collisionObjectDetails,
    __global float4 * g_vertexForces,
    __global float4 *g_vertexVelocities,
    __global float4 *g_vertexPositions GUID_ARG)
{
    int nodeID = get_global_id(0);
    float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
    if( get_global_id(0) < numNodes )
    {
        int clothIdentifier = g_vertexClothIdentifier[nodeID];
        // Abort if this is not a valid cloth
        if( clothIdentifier < 0 )
            return;

        // Positions are used as points (w = 1) so the difference has w = 0.
        float4 position (g_vertexPositions[nodeID].xyz, 1.f);
        float4 previousPosition (g_vertexPreviousPositions[nodeID].xyz, 1.f);
        float clothFriction = g_perClothFriction[clothIdentifier];
        float dampingFactor = g_clothDampingFactor[clothIdentifier];
        float velocityCoefficient = (1.f - dampingFactor);
        float4 difference = position - previousPosition;
        float4 velocity = difference*velocityCoefficient*isolverdt;

        CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
        int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
        if( numObjects > 0 )
        {
            // We have some possible collisions to deal with
            for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
            {
                CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
                float colliderFriction = shapeDescription.friction;
                // Only capsule colliders are handled; other shape types are ignored.
                if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
                {
                    // Colliding with a capsule
                    float capsuleHalfHeight = shapeDescription.halfHeight;
                    float capsuleRadius = shapeDescription.radius;
                    float capsuleMargin = shapeDescription.margin;
                    int capsuleupAxis = shapeDescription.upAxis;

                    // Four columns of worldTransform matrix
                    float4 worldTransform[4];
                    worldTransform[0] = shapeDescription.shapeTransform[0];
                    worldTransform[1] = shapeDescription.shapeTransform[1];
                    worldTransform[2] = shapeDescription.shapeTransform[2];
                    worldTransform[3] = shapeDescription.shapeTransform[3];

                    // Capsule centerline end points in local space: +/- halfHeight
                    // along the up axis, zero on the other two axes.
                    float4 c1 (0.f, 0.f, 0.f, 1.f);
                    float4 c2 (0.f, 0.f, 0.f, 1.f);
                    c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
                    c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
                    c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
                    c2.x = -c1.x;
                    c2.y = -c1.y;
                    c2.z = -c1.z;

                    float4 worldC1 = matrixVectorMul(worldTransform, c1);
                    float4 worldC2 = matrixVectorMul(worldTransform, c2);
                    float4 segment = (worldC2 - worldC1);

                    // compute distance of tangent to vertex along line segment in capsule
                    float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
                    float4 closestPoint = (worldC1 + (segment * distanceAlongSegment));
                    float distanceFromLine = length(position - closestPoint);
                    float distanceFromC1 = length(worldC1 - position);
                    float distanceFromC2 = length(worldC2 - position);

                    // Final distance from collision, point to push from, direction to push in
                    // for impulse force
                    float dist;
                    float4 normalVector;
                    if( distanceAlongSegment < 0 )
                    {
                        // Before the first end point: measure against that end cap.
                        dist = distanceFromC1;
                        normalVector = float4(normalize(position - worldC1).xyz, 0.f);
                    } else if( distanceAlongSegment > 1.f ) {
                        // Past the second end point: measure against that end cap.
                        dist = distanceFromC2;
                        normalVector = float4(normalize(position - worldC2).xyz, 0.f);
                    } else {
                        // Alongside the segment: measure against the centerline.
                        dist = distanceFromLine;
                        normalVector = float4(normalize(position - closestPoint).xyz, 0.f);
                    }

                    float4 colliderLinearVelocity = shapeDescription.linearVelocity;
                    float4 colliderAngularVelocity = shapeDescription.angularVelocity;
                    // The collider origin is the fourth column of the transform,
                    // the same column matrixVectorMul applies as translation to a
                    // point with w = 1. (The w lanes of columns 0..2 are not the
                    // translation.)
                    float4 colliderOrigin = float4(worldTransform[3].x, worldTransform[3].y, worldTransform[3].z, 0.f);
                    float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - colliderOrigin);

                    float minDistance = capsuleRadius + capsuleMargin;

                    // In case of no collision, this is the value of velocity
                    velocity = (position - previousPosition) * velocityCoefficient * isolverdt;

                    // Check for a collision
                    if( dist < minDistance )
                    {
                        // Project back to surface along normal
                        // (only 90% of the penetration depth is corrected)
                        position = position + float4(normalVector*(minDistance - dist)*0.9f);
                        velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
                        float4 relativeVelocity = velocity - velocityOfSurfacePoint;

                        // Two directions spanning the plane perpendicular to the normal.
                        float4 p1 = normalize(cross(normalVector, segment));
                        float4 p2 = normalize(cross(p1, normalVector));
                        // Full friction is sum of velocities in each direction of plane
                        float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);

                        // Real friction is peak friction corrected by friction coefficients
                        frictionVector = frictionVector * (colliderFriction*clothFriction);

                        // Friction is only applied while the vertex is not moving away
                        // from the surface.
                        float approachSpeed = dot(relativeVelocity, normalVector);
                        if( approachSpeed <= 0.0f )
                            forceOnVertex -= frictionVector;
                    }
                }
            }
        }

        g_vertexVelocities[nodeID] = float4(velocity.xyz, 0.f);

        // Update external force
        g_vertexForces[nodeID] = float4(forceOnVertex.xyz, 0.f);

        g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
    }
}
MINICL_REGISTER(SolveCollisionsAndUpdateVelocitiesKernel);

View file

@ -0,0 +1,81 @@
# Build description for the NVidia OpenCL variant of the soft body solvers.
add_definitions(-DUSE_NVIDIA_OPENCL)
add_definitions(-DCL_PLATFORM_NVIDIA)

include_directories(
  ${BULLET_PHYSICS_SOURCE_DIR}/src
  ${NVIDIA_OPENCL_INCLUDES}
)

# C++ sources shared with the other vendor builds (they live one level up).
set(BulletSoftBodyOpenCLSolvers_SRCS
  ../btSoftBodySolver_OpenCL.cpp
  ../btSoftBodySolver_OpenCLSIMDAware.cpp
  ../btSoftBodySolverOutputCLtoGL.cpp
)

# Headers listed with the target; also used as PUBLIC_HEADER for framework
# builds. btSoftBodySolver_OpenCLSIMDAware.h is listed because the matching
# .cpp is compiled above (the Intel build lists it as well); without it the
# header would be missing from the framework's public headers.
set(BulletSoftBodyOpenCLSolvers_HDRS
  ../btSoftBodySolver_OpenCL.h
  ../btSoftBodySolver_OpenCLSIMDAware.h
  ../../Shared/btSoftBodySolverData.h
  ../btSoftBodySolverVertexData_OpenCL.h
  ../btSoftBodySolverTriangleData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCL.h
  ../btSoftBodySolverLinkData_OpenCLSIMDAware.h
  ../btSoftBodySolverBuffer_OpenCL.h
  ../btSoftBodySolverVertexBuffer_OpenGL.h
  ../btSoftBodySolverOutputCLtoGL.h
)

# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
set(BulletSoftBodyOpenCLSolvers_Shaders
  # (OutputToVertexArray is left out of this list)
  UpdateNormals
  Integrate
  UpdatePositions
  UpdateNodes
  SolvePositions
  UpdatePositionsFromVelocities
  ApplyForces
  PrepareLinks
  VSolveLinks
)

# Map every shader name to its .cl file in the shared OpenCLC10 directory.
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
  list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach()

add_library(BulletSoftBodySolvers_OpenCL_NVidia
  ${BulletSoftBodyOpenCLSolvers_SRCS}
  ${BulletSoftBodyOpenCLSolvers_HDRS}
  ${BulletSoftBodyOpenCLSolvers_OpenCLC}
)
set_target_properties(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES VERSION ${BULLET_VERSION})
set_target_properties(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES SOVERSION ${BULLET_VERSION})

# Link dependencies are only declared for shared builds.
if(BUILD_SHARED_LIBS)
  target_link_libraries(BulletSoftBodySolvers_OpenCL_NVidia BulletSoftBody BulletDynamics)
endif()

# Install rules; skipped when generating distributable MSVC project files.
if(INSTALL_LIBS)
  if(NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
    if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
      if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
        install(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION .)
      else()
        install(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION lib${LIB_SUFFIX})
        # No header install rule here. The original note says the headers are
        # already installed by the BulletMultiThreaded library (not verifiable
        # from this file).
      endif()
    endif()
    # Framework builds publish the solver headers as the framework's public headers.
    if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
      set_target_properties(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES FRAMEWORK true)
      set_target_properties(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
    endif()
  endif()
endif()

View file

@ -0,0 +1,27 @@
-- Premake project for the NVIDIA OpenCL build of the soft body solvers.
-- The project is only declared when findOpenCL_NVIDIA() returns a truthy value.
hasCL = findOpenCL_NVIDIA()

if hasCL then
	project("BulletSoftBodySolvers_OpenCL_NVIDIA")

	defines({ "USE_NVIDIA_OPENCL", "CL_PLATFORM_NVIDIA" })

	-- Vendor-specific setup provided by the surrounding premake scripts.
	initOpenCL_NVIDIA()

	kind("StaticLib")
	targetdir("../../../../../lib")

	includedirs({
		".",
		"../../../..",
		"../../../../../Glut"
	})

	-- Solver sources live one directory up and are shared between vendor builds.
	files({
		"../btSoftBodySolver_OpenCL.cpp",
		"../btSoftBodySolver_OpenCLSIMDAware.cpp",
		"../btSoftBodySolverOutputCLtoGL.cpp"
	})
end

View file

@ -0,0 +1,102 @@
MSTRINGIFY(
// Dot product over x, y and z only; the w lanes do not contribute.
float adot3(float4 a, float4 b)
{
    float accum = a.x*b.x;
    accum += a.y*b.y;
    accum += a.z*b.z;
    return accum;
}
// Length of the xyz part of a: w is zeroed on a copy before length() is taken.
float alength3(float4 a)
{
    float4 xyzOnly = a;
    xyzOnly.w = 0;
    return length(xyzOnly);
}
// Normalizes the xyz part of a: w is zeroed on a copy before normalize().
float4 anormalize3(float4 a)
{
    float4 xyzOnly = a;
    xyzOnly.w = 0;
    return normalize(xyzOnly);
}
// Scales the axis a by the three-component dot product of v and a.
float4 projectOnAxis( float4 v, float4 a )
{
    float alongAxis = adot3(v, a);
    return (a*alongAxis);
}
// Applies the per-cloth acceleration and the aerodynamic forces, one vertex per
// work-item. The acceleration is integrated straight into the vertex velocity.
// Drag and lift are added to the force accumulator. Vertices whose inverse mass
// is not positive are left untouched.
__kernel void
ApplyForcesKernel(
    const uint numNodes,
    const float solverdt,
    const float epsilon,
    __global int * g_vertexClothIdentifier,
    __global float4 * g_vertexNormal,
    __global float * g_vertexArea,
    __global float * g_vertexInverseMass,
    __global float * g_clothLiftFactor,
    __global float * g_clothDragFactor,
    __global float4 * g_clothWindVelocity,
    __global float4 * g_clothAcceleration,
    __global float * g_clothMediumDensity,
    __global float4 * g_vertexForceAccumulator,
    __global float4 * g_vertexVelocity GUID_ARG)
{
    unsigned int vertexIndex = get_global_id(0);
    // Work-items beyond the vertex count have nothing to do.
    if( vertexIndex >= numNodes )
        return;

    int clothIndex = g_vertexClothIdentifier[vertexIndex];
    float inverseMass = g_vertexInverseMass[vertexIndex];
    if( !(inverseMass > 0.0f) )
        return;

    float4 velocity = g_vertexVelocity[vertexIndex];
    float4 vertexNormal = g_vertexNormal[vertexIndex];
    float vertexArea = g_vertexArea[vertexIndex];
    float4 accumulatedForce = g_vertexForceAccumulator[vertexIndex];

    // Per-cloth parameters
    float4 acceleration = g_clothAcceleration[clothIndex];
    float4 windVelocity = g_clothWindVelocity[clothIndex];
    float liftCoefficient = g_clothLiftFactor[clothIndex];
    float dragCoefficient = g_clothDragFactor[clothIndex];
    float density = g_clothMediumDensity[clothIndex];

    // The acceleration goes directly into the velocity instead of through a force.
    velocity += (acceleration*solverdt);
    g_vertexVelocity[vertexIndex] = velocity;

    // Aerodynamics, evaluated on the velocity relative to the wind.
    float4 relativeVelocity = velocity - windVelocity;
    float relativeSpeed = alength3(relativeVelocity);
    float relativeSpeedSq = dot(relativeVelocity, relativeVelocity);

    if( relativeSpeedSq > epsilon )
    {
        float4 flowDirection = anormalize3(relativeVelocity);

        // Flip the normal so that it does not oppose the relative velocity.
        float4 facingNormal = vertexNormal;
        facingNormal = facingNormal * (dot(facingNormal, relativeVelocity) < 0 ? -1.f : 1.f);

        float4 dragForce = (float4)(0.f, 0.f, 0.f, 0.f);
        float4 liftForce = (float4)(0.f, 0.f, 0.f, 0.f);

        float cosAttack = dot(facingNormal, flowDirection);

        // Drag acts against the flow direction.
        if ( dragCoefficient > 0.f )
            dragForce = 0.5f * dragCoefficient * density * relativeSpeedSq * vertexArea * cosAttack * (-1.0f) * flowDirection;

        // Lift is only generated within an angle-of-attack window;
        // 0.98480 is cos(10 degrees).
        if ( 0 < cosAttack && cosAttack < 0.98480f)
            liftForce = 0.5f * liftCoefficient * density * relativeSpeed * vertexArea * sqrt(1.0f-cosAttack*cosAttack) * (cross(cross(facingNormal, flowDirection), flowDirection));

        accumulatedForce += dragForce + liftForce;
        g_vertexForceAccumulator[vertexIndex] = accumulatedForce;
    }
}
);

View file

@ -0,0 +1,82 @@
MSTRINGIFY(
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
/* Computes per-cloth minimum and maximum vertex bounds. */
/* Every work-item folds its vertex into per-work-group local arrays with */
/* atomics; after a barrier the local results are merged into the global */
/* arrays. Positions are compared as transformed unsigned integers, so the */
/* outputs hold those integer keys rather than floats. */
/* Each cloth occupies four consecutive uints; only the first three are */
/* updated by the atomics. */
/* NOTE(review): only local ids below numSoftBodies initialise and publish the */
/* local arrays, so this presumably requires a work-group size of at least */
/* numSoftBodies - confirm against the host code. */
__kernel void
ComputeBoundsKernel(
const int numNodes,
const int numSoftBodies,
__global int * g_vertexClothIdentifier,
__global float4 * g_vertexPositions,
/* Unfortunately, to get the atomics below to work these arrays cannot be */
/* uint4, though that is the layout of the data */
/* Therefore this is little-endian-only code */
volatile __global uint * g_clothMinBounds,
volatile __global uint * g_clothMaxBounds,
volatile __local uint * clothMinBounds,
volatile __local uint * clothMaxBounds)
{
// Init min and max bounds arrays
/* Minimum starts at the largest key and maximum at the smallest. */
if( get_local_id(0) < numSoftBodies )
{
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
clothMaxBounds[get_local_id(0)*4] = 0;
clothMaxBounds[get_local_id(0)*4+1] = 0;
clothMaxBounds[get_local_id(0)*4+2] = 0;
clothMaxBounds[get_local_id(0)*4+3] = 0;
}
/* The local arrays must be initialised before any work-item updates them. */
barrier(CLK_LOCAL_MEM_FENCE);
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
/* Vertices with a negative cloth identifier are skipped. */
if( clothIdentifier >= 0 )
{
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
/* Reinterpret position as uint */
uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
/* The mask is 0x80000000 when the sign bit is clear and 0xFFFFFFFF when it is set. */
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
// Min/max with the LDS values
atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
}
}
/* All local updates must be finished before the results are published. */
barrier(CLK_LOCAL_MEM_FENCE);
/* Use global atomics to update the global versions of the data */
if( get_local_id(0) < numSoftBodies )
{
/*atom_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
atom_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
atom_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
atom_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
}
}
);

View file

@ -0,0 +1,35 @@
MSTRINGIFY(
// Integration step, one vertex per work-item. The current position is saved as
// the previous position, the velocity is advanced by force * inverse mass * dt,
// the position is advanced by the updated velocity * dt, and the force
// accumulator is cleared.
__kernel void
IntegrateKernel(
    const int numNodes,
    const float solverdt,
    __global float * g_vertexInverseMasses,
    __global float4 * g_vertexPositions,
    __global float4 * g_vertexVelocity,
    __global float4 * g_vertexPreviousPositions,
    __global float4 * g_vertexForceAccumulator GUID_ARG)
{
    int vertexIndex = get_global_id(0);
    // Work-items beyond the vertex count have nothing to do.
    if( vertexIndex >= numNodes )
        return;

    float4 currentPosition = g_vertexPositions[vertexIndex];
    float4 currentVelocity = g_vertexVelocity[vertexIndex];
    float4 accumulatedForce = g_vertexForceAccumulator[vertexIndex];
    float invMass = g_vertexInverseMasses[vertexIndex];

    // Remember where the vertex was before this step.
    g_vertexPreviousPositions[vertexIndex] = currentPosition;

    // Velocity first, then position from the updated velocity.
    currentVelocity += accumulatedForce * invMass * solverdt;
    currentPosition += currentVelocity * solverdt;

    g_vertexForceAccumulator[vertexIndex] = (float4)(0.f, 0.f, 0.f, 0.0f);
    g_vertexPositions[vertexIndex] = currentPosition;
    g_vertexVelocity[vertexIndex] = currentVelocity;
}
);

View file

@ -0,0 +1,46 @@
MSTRINGIFY(
// Copies position and normal of each vertex into a float vertex buffer.
// Work-item i reads vertex (i + startNode) and writes three floats at
// i * stride + offset, separately for positions and for normals.
__kernel void
OutputToVertexArrayWithNormalsKernel(
    const int startNode, const int numNodes, __global float *g_vertexBuffer,
    const int positionOffset, const int positionStride, const __global float4* g_vertexPositions,
    const int normalOffset, const int normalStride, const __global float4* g_vertexNormals )
{
    int vertexIndex = get_global_id(0);
    // Work-items beyond the vertex count have nothing to do.
    if( vertexIndex >= numNodes )
        return;

    float4 sourcePosition = g_vertexPositions[vertexIndex + startNode];
    float4 sourceNormal = g_vertexNormals[vertexIndex + startNode];

    // Stride should account for the float->float4 conversion
    int positionBase = vertexIndex * positionStride + positionOffset;
    g_vertexBuffer[positionBase] = sourcePosition.x;
    g_vertexBuffer[positionBase+1] = sourcePosition.y;
    g_vertexBuffer[positionBase+2] = sourcePosition.z;

    int normalBase = vertexIndex * normalStride + normalOffset;
    g_vertexBuffer[normalBase] = sourceNormal.x;
    g_vertexBuffer[normalBase+1] = sourceNormal.y;
    g_vertexBuffer[normalBase+2] = sourceNormal.z;
}
// Copies the position of each vertex into a float vertex buffer.
// Work-item i reads vertex (i + startNode) and writes three floats at
// i * positionStride + positionOffset.
__kernel void
OutputToVertexArrayWithoutNormalsKernel(
    const int startNode, const int numNodes, __global float *g_vertexBuffer,
    const int positionOffset, const int positionStride, const __global float4* g_vertexPositions )
{
    int vertexIndex = get_global_id(0);
    // Work-items beyond the vertex count have nothing to do.
    if( vertexIndex >= numNodes )
        return;

    float4 sourcePosition = g_vertexPositions[vertexIndex + startNode];

    // Stride should account for the float->float4 conversion
    int positionBase = vertexIndex * positionStride + positionOffset;
    g_vertexBuffer[positionBase] = sourcePosition.x;
    g_vertexBuffer[positionBase+1] = sourcePosition.y;
    g_vertexBuffer[positionBase+2] = sourcePosition.z;
}
);

View file

@ -0,0 +1,38 @@
MSTRINGIFY(
// Precomputes per-link data, one link per work-item. It stores the vector
// between the previous positions of the two end vertices (w cleared) and the
// reciprocal of (squared length of that vector * massLSC).
__kernel void
PrepareLinksKernel(
    const int numLinks,
    __global int2 * g_linksVertexIndices,
    __global float * g_linksMassLSC,
    __global float4 * g_nodesPreviousPosition,
    __global float * g_linksLengthRatio,
    __global float4 * g_linksCurrentLength GUID_ARG)
{
    int linkIndex = get_global_id(0);
    // Work-items beyond the link count have nothing to do.
    if( linkIndex >= numLinks )
        return;

    int2 endpoints = g_linksVertexIndices[linkIndex];
    int firstVertex = endpoints.x;
    int secondVertex = endpoints.y;

    float4 firstPrevious = g_nodesPreviousPosition[firstVertex];
    float4 secondPrevious = g_nodesPreviousPosition[secondVertex];

    float linkMassLSC = g_linksMassLSC[linkIndex];

    // Link vector from the first to the second end vertex; w is cleared so it
    // does not enter the dot product below.
    float4 linkVector = secondPrevious - firstPrevious;
    linkVector.w = 0.f;

    float lengthRatio = dot(linkVector, linkVector)*linkMassLSC;
    lengthRatio = 1.0f/lengthRatio;

    g_linksCurrentLength[linkIndex] = linkVector;
    g_linksLengthRatio[linkIndex] = lengthRatio;
}
);

View file

@ -0,0 +1,204 @@
MSTRINGIFY(
// Three-component dot product: the w lanes of both operands are ignored.
float mydot3a(float4 a, float4 b)
{
    float sum = a.x*b.x;
    sum += a.y*b.y;
    sum += a.z*b.z;
    return sum;
}
// Range of collision objects assigned to one cloth. The kernel below iterates
// collision = firstObject while collision < endObject, so endObject is exclusive.
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
// Description of one collider as read by the collision kernel.
// NOTE(review): the field order presumably has to match the structure the host
// code uploads - confirm before reordering or resizing any member.
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
int collisionShapeType;
// Shape information
// Compressed from the union
// radius, halfHeight, upAxis and margin are read by the capsule branch of the
// kernel; upAxis 0, 1 or 2 selects the x, y or z axis for the capsule centerline.
float radius;
float halfHeight;
int upAxis;
float margin;
// Multiplied with the per-cloth friction when the friction force is computed.
float friction;
int padding0;
} CollisionShapeDescription;
// From btBroadphaseProxy.h
// Value compared against collisionShapeType to detect capsule colliders.
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
// Transform a vector by a 4x4 matrix supplied as four column vectors.
// Component i of the result is the dot product of matrix row i with the vector.
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
    // Gather the rows out of the column storage.
    float4 rowX = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
    float4 rowY = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
    float4 rowZ = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
    float4 rowW = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);

    float4 product;
    product.x = dot(rowX, vector);
    product.y = dot(rowY, vector);
    product.z = dot(rowZ, vector);
    product.w = dot(rowW, vector);
    return product;
}
// Collision response for one soft body vertex per work-item.
//
// For every capsule collider assigned to the cloth of the vertex, the vertex is
// pushed back towards the capsule surface and a friction force is accumulated.
// Afterwards velocity, force and position of the vertex are written back.
//
//   numNodes                         number of vertices; work-items with a
//                                    global id at or beyond it do nothing
//   isolverdt                        factor applied to the position delta to
//                                    obtain a velocity (presumably 1/dt - confirm)
//   g_vertexClothIdentifier          cloth index per vertex; negative = skip
//   g_vertexPreviousPositions        positions before the current step
//   g_perClothFriction               friction coefficient per cloth
//   g_clothDampingFactor             damping per cloth; velocity is scaled by
//                                    (1 - damping)
//   g_perClothCollisionObjectIndices collider range [firstObject, endObject)
//                                    per cloth
//   g_collisionObjectDetails         collider descriptions
//   g_vertexForces                   out: overwritten with the friction force
//   g_vertexVelocities               out: velocity derived from the positions
//   g_vertexPositions                in/out: vertex positions
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
    const int numNodes,
    const float isolverdt,
    __global int *g_vertexClothIdentifier,
    __global float4 *g_vertexPreviousPositions,
    __global float * g_perClothFriction,
    __global float * g_clothDampingFactor,
    __global CollisionObjectIndices * g_perClothCollisionObjectIndices,
    __global CollisionShapeDescription * g_collisionObjectDetails,
    __global float4 * g_vertexForces,
    __global float4 *g_vertexVelocities,
    __global float4 *g_vertexPositions GUID_ARG)
{
    int nodeID = get_global_id(0);
    float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
    if( get_global_id(0) < numNodes )
    {
        int clothIdentifier = g_vertexClothIdentifier[nodeID];
        // Abort if this is not a valid cloth
        if( clothIdentifier < 0 )
            return;

        // Positions are used as points with w = 1 so the difference has w = 0.
        float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
        float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
        float clothFriction = g_perClothFriction[clothIdentifier];
        float dampingFactor = g_clothDampingFactor[clothIdentifier];
        float velocityCoefficient = (1.f - dampingFactor);
        float4 difference = position - previousPosition;
        float4 velocity = difference*velocityCoefficient*isolverdt;

        CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
        int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
        if( numObjects > 0 )
        {
            // We have some possible collisions to deal with
            for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
            {
                CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
                float colliderFriction = shapeDescription.friction;
                // Only capsule colliders are handled; other shape types are ignored.
                if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
                {
                    // Colliding with a capsule
                    float capsuleHalfHeight = shapeDescription.halfHeight;
                    float capsuleRadius = shapeDescription.radius;
                    float capsuleMargin = shapeDescription.margin;
                    int capsuleupAxis = shapeDescription.upAxis;

                    // Four columns of worldTransform matrix
                    float4 worldTransform[4];
                    worldTransform[0] = shapeDescription.shapeTransform[0];
                    worldTransform[1] = shapeDescription.shapeTransform[1];
                    worldTransform[2] = shapeDescription.shapeTransform[2];
                    worldTransform[3] = shapeDescription.shapeTransform[3];

                    // Capsule centerline end points in local space: plus and minus
                    // halfHeight along the up axis and zero on the other two axes.
                    float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
                    float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
                    c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
                    c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
                    c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
                    c2.x = -c1.x;
                    c2.y = -c1.y;
                    c2.z = -c1.z;

                    float4 worldC1 = matrixVectorMul(worldTransform, c1);
                    float4 worldC2 = matrixVectorMul(worldTransform, c2);
                    float4 segment = (worldC2 - worldC1);

                    // compute distance of tangent to vertex along line segment in capsule
                    float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
                    float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment));
                    float distanceFromLine = length(position - closestPoint);
                    float distanceFromC1 = length(worldC1 - position);
                    float distanceFromC2 = length(worldC2 - position);

                    // Final distance from collision, point to push from, direction to push in
                    // for impulse force
                    float dist;
                    float4 normalVector;
                    if( distanceAlongSegment < 0 )
                    {
                        // Before the first end point: measure against that end cap.
                        dist = distanceFromC1;
                        normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);
                    } else if( distanceAlongSegment > 1.f ) {
                        // Past the second end point: measure against that end cap.
                        dist = distanceFromC2;
                        normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);
                    } else {
                        // Alongside the segment: measure against the centerline.
                        dist = distanceFromLine;
                        normalVector = (float4)(normalize(position - closestPoint).xyz, 0.f);
                    }

                    float4 colliderLinearVelocity = shapeDescription.linearVelocity;
                    float4 colliderAngularVelocity = shapeDescription.angularVelocity;
                    // The collider origin is the fourth column of the transform,
                    // the same column matrixVectorMul applies as translation to a
                    // point with w = 1. The w lanes of columns 0 to 2 are not the
                    // translation.
                    float4 colliderOrigin = (float4)(worldTransform[3].x, worldTransform[3].y, worldTransform[3].z, 0.f);
                    float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - colliderOrigin);

                    float minDistance = capsuleRadius + capsuleMargin;

                    // In case of no collision, this is the value of velocity
                    velocity = (position - previousPosition) * velocityCoefficient * isolverdt;

                    // Check for a collision
                    if( dist < minDistance )
                    {
                        // Project back to surface along normal
                        // (only 90 percent of the penetration depth is corrected)
                        position = position + (float4)((minDistance - dist)*normalVector*0.9f);
                        velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
                        float4 relativeVelocity = velocity - velocityOfSurfacePoint;

                        // Two directions spanning the plane perpendicular to the normal.
                        float4 p1 = normalize(cross(normalVector, segment));
                        float4 p2 = normalize(cross(p1, normalVector));
                        // Full friction is sum of velocities in each direction of plane
                        float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);

                        // Real friction is peak friction corrected by friction coefficients
                        frictionVector = frictionVector * (colliderFriction*clothFriction);

                        // Friction is only applied while the vertex is not moving away
                        // from the surface.
                        float approachSpeed = dot(relativeVelocity, normalVector);
                        if( approachSpeed <= 0.0f )
                            forceOnVertex -= frictionVector;
                    }
                }
            }
        }

        g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);

        // Update external force
        g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);

        g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
    }
}
);

View file

@ -0,0 +1,242 @@
MSTRINGIFY(
//#pragma OPENCL EXTENSION cl_amd_printf:enable\n
// Dot product over the x, y and z lanes only; the w lanes of both
// operands are never read.
float mydot3a(float4 a, float4 b)
{
	return (a.x*b.x + a.y*b.y) + a.z*b.z;
}
// Length of the xyz part of a float4; the w lane is replaced by zero
// before the built-in length() is applied.
float mylength3(float4 a)
{
	return length((float4)(a.xyz, 0.0f));
}
// Normalizes the xyz part of a float4; the w lane is replaced by zero
// before the built-in normalize() is applied, so the result has w == 0.
float4 mynormalize3(float4 a)
{
	return normalize((float4)(a.xyz, 0.0f));
}
// Half-open index range [firstObject, endObject) into the collision
// object array for one cloth; the kernel below computes the object count
// as endObject - firstObject.
// NOTE(review): the layout presumably has to match a host-side struct -
// confirm before reordering or resizing fields.
typedef struct
{
int firstObject;
int endObject;
} CollisionObjectIndices;
// Description of one collision shape as read by the collision kernel.
// NOTE(review): the layout presumably has to match a host-side struct -
// confirm before reordering or resizing fields.
typedef struct
{
float4 shapeTransform[4]; // column major 4x4 matrix
float4 linearVelocity;
float4 angularVelocity;
int softBodyIdentifier;
// Compared against CAPSULE_SHAPE_PROXYTYPE by the kernel.
int collisionShapeType;
// Shape information
// Compressed from the union
float radius;
float halfHeight;
// Selects which local axis (0 = x, 1 = y, 2 = z) the capsule extends along.
int upAxis;
float margin;
float friction;
// Not read by the kernel in this file; presumably alignment padding.
int padding0;
} CollisionShapeDescription;
// From btBroadphaseProxy.h
// Shape-type tag that marks a CollisionShapeDescription as a capsule.
// NOTE(review): the literal must stay equal to the host-side enum value -
// confirm against btBroadphaseProxy.h when updating Bullet.
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
// Product of a 4x4 matrix, stored as four column vectors, with a float4.
// Component i of the result is the dot product of row i of the matrix
// (gathered from lane i of each column) with the input vector.
float4 matrixVectorMul( float4 matrix[4], float4 vector )
{
	const float4 rowX = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
	const float4 rowY = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
	const float4 rowZ = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
	const float4 rowW = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);

	return (float4)(dot(rowX, vector),
	                dot(rowY, vector),
	                dot(rowZ, vector),
	                dot(rowW, vector));
}
// Resolves collisions between cloth vertices and capsule collision shapes,
// then writes the resulting velocity, force and position of each vertex.
// One work-item handles the vertex at index get_global_id(0).
//
// numNodes                          - not referenced in the body.
//                                     NOTE(review): nodeID is never range-checked, so the launch
//                                     size presumably must not exceed the size of the per-vertex
//                                     arrays - confirm against the host code.
// isolverdt                         - factor that turns a position delta into a velocity
//                                     (presumably 1 / solver time step - confirm).
// g_vertexClothIdentifier           - per-vertex cloth index; negative values make the work-item exit.
// g_vertexPreviousPositions         - per-vertex position used as the start of the velocity delta.
// g_perClothFriction                - per-cloth friction value.
// g_clothDampingFactor              - per-cloth damping; velocity is scaled by (1 - damping).
// g_perClothCollisionObjectIndices  - per-cloth range into g_collisionObjectDetails.
// g_collisionObjectDetails          - collision shape descriptions.
// g_vertexForces                    - output; always written as zero by this kernel.
// g_vertexVelocities                - output velocity per vertex.
// g_vertexPositions                 - in/out vertex positions.
// localCollisionShapes              - local-memory staging area for the cloth's collision shapes.
// g_vertexInverseMasses             - per-vertex inverse mass; friction is skipped when it is not positive.
__kernel void
SolveCollisionsAndUpdateVelocitiesKernel(
const int numNodes,
const float isolverdt,
__global int *g_vertexClothIdentifier,
__global float4 *g_vertexPreviousPositions,
__global float * g_perClothFriction,
__global float * g_clothDampingFactor,
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
__global CollisionShapeDescription * g_collisionObjectDetails,
__global float4 * g_vertexForces,
__global float4 *g_vertexVelocities,
__global float4 *g_vertexPositions,
__local CollisionShapeDescription *localCollisionShapes,
__global float * g_vertexInverseMasses)
{
int nodeID = get_global_id(0);
// Never modified after this point, so the force written back at the end is always zero.
float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
int clothIdentifier = g_vertexClothIdentifier[nodeID];
// Abort if this is not a valid cloth
// NOTE(review): work-items that return here never reach the barrier below. That is only
// well-defined if every work-item of a group takes the same path - confirm that the
// vertex padding guarantees this.
if( clothIdentifier < 0 )
return;
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 0.f);
float clothFriction = g_perClothFriction[clothIdentifier];
float dampingFactor = g_clothDampingFactor[clothIdentifier];
float velocityCoefficient = (1.f - dampingFactor);
float4 difference = position - previousPosition;
// Velocity used when no collision is found.
float4 velocity = difference*velocityCoefficient*isolverdt;
float inverseMass = g_vertexInverseMasses[nodeID];
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
if( numObjects > 0 )
{
// We have some possible collisions to deal with
// First load all of the collision objects into LDS
// This inner declaration shadows the outer numObjects with the same value.
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
// Each of the first numObjects work-items copies exactly one shape.
// NOTE(review): if numObjects exceeds the work-group size the remaining shapes are never
// loaded but are still read in the loop below - confirm the host limits the count.
if( get_local_id(0) < numObjects )
{
localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
}
}
// Safe as the vertices are padded so that not more than one soft body is in a group
barrier(CLK_LOCAL_MEM_FENCE);
// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
if( numObjects > 0 )
{
// We have some possible collisions to deal with
for( int collision = 0; collision < numObjects; ++collision )
{
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
float colliderFriction = localCollisionShapes[collision].friction;
// Only capsules are handled; every other shape type is ignored.
if( localCollisionShapes[collision].collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
{
// Colliding with a capsule
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
float capsuleRadius = localCollisionShapes[collision].radius;
float capsuleMargin = localCollisionShapes[collision].margin;
int capsuleupAxis = localCollisionShapes[collision].upAxis;
// Keep the centre line non-degenerate so that it can be normalized below.
if ( capsuleHalfHeight <= 0 )
capsuleHalfHeight = 0.0001f;
float4 worldTransform[4];
worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
// Correctly define capsule centerline vector
// c1 and c2 are the local-space end points, offset by -/+ halfHeight along the up axis.
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
c2.x = -c1.x;
c2.y = -c1.y;
c2.z = -c1.z;
float4 worldC1 = matrixVectorMul(worldTransform, c1);
float4 worldC2 = matrixVectorMul(worldTransform, c2);
float4 segment = (float4)((worldC2 - worldC1).xyz, 0.f);
float4 segmentNormalized = mynormalize3(segment);
// Signed distance of the vertex along the centre line, measured from worldC1.
float distanceAlongSegment =mydot3a( (position - worldC1), segmentNormalized );
float4 closestPointOnSegment = (worldC1 + (float4)(segmentNormalized * distanceAlongSegment));
float distanceFromLine = mylength3(position - closestPointOnSegment);
float distanceFromC1 = mylength3(worldC1 - position);
float distanceFromC2 = mylength3(worldC2 - position);
// Final distance from collision, point to push from, direction to push in
// for impulse force
// NOTE(review): the three normals below use the 4-component normalize() before taking
// .xyz, whereas the distances use the 3-component helpers. The results differ whenever
// the w lane of worldC1 / worldC2 is non-zero - confirm that it is always zero here.
float dist;
float4 normalVector;
if( distanceAlongSegment < 0 )
{
// Beyond the C1 end cap.
dist = distanceFromC1;
normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);
} else if( distanceAlongSegment > length(segment) ) {
// Beyond the C2 end cap.
dist = distanceFromC2;
normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);
} else {
// Alongside the cylindrical part.
dist = distanceFromLine;
normalVector = (float4)(normalize(position - closestPointOnSegment).xyz, 0.f);
}
float minDistance = capsuleRadius + capsuleMargin;
float4 closestPointOnSurface = (float4)((position + (minDistance - dist) * normalVector).xyz, 0.f);
float4 colliderLinearVelocity = shapeDescription.linearVelocity;
float4 colliderAngularVelocity = shapeDescription.angularVelocity;
// NOTE(review): the collider origin is taken from the w lanes of columns 0..2, while
// matrixVectorMul treats shapeTransform[3] as the fourth column - confirm which of the
// two actually holds the translation.
float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, closestPointOnSurface - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
// Check for a collision
if( dist < minDistance )
{
// Project back to surface along normal
position = closestPointOnSurface;
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
float4 relativeVelocity = velocity - velocityOfSurfacePoint;
// p1 and p2 are two unit vectors perpendicular to the contact normal.
float4 p1 = mynormalize3(cross(normalVector, segment));
float4 p2 = mynormalize3(cross(p1, normalVector));
float4 tangentialVel = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
// Combined friction is the product of both coefficients, clamped to at most 1.
float frictionCoef = (colliderFriction * clothFriction);
if (frictionCoef>1.f)
frictionCoef = 1.f;
//only apply friction if objects are not moving apart
float projVel = mydot3a(relativeVelocity,normalVector);
if ( projVel >= -0.001f)
{
// Vertices whose inverse mass is not positive are left where they are.
if ( inverseMass > 0 )
{
//float4 myforceOnVertex = -tangentialVel * frictionCoef * isolverdt * (1.0f / inverseMass);
// Friction is applied as a position correction against the tangential velocity.
position += (-tangentialVel * frictionCoef) / (isolverdt);
}
}
// Recompute the velocity from the (possibly friction-adjusted) position.
velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
}
}
}
}
g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);
// Update external force
g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
}
);

View file

@ -0,0 +1,57 @@
MSTRINGIFY(
// Dot product over the x, y and z lanes only; the w lanes of both
// operands are never read.
float mydot3(float4 a, float4 b)
{
	return (a.x*b.x + a.y*b.y) + a.z*b.z;
}
// Positional solve for one batch of links: each work-item adjusts the two
// end vertices of a single link in place.
//
// startLink / numLinks - first link of the batch and number of links in it.
// kst                  - scale factor applied to the correction.
// ti                   - not referenced in the body.
// Links whose massLSC is not greater than zero are left untouched.
__kernel void
SolvePositionsFromLinksKernel(
	const int startLink,
	const int numLinks,
	const float kst,
	const float ti,
	__global int2 * g_linksVertexIndices,
	__global float * g_linksMassLSC,
	__global float * g_linksRestLengthSquared,
	__global float * g_verticesInverseMass,
	__global float4 * g_vertexPositions GUID_ARG)
{
	// Work-items beyond the end of the batch have nothing to do.
	if( get_global_id(0) >= numLinks )
		return;

	const int link = get_global_id(0) + startLink;
	const float linkMassLSC = g_linksMassLSC[link];
	const float restSquared = g_linksRestLengthSquared[link];
	if( !(linkMassLSC > 0.0f) )
		return;

	const int2 ends = g_linksVertexIndices[link];
	const float4 posA = g_vertexPositions[ends.x];
	const float4 posB = g_vertexPositions[ends.y];
	const float invMassA = g_verticesInverseMass[ends.x];
	const float invMassB = g_verticesInverseMass[ends.y];

	// Squared current length of the link (xyz only).
	const float4 delta = posB - posA;
	const float currentSquared = mydot3(delta, delta);
	const float k = ((restSquared - currentSquared)/(linkMassLSC*(restSquared+currentSquared)))*kst;

	// Move the two ends in opposite directions, weighted by inverse mass.
	g_vertexPositions[ends.x] = posA - delta*(k*invMassA);
	g_vertexPositions[ends.y] = posB + delta*(k*invMassB);
}
);

View file

@ -0,0 +1,130 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
MSTRINGIFY(
// Dot product over the x, y and z lanes only; the w lanes of both
// operands are never read.
float mydot3(float4 a, float4 b)
{
	return (a.x*b.x + a.y*b.y) + a.z*b.z;
}
// Wavefront-batched positional link solve. Each "wavefront" (WAVEFRONT_SIZE consecutive
// work-items) copies the vertices it uses into local memory, solves its batches of links
// there, and writes the vertex positions back to global memory.
//
// startWaveInBatch / numWaves          - first wavefront handled by this launch and their count.
// kst                                  - scale factor applied to the correction.
// ti                                   - not referenced in the body.
// g_wavefrontBatchCountsVertexCounts   - per wavefront: x = number of batches, y = number of vertices.
// g_vertexAddressesPerWavefront        - per wavefront: global vertex index of each local vertex slot.
// g_linksVertexIndices                 - per link: pair of local vertex slots within the wavefront.
// wavefrontBatchCountsVertexCounts     - local buffer; not referenced in the body.
// vertexPositionSharedData /
// vertexInverseMassSharedData          - local staging for positions and inverse masses.
// WAVEFRONT_SIZE, WAVEFRONT_BLOCK_MULTIPLIER, MAX_NUM_VERTICES_PER_WAVE and
// MAX_BATCHES_PER_WAVE are not defined in this source; presumably supplied as build
// options by the host - confirm.
__kernel __attribute__((reqd_work_group_size(WAVEFRONT_BLOCK_MULTIPLIER*WAVEFRONT_SIZE, 1, 1)))
void
SolvePositionsFromLinksKernel(
const int startWaveInBatch,
const int numWaves,
const float kst,
const float ti,
__global int2 *g_wavefrontBatchCountsVertexCounts,
__global int *g_vertexAddressesPerWavefront,
__global int2 * g_linksVertexIndices,
__global float * g_linksMassLSC,
__global float * g_linksRestLengthSquared,
__global float * g_verticesInverseMass,
__global float4 * g_vertexPositions,
__local int2 *wavefrontBatchCountsVertexCounts,
__local float4 *vertexPositionSharedData,
__local float *vertexInverseMassSharedData)
{
// The mask only yields the lane index if WAVEFRONT_SIZE is a power of two.
const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
const int localWavefront = wavefront - firstWavefrontInBlock;
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
// NOTE(review): the barriers below sit inside this condition and inside a loop whose trip
// count is read per wavefront. With WAVEFRONT_BLOCK_MULTIPLIER > 1 the work-items of one
// group may therefore execute a different number of barriers - confirm this is acceptable
// on the targeted devices.
if( wavefront < (startWaveInBatch + numWaves) )
{
// Load the batch counts for the wavefronts
int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
// Each lane loads every WAVEFRONT_SIZE-th vertex of its wavefront.
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
barrier(CLK_LOCAL_MEM_FENCE);
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
// The do/while form runs the body at least once, even when batchesWithinWavefront is 0.
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float4 position0 = vertexPositionSharedData[vertexAddress0];
float4 position1 = vertexPositionSharedData[vertexAddress1];
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float4 del = position1 - position0;
// Squared current length of the link (xyz only).
float len = mydot3(del, del);
// k stays 0 for links whose massLSC is not positive, so their positions are
// written back unchanged.
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
// Ensure compiler does not re-order memory operations
barrier(CLK_LOCAL_MEM_FENCE);
vertexPositionSharedData[vertexAddress0] = position0;
vertexPositionSharedData[vertexAddress1] = position1;
// Ensure compiler does not re-order memory operations
barrier(CLK_LOCAL_MEM_FENCE);
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
// The w lane of every written position is forced to zero.
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
}
}
}
);

View file

@ -0,0 +1,44 @@
MSTRINGIFY(
/*#define float3 float4
float dot3(float3 a, float3 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}*/
// Recomputes the per-link constants from the current vertex positions:
// rest length, rest length squared and massLSC, where massLSC is the sum of
// the two end-point inverse masses divided by the link's linear stiffness
// coefficient. One work-item per link.
__kernel void
UpdateConstantsKernel(
	const int numLinks,
	__global int2 * g_linksVertexIndices,
	__global float4 * g_vertexPositions,
	__global float * g_vertexInverseMasses,
	__global float * g_linksMaterialLSC,
	__global float * g_linksMassLSC,
	__global float * g_linksRestLengthSquared,
	__global float * g_linksRestLengths)
{
	const int link = get_global_id(0);
	if( link >= numLinks )
		return;

	const int2 ends = g_linksVertexIndices[link];
	const float stiffness = g_linksMaterialLSC[ link ];
	const float3 endA = g_vertexPositions[ends.x].xyz;
	const float3 endB = g_vertexPositions[ends.y].xyz;
	const float invMassA = g_vertexInverseMasses[ends.x];
	const float invMassB = g_vertexInverseMasses[ends.y];

	const float3 span = endA - endB;
	const float spanSquared = dot(span, span);
	const float restLength = sqrt(spanSquared);

	g_linksRestLengths[link] = restLength;
	g_linksMassLSC[link] = (invMassA + invMassB)/stiffness;
	// Squared from the rounded length rather than reusing spanSquared.
	g_linksRestLengthSquared[link] = restLength*restLength;
}
);

View file

@ -0,0 +1,25 @@
MSTRINGIFY(
// Snaps anchored vertices to their anchor position. One work-item per vertex.
//
// numNodes          - number of vertices; work-items at or beyond it do nothing.
// g_anchorIndex     - per-vertex index into g_anchorPositions; negative means
//                     the vertex is not anchored and is left untouched.
// g_vertexPositions - in/out vertex positions.
// g_anchorPositions - anchor positions copied into anchored vertices.
__kernel void
UpdateFixedVertexPositions(
	const uint numNodes,
	__global int * g_anchorIndex,
	__global float4 * g_vertexPositions,
	__global float4 * g_anchorPositions GUID_ARG)
{
	const unsigned int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
		return;

	// The current position is not needed, so it is no longer loaded.
	const int anchorIndex = g_anchorIndex[nodeID];
	if( anchorIndex >= 0 )
	{
		g_vertexPositions[nodeID] = g_anchorPositions[anchorIndex];
	}
}
);

View file

@ -0,0 +1,39 @@
MSTRINGIFY(
// Adds a positional correction to the existing vertex velocity, damps the
// result and clears the vertex force. One work-item per vertex.
//
// The stored velocity becomes
//   (velocity + (position - previousPosition) * correction * isolverdt) * (1 - damping)
// where correction and damping are looked up through the vertex's cloth index.
__kernel void
updateVelocitiesFromPositionsWithVelocitiesKernel(
	int numNodes,
	float isolverdt,
	__global float4 * g_vertexPositions,
	__global float4 * g_vertexPreviousPositions,
	__global int * g_vertexClothIndices,
	__global float *g_clothVelocityCorrectionCoefficients,
	__global float * g_clothDampingFactor,
	__global float4 * g_vertexVelocities,
	__global float4 * g_vertexForces GUID_ARG)
{
	const int node = get_global_id(0);
	if( node >= numNodes )
		return;

	const float4 current = g_vertexPositions[node];
	const float4 previous = g_vertexPreviousPositions[node];
	float4 nodeVelocity = g_vertexVelocities[node];

	const int cloth = g_vertexClothIndices[node];
	const float correction = g_clothVelocityCorrectionCoefficients[cloth];
	const float retained = (1.f - g_clothDampingFactor[cloth]);

	const float4 moved = current - previous;
	nodeVelocity += moved*correction*isolverdt;
	// Damp the velocity
	nodeVelocity *= retained;

	g_vertexVelocities[node] = nodeVelocity;
	g_vertexForces[node] = (float4)(0.f, 0.f, 0.f, 0.f);
}
);

View file

@ -0,0 +1,102 @@
MSTRINGIFY(
// Length of the xyz part of a float4; the w lane is replaced by zero
// before the built-in length() is applied.
float length3(float4 a)
{
	return length((float4)(a.xyz, 0.0f));
}
// Normalizes the xyz part of a float4; the w lane is replaced by zero
// before the built-in normalize() is applied, so the result has w == 0.
float4 normalize3(float4 a)
{
	return normalize((float4)(a.xyz, 0.0f));
}
// Clears the per-vertex normal and area accumulators ahead of the
// per-triangle accumulation pass. One work-item per vertex.
__kernel void
ResetNormalsAndAreasKernel(
	const unsigned int numNodes,
	__global float4 * g_vertexNormals,
	__global float * g_vertexArea GUID_ARG)
{
	const size_t node = get_global_id(0);
	if( node >= numNodes )
		return;

	g_vertexNormals[node] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
	g_vertexArea[node] = 0.0f;
}
// Per-triangle pass: computes the face normal and face area of one triangle,
// stores them, and adds both to the accumulators of its three vertices.
// One work-item per triangle of the range [startFace, startFace + numFaces).
//
// The stored "area" is the magnitude of the edge cross product, i.e. twice
// the geometric area of the triangle.
// NOTE(review): the vertex accumulators are updated with plain read-modify-
// write, so triangles sharing a vertex presumably must not be processed in
// the same launch - confirm against the host-side batching.
__kernel void
UpdateSoftBodiesKernel(
	const unsigned int startFace,
	const unsigned int numFaces,
	__global int4 * g_triangleVertexIndexSet,
	__global float4 * g_vertexPositions,
	__global float4 * g_vertexNormals,
	__global float * g_vertexArea,
	__global float4 * g_triangleNormals,
	__global float * g_triangleArea GUID_ARG)
{
	if( get_global_id(0) >= numFaces )
		return;

	const int face = get_global_id(0) + startFace;
	const int4 corners = g_triangleVertexIndexSet[ face ];

	const float4 cornerPos0 = g_vertexPositions[corners.x];
	const float4 cornerPos1 = g_vertexPositions[corners.y];
	const float4 cornerPos2 = g_vertexPositions[corners.z];

	// Read every accumulator before any of them is written.
	const float4 accumNormal0 = g_vertexNormals[corners.x];
	const float4 accumNormal1 = g_vertexNormals[corners.y];
	const float4 accumNormal2 = g_vertexNormals[corners.z];
	const float accumArea0 = g_vertexArea[corners.x];
	const float accumArea1 = g_vertexArea[corners.y];
	const float accumArea2 = g_vertexArea[corners.z];

	const float4 unscaledNormal = cross(cornerPos1 - cornerPos0, cornerPos2 - cornerPos0);
	const float faceArea = length(unscaledNormal);

	g_triangleNormals[face] = normalize3(unscaledNormal);
	g_vertexNormals[corners.x] = accumNormal0 + unscaledNormal;
	g_vertexNormals[corners.y] = accumNormal1 + unscaledNormal;
	g_vertexNormals[corners.z] = accumNormal2 + unscaledNormal;
	g_triangleArea[face] = faceArea;
	g_vertexArea[corners.x] = accumArea0 + faceArea;
	g_vertexArea[corners.y] = accumArea1 + faceArea;
	g_vertexArea[corners.z] = accumArea2 + faceArea;
}
// Final per-vertex pass: normalizes the accumulated vertex normal and turns
// the accumulated area into an average over the triangles using the vertex.
// One work-item per vertex.
//
// numNodes              - number of vertices; work-items at or beyond it do nothing.
// g_vertexTriangleCount - number of triangles that reference each vertex.
// g_vertexNormals       - in/out: accumulated normal in, unit normal out.
// g_vertexArea          - in/out: accumulated area in, per-triangle average out.
//                         A vertex with a triangle count of zero gets an area
//                         of 0 instead of the result of a division by zero.
__kernel void
NormalizeNormalsAndAreasKernel(
	const unsigned int numNodes,
	__global int * g_vertexTriangleCount,
	__global float4 * g_vertexNormals,
	__global float * g_vertexArea GUID_ARG)
{
	const size_t node = get_global_id(0);
	if( node >= numNodes )
		return;

	const float4 normal = g_vertexNormals[node];
	const float area = g_vertexArea[node];
	const int numTriangles = g_vertexTriangleCount[node];

	g_vertexNormals[node] = normalize3(normal);
	// Vertices that belong to no triangle would otherwise store NaN or infinity.
	g_vertexArea[node] = (numTriangles != 0) ? area/(float)(numTriangles) : 0.0f;
}
);

View file

@ -0,0 +1,34 @@
MSTRINGIFY(
// Derives the vertex velocity purely from the position change of the step
// and clears the vertex force. One work-item per vertex.
//
// The stored velocity is
//   (position - previousPosition) * (1 - damping) * isolverdt
// with damping looked up through the vertex's cloth index. The previous
// content of g_vertexVelocities does not contribute, so it is not loaded.
//
// numNodes  - number of vertices; work-items at or beyond it do nothing.
// isolverdt - factor that turns a position delta into a velocity
//             (presumably 1 / solver time step - confirm).
__kernel void
updateVelocitiesFromPositionsWithoutVelocitiesKernel(
	const int numNodes,
	const float isolverdt,
	__global float4 * g_vertexPositions,
	__global float4 * g_vertexPreviousPositions,
	__global int * g_vertexClothIndices,
	__global float * g_clothDampingFactor,
	__global float4 * g_vertexVelocities,
	__global float4 * g_vertexForces GUID_ARG)
{
	const int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
		return;

	const float4 position = g_vertexPositions[nodeID];
	const float4 previousPosition = g_vertexPreviousPositions[nodeID];
	const int clothIndex = g_vertexClothIndices[nodeID];
	const float dampingFactor = g_clothDampingFactor[clothIndex];
	const float velocityCoefficient = (1.f - dampingFactor);
	const float4 difference = position - previousPosition;

	g_vertexVelocities[nodeID] = difference*velocityCoefficient*isolverdt;
	g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
}
);

View file

@ -0,0 +1,28 @@
MSTRINGIFY(
// Advances every vertex by velocity * solverSDT, starting from its previous
// position, and stores the result as both the current and the previous
// position. One work-item per vertex.
__kernel void
UpdatePositionsFromVelocitiesKernel(
	const int numNodes,
	const float solverSDT,
	__global float4 * g_vertexVelocities,
	__global float4 * g_vertexPreviousPositions,
	__global float4 * g_vertexCurrentPosition GUID_ARG)
{
	const int vertex = get_global_id(0);
	if( vertex >= numNodes )
		return;

	const float4 start = g_vertexPreviousPositions[vertex];
	const float4 vertexVelocity = g_vertexVelocities[vertex];
	const float4 advanced = start + vertexVelocity*solverSDT;

	g_vertexCurrentPosition[vertex] = advanced;
	g_vertexPreviousPositions[vertex] = advanced;
}
);

View file

@ -0,0 +1,45 @@
MSTRINGIFY(
// Velocity solve for one batch of links: each work-item applies an impulse
// along the stored link vector to the velocities of the link's two end
// vertices, weighted by their inverse masses. The w lane of both written
// velocities is set to zero.
//
// startLink / numLinks - first link of the batch and number of links in it.
// kst                  - scale factor applied to the impulse.
__kernel void
VSolveLinksKernel(
	int startLink,
	int numLinks,
	float kst,
	__global int2 * g_linksVertexIndices,
	__global float * g_linksLengthRatio,
	__global float4 * g_linksCurrentLength,
	__global float * g_vertexInverseMass,
	__global float4 * g_vertexVelocity GUID_ARG)
{
	if( get_global_id(0) >= numLinks )
		return;

	const int link = get_global_id(0) + startLink;
	const int2 ends = g_linksVertexIndices[link];

	const float lengthRatio = g_linksLengthRatio[link];
	const float3 linkVector = g_linksCurrentLength[link].xyz;
	const float3 velocityA = g_vertexVelocity[ends.x].xyz;
	const float3 velocityB = g_vertexVelocity[ends.y].xyz;
	const float invMassA = g_vertexInverseMass[ends.x];
	const float invMassB = g_vertexInverseMass[ends.y];

	// Impulse magnitude from the relative velocity projected on the link vector.
	const float projected = dot(linkVector, velocityA - velocityB);
	const float j = -projected*lengthRatio*kst;

	g_vertexVelocity[ends.x] = (float4)(velocityA + linkVector*(j*invMassA), 0.f);
	g_vertexVelocity[ends.y] = (float4)(velocityB - linkVector*(j*invMassB), 0.f);
}
);

View file

@ -0,0 +1,209 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
#define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
// OpenCL support
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#ifdef __APPLE__
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#endif //__APPLE__
#endif//USE_MINICL
// Calls p->Release() and nulls p when p is non-null. Defined only if no
// earlier header has already provided SAFE_RELEASE.
// NOTE(review): nothing in this header uses the macro and cl_mem handles
// have no Release() member; it looks like a leftover from a COM-style
// backend - confirm before removing.
#ifndef SAFE_RELEASE
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
#endif
template <typename ElementType> class btOpenCLBuffer
{
public:
cl_command_queue m_cqCommandQue;
cl_context m_clContext;
cl_mem m_buffer;
btAlignedObjectArray< ElementType > * m_CPUBuffer;
int m_gpuSize;
bool m_onGPU;
bool m_readOnlyOnGPU;
bool m_allocated;
bool createBuffer( cl_mem* preexistingBuffer = 0)
{
cl_int err;
if( preexistingBuffer )
{
m_buffer = *preexistingBuffer;
}
else {
cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
// At a minimum the buffer must exist
if( size == 0 )
size = sizeof(ElementType);
m_buffer = clCreateBuffer(m_clContext, flags, size, 0, &err);
if( err != CL_SUCCESS )
{
btAssert( "Buffer::Buffer(m_buffer)");
}
}
m_gpuSize = m_CPUBuffer->size();
return true;
}
public:
btOpenCLBuffer( cl_command_queue commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly)
:m_cqCommandQue(commandQue),
m_clContext(ctx),
m_buffer(0),
m_CPUBuffer(CPUBuffer),
m_gpuSize(0),
m_onGPU(false),
m_readOnlyOnGPU(readOnly),
m_allocated(false)
{
}
~btOpenCLBuffer()
{
clReleaseMemObject(m_buffer);
}
bool moveToGPU()
{
cl_int err;
if( (m_CPUBuffer->size() != m_gpuSize) )
{
m_onGPU = false;
}
if( !m_allocated && m_CPUBuffer->size() == 0 )
{
// If it isn't on the GPU and yet there is no data on the CPU side this may cause a problem with some kernels.
// We should create *something* on the device side
if (!createBuffer()) {
return false;
}
m_allocated = true;
}
if( !m_onGPU && m_CPUBuffer->size() > 0 )
{
if (!m_allocated || (m_CPUBuffer->size() != m_gpuSize)) {
if (!createBuffer()) {
return false;
}
m_allocated = true;
}
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
err = clEnqueueWriteBuffer(m_cqCommandQue,m_buffer,
CL_FALSE,
0,
size,
&((*m_CPUBuffer)[0]),0,0,0);
if( err != CL_SUCCESS )
{
btAssert( "CommandQueue::enqueueWriteBuffer(m_buffer)" );
}
m_onGPU = true;
}
return true;
}
bool moveFromGPU()
{
cl_int err;
if (m_CPUBuffer->size() > 0) {
if (m_onGPU && !m_readOnlyOnGPU) {
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
err = clEnqueueReadBuffer(m_cqCommandQue,
m_buffer,
CL_TRUE,
0,
size,
&((*m_CPUBuffer)[0]),0,0,0);
if( err != CL_SUCCESS )
{
btAssert( "CommandQueue::enqueueReadBuffer(m_buffer)" );
}
m_onGPU = false;
}
}
return true;
}
bool copyFromGPU()
{
cl_int err;
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
if (m_CPUBuffer->size() > 0) {
if (m_onGPU && !m_readOnlyOnGPU) {
err = clEnqueueReadBuffer(m_cqCommandQue,
m_buffer,
CL_TRUE,
0,size,
&((*m_CPUBuffer)[0]),0,0,0);
if( err != CL_SUCCESS )
{
btAssert( "CommandQueue::enqueueReadBuffer(m_buffer)");
}
}
}
return true;
}
virtual void changedOnCPU()
{
m_onGPU = false;
}
}; // class btOpenCLBuffer
#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H

View file

@ -0,0 +1,99 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
/**
 * OpenCL-backed variant of btSoftBodyLinkData: adds one btOpenCLBuffer per
 * link attribute plus the batching tables used to split the links into
 * computation batches. Only declarations are visible here; the member
 * functions are implemented elsewhere.
 */
class btSoftBodyLinkDataOpenCL : public btSoftBodyLinkData
{
public:
// NOTE(review): presumably true while the link data lives on the device - confirm in the implementation.
bool m_onGPU;
cl_command_queue m_cqCommandQue;
// Device buffers, one per link attribute.
// NOTE(review): presumably each mirrors the like-named CPU array of btSoftBodyLinkData - confirm in the constructor.
btOpenCLBuffer<LinkNodePair> m_clLinks;
btOpenCLBuffer<float> m_clLinkStrength;
btOpenCLBuffer<float> m_clLinksMassLSC;
btOpenCLBuffer<float> m_clLinksRestLengthSquared;
btOpenCLBuffer<Vectormath::Aos::Vector3> m_clLinksCLength;
btOpenCLBuffer<float> m_clLinksLengthRatio;
btOpenCLBuffer<float> m_clLinksRestLength;
btOpenCLBuffer<float> m_clLinksMaterialLinearStiffnessCoefficient;
/** Start index and length of one batch; both are 0 when default-constructed. */
struct BatchPair
{
int start;
int length;
BatchPair() :
start(0),
length(0)
{
}
BatchPair( int s, int l ) :
start( s ),
length( l )
{
}
};
/**
* Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching.
*/
btAlignedObjectArray< int > m_linkAddresses;
/**
* Start and length values for computation batches over link data.
*/
btAlignedObjectArray< BatchPair > m_batchStartLengths;
btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx);
virtual ~btSoftBodyLinkDataOpenCL();
/** Allocate enough space in all link-related arrays to fit numLinks links */
virtual void createLinks( int numLinks );
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
virtual void setLinkAt(
const LinkDescription &link,
int linkIndex );
// Accelerator residency interface; NOTE(review): semantics are defined by the implementation, which is not visible here.
virtual bool onAccelerator();
virtual bool moveToAccelerator();
virtual bool moveFromAccelerator();
/**
* Generate (and later update) the batching for the entire link set.
* This redoes a lot of work because it batches the entire set when each cloth is inserted.
* In theory we could delay it until just before we need the cloth.
* It's a one-off overhead, though, so that is a later optimisation.
*/
void generateBatches();
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H

View file

@ -0,0 +1,169 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
/**
 * Link data container for the SIMD-aware OpenCL soft body solver.
 * Adds per-wavefront batching tables, and OpenCL buffers for the link
 * attributes, on top of btSoftBodyLinkData.
 */
class btSoftBodyLinkDataOpenCLSIMDAware : public btSoftBodyLinkData
{
public:
	bool				m_onGPU;
	cl_command_queue	m_cqCommandQue;

	// Fixed batching parameters (const, so they are set at construction time).
	const int	m_wavefrontSize;
	const int	m_linksPerWorkItem;
	const int	m_maxLinksPerWavefront;

	int	m_maxBatchesWithinWave;
	int	m_maxVerticesWithinWave;
	int	m_numWavefronts;
	int	m_maxVertex;

	/** Batch count / vertex count pair; stored per entry of m_numBatchesAndVerticesWithinWaves. */
	struct NumBatchesVerticesPair
	{
		int numBatches;
		int numVertices;
	};

	btAlignedObjectArray<int>						m_linksPerWavefront;
	btAlignedObjectArray<NumBatchesVerticesPair>	m_numBatchesAndVerticesWithinWaves;
	btOpenCLBuffer< NumBatchesVerticesPair >		m_clNumBatchesAndVerticesWithinWaves;

	// All arrays below hold batches of m_maxLinksPerWavefront links, ordered by wavefront,
	// with either global vertex pairs or local vertex pairs.

	/** List of global vertices per wavefront */
	btAlignedObjectArray< int >		m_wavefrontVerticesGlobalAddresses;
	btOpenCLBuffer<int>				m_clWavefrontVerticesGlobalAddresses;

	/** Vertex pair for the link */
	btAlignedObjectArray< LinkNodePair >	m_linkVerticesLocalAddresses;
	btOpenCLBuffer<LinkNodePair>			m_clLinkVerticesLocalAddresses;

	btOpenCLBuffer<float>	m_clLinkStrength;
	btOpenCLBuffer<float>	m_clLinksMassLSC;
	btOpenCLBuffer<float>	m_clLinksRestLengthSquared;
	btOpenCLBuffer<float>	m_clLinksRestLength;
	btOpenCLBuffer<float>	m_clLinksMaterialLinearStiffnessCoefficient;

	/** Start index and length of one batch; both default to zero. */
	struct BatchPair
	{
		int start;
		int length;

		BatchPair() : start(0), length(0) {}
		BatchPair( int s, int l ) : start( s ), length( l ) {}
	};

	/**
	 * Link addressing information for each cloth.
	 * Allows link locations to be computed independently of data batching.
	 */
	btAlignedObjectArray< int >	m_linkAddresses;

	/**
	 * Start and length values for computation batches over link data.
	 */
	btAlignedObjectArray< BatchPair >	m_wavefrontBatchStartLengths;

	btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx);

	virtual ~btSoftBodyLinkDataOpenCLSIMDAware();

	/** Allocate enough space in all link-related arrays to fit numLinks links */
	virtual void createLinks( int numLinks );

	/**
	 * Insert the described link into the correct data structures, assuming space
	 * has already been allocated by a call to createLinks.
	 */
	virtual void setLinkAt( const LinkDescription &link, int linkIndex );

	virtual bool onAccelerator();
	virtual bool moveToAccelerator();
	virtual bool moveFromAccelerator();

	/**
	 * Generate (and later update) the batching for the entire link set.
	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
	 * In theory we could delay it until just before we need the cloth.
	 * It's a one-off overhead, though, so that is a later optimisation.
	 */
	void generateBatches();

	// Plain accessors for the batching parameters and tables above.
	int getMaxVerticesPerWavefront()	{ return m_maxVerticesWithinWave; }
	int getWavefrontSize()				{ return m_wavefrontSize; }
	int getLinksPerWorkItem()			{ return m_linksPerWorkItem; }
	int getMaxLinksPerWavefront()		{ return m_maxLinksPerWavefront; }
	int getMaxBatchesPerWavefront()		{ return m_maxBatchesWithinWave; }
	int getNumWavefronts()				{ return m_numWavefronts; }

	/** Entry of m_numBatchesAndVerticesWithinWaves for the given wavefront index. */
	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront )
	{
		return m_numBatchesAndVerticesWithinWaves[wavefront];
	}

	/** Entry of m_wavefrontVerticesGlobalAddresses at vertexIndex. */
	int getVertexGlobalAddresses( int vertexIndex )
	{
		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
	}

	/**
	 * Get post-batching local addresses of the vertex pair for a link,
	 * assuming all vertices used by a wavefront are loaded locally.
	 */
	LinkNodePair getVertexPairLocalAddresses( int linkIndex )
	{
		return m_linkVerticesLocalAddresses[linkIndex];
	}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H

View file

@ -0,0 +1,126 @@
#include "btSoftBodySolverOutputCLtoGL.h"
#include <stdio.h> //@todo: remove the debugging printf at some stage
#include "btSoftBodySolver_OpenCL.h"
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include "btSoftBodySolverVertexBuffer_OpenGL.h"
#include "BulletSoftBody/btSoftBody.h"
////OpenCL 1.0 kernels don't use float3
// Turns its argument into a string literal; used by the included .cl file.
#define MSTRINGIFY(A) #A
// Kernel source text, produced by including the .cl file as the initializer.
// Declared const char*: binding a string literal to a non-const char* is
// deprecated in C++03 and ill-formed from C++11 on.
static const char* OutputToVertexArrayCLString =
#include "OpenCLC10/OutputToVertexArray.cl"
// Release a kernel handle if it is set, then clear it so it is not released twice.
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
// Local work-group size used when enqueueing the output kernels.
static const size_t workGroupSize = 128;
void btSoftBodySolverOutputCLtoGL::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
{
btSoftBodySolver *solver = softBody->getSoftBodySolver();
btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
btOpenCLSoftBodySolver *dxSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
checkInitialized();
btOpenCLAcceleratedSoftBodyInterface* currentCloth = dxSolver->findSoftBodyInterface( softBody );
btSoftBodyVertexDataOpenCL &vertexData( dxSolver->m_vertexData );
const int firstVertex = currentCloth->getFirstVertex();
const int lastVertex = firstVertex + currentCloth->getNumVertices();
if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::OPENGL_BUFFER ) {
const btOpenGLInteropVertexBufferDescriptor *openGLVertexBuffer = static_cast< btOpenGLInteropVertexBufferDescriptor* >(vertexBuffer);
cl_int ciErrNum = CL_SUCCESS;
cl_mem clBuffer = openGLVertexBuffer->getBuffer();
cl_kernel outputKernel = outputToVertexArrayWithNormalsKernel;
if( !vertexBuffer->hasNormals() )
outputKernel = outputToVertexArrayWithoutNormalsKernel;
ciErrNum = clEnqueueAcquireGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
}
int numVertices = currentCloth->getNumVertices();
ciErrNum = clSetKernelArg(outputKernel, 0, sizeof(int), &firstVertex );
ciErrNum = clSetKernelArg(outputKernel, 1, sizeof(int), &numVertices );
ciErrNum = clSetKernelArg(outputKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
if( vertexBuffer->hasVertexPositions() )
{
int vertexOffset = vertexBuffer->getVertexOffset();
int vertexStride = vertexBuffer->getVertexStride();
ciErrNum = clSetKernelArg(outputKernel, 3, sizeof(int), &vertexOffset );
ciErrNum = clSetKernelArg(outputKernel, 4, sizeof(int), &vertexStride );
ciErrNum = clSetKernelArg(outputKernel, 5, sizeof(cl_mem), (void*)&vertexData.m_clVertexPosition.m_buffer );
}
if( vertexBuffer->hasNormals() )
{
int normalOffset = vertexBuffer->getNormalOffset();
int normalStride = vertexBuffer->getNormalStride();
ciErrNum = clSetKernelArg(outputKernel, 6, sizeof(int), &normalOffset );
ciErrNum = clSetKernelArg(outputKernel, 7, sizeof(int), &normalStride );
ciErrNum = clSetKernelArg(outputKernel, 8, sizeof(cl_mem), (void*)&vertexData.m_clVertexNormal.m_buffer );
}
size_t numWorkItems = workGroupSize*((vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, outputKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(copySoftBodyToVertexBuffer)");
}
ciErrNum = clEnqueueReleaseGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, 0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "clEnqueueReleaseGLObjects(copySoftBodyToVertexBuffer)");
}
} else {
btAssert( "Undefined output for this solver output" == false );
}
// clFinish in here may not be the best thing. It's possible that we should have a waitForFrameComplete function.
clFinish(m_cqCommandQue);
} // btSoftBodySolverOutputCLtoGL::outputToVertexBuffers
/**
 * (Re)compile the two output kernels from the embedded OpenCL source.
 *
 * Any existing kernels are released first. m_shadersInitialized is set only
 * when both kernels were obtained and no compilation failure was recorded.
 *
 * @return true when both kernels are ready for use, false otherwise.
 */
bool btSoftBodySolverOutputCLtoGL::buildShaders()
{
	// Ensure current kernels are released first. This also clears
	// m_shadersInitialized, so no "already initialized" shortcut can apply here.
	releaseKernels();

	// Start from a clean failure count so that only this build is judged.
	clFunctions.clearKernelCompilationFailures();

	outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
	outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");

	// A null handle can never be enqueued, whatever the failure counter says.
	// NOTE(review): assumes compileCLKernelFromString bumps the failure counter
	// and/or returns 0 on error - confirm against its implementation.
	const bool returnVal =
		clFunctions.getKernelCompilationFailures() == 0 &&
		outputToVertexArrayWithNormalsKernel != 0 &&
		outputToVertexArrayWithoutNormalsKernel != 0;

	if( returnVal )
		m_shadersInitialized = true;

	return returnVal;
} // btSoftBodySolverOutputCLtoGL::buildShaders
void btSoftBodySolverOutputCLtoGL::releaseKernels()
{
RELEASE_CL_KERNEL( outputToVertexArrayWithNormalsKernel );
RELEASE_CL_KERNEL( outputToVertexArrayWithoutNormalsKernel );
m_shadersInitialized = false;
} // btSoftBodySolverOutputCLtoGL::releaseKernels
/**
 * Build the kernels on first use.
 *
 * @return the value of m_shadersInitialized after the (possible) build attempt.
 */
bool btSoftBodySolverOutputCLtoGL::checkInitialized()
{
	// Already built: nothing to do.
	if( m_shadersInitialized )
		return true;

	if( buildShaders() )
		m_shadersInitialized = true;

	return m_shadersInitialized;
}

View file

@ -0,0 +1,62 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
#define BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
#include "btSoftBodySolver_OpenCL.h"
/**
* Class to manage movement of data from a solver to a given target.
* This version is the CL to GL interop version.
*/
/**
 * Solver output that moves data from an OpenCL solver into an OpenGL
 * vertex buffer through CL/GL interop.
 */
class btSoftBodySolverOutputCLtoGL : public btSoftBodySolverOutput
{
protected:
	cl_command_queue	m_cqCommandQue;
	cl_context			m_cxMainContext;
	/** Helper used to compile the output kernels. */
	CLFunctions			clFunctions;

	cl_kernel	outputToVertexArrayWithNormalsKernel;
	cl_kernel	outputToVertexArrayWithoutNormalsKernel;

	/** True once buildShaders has succeeded; cleared by releaseKernels. */
	bool m_shadersInitialized;

	virtual bool checkInitialized();
	virtual bool buildShaders();
	void releaseKernels();

public:
	/** Store the queue and context; kernels start out unset and are built lazily. */
	btSoftBodySolverOutputCLtoGL(cl_command_queue cqCommandQue, cl_context cxMainContext)
		: m_cqCommandQue( cqCommandQue )
		, m_cxMainContext( cxMainContext )
		, clFunctions(cqCommandQue, cxMainContext)
		, outputToVertexArrayWithNormalsKernel( 0 )
		, outputToVertexArrayWithoutNormalsKernel( 0 )
		, m_shadersInitialized( false )
	{
	}

	/** Releases any kernels that were built. */
	virtual ~btSoftBodySolverOutputCLtoGL()
	{
		releaseKernels();
	}

	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H

View file

@ -0,0 +1,84 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
/**
 * Triangle data for the OpenCL soft body solver: btSoftBodyTriangleData plus
 * OpenCL buffers for the per-triangle arrays and the batching tables.
 */
class btSoftBodyTriangleDataOpenCL : public btSoftBodyTriangleData
{
public:
	bool				m_onGPU;
	cl_command_queue	m_queue;

	btOpenCLBuffer<btSoftBodyTriangleData::TriangleNodeSet>	m_clVertexIndices;
	btOpenCLBuffer<float>									m_clArea;
	btOpenCLBuffer<Vectormath::Aos::Vector3>				m_clNormal;

	/**
	 * Triangle addressing information for each cloth.
	 * Allows triangle locations to be computed independently of data batching.
	 */
	btAlignedObjectArray< int >	m_triangleAddresses;

	/**
	 * Pair of ints used for the batch table below.
	 * The default constructor leaves both members unset.
	 */
	struct btSomePair
	{
		int first;
		int second;

		btSomePair() {}
		btSomePair(int f,int s) : first(f), second(s) {}
	};

	/**
	 * Start and length values for computation batches over triangle data.
	 */
	btAlignedObjectArray< btSomePair >	m_batchStartLengths;

	btSoftBodyTriangleDataOpenCL( cl_command_queue queue, cl_context ctx );

	virtual ~btSoftBodyTriangleDataOpenCL();

	/** Allocate enough space in all triangle-related arrays to fit numTriangles triangles */
	virtual void createTriangles( int numTriangles );

	/**
	 * Insert the described triangle into the correct data structures, assuming
	 * space has already been allocated by a call to createTriangles.
	 */
	virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );

	virtual bool onAccelerator();
	virtual bool moveToAccelerator();
	virtual bool moveFromAccelerator();

	/**
	 * Generate (and later update) the batching for the entire triangle set.
	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
	 * In theory we could delay it until just before we need the cloth.
	 * It's a one-off overhead, though, so that is a later optimisation.
	 */
	void generateBatches();
}; // class btSoftBodyTriangleDataOpenCL
#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H

View file

@ -0,0 +1,166 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#ifdef __APPLE__
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#include <CL/cl_gl.h>
#endif //__APPLE__
#endif//USE_MINICL
#ifdef _WIN32//for glut.h
#include <windows.h>
#endif
//think different
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/OpenGL.h>
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#include <GLUT/glut.h>
#else
#ifdef _WINDOWS
#include <windows.h>
#include <GL/gl.h>
#include <GL/glu.h>
#else
#include <GL/glut.h>
#endif //_WINDOWS
#endif //APPLE
/**
 * Vertex buffer descriptor backed by an OpenGL VBO that is shared with OpenCL.
 * The constructors wrap the VBO in a cl_mem interop buffer; the destructor
 * releases that buffer.
 *
 * NOTE(review): the class owns m_buffer but is still copyable; a copy would
 * release the same cl_mem twice. Avoid copying instances.
 */
class btOpenGLInteropVertexBufferDescriptor : public btVertexBufferDescriptor
{
protected:
	/** OpenCL context */
	cl_context m_context;

	/** OpenCL command queue */
	cl_command_queue m_commandQueue;

	/** OpenCL interop buffer; 0 when no buffer could be created */
	cl_mem m_buffer;

	/** VBO in GL that is the basis of the interop buffer */
	GLuint m_openGLVBO;

public:
	/**
	 * context is the OpenCL context this interop buffer will work in.
	 * queue is the command queue that kernels and data movement will be enqueued into.
	 * openGLVBO is the OpenGL vertex buffer data will be copied into.
	 * vertexOffset is the offset in floats to the first vertex.
	 * vertexStride is the stride in floats between vertices.
	 */
	btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride ) :
		m_context( context ),
		m_commandQueue( cqCommandQue ),
		m_buffer( 0 ),	// stays 0 under USE_MINICL so the destructor has nothing to release
		m_openGLVBO( openGLVBO )
	{
#ifndef USE_MINICL
		cl_int ciErrNum = CL_SUCCESS;

		m_vertexOffset = vertexOffset;
		m_vertexStride = vertexStride;

		m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
		if( ciErrNum != CL_SUCCESS )
		{
			m_buffer = 0;
			btAssert( 0 && "clCreateFromGLBuffer(btOpenGLInteropVertexBufferDescriptor)");
		}

		m_hasVertexPositions = true;
#else
		btAssert(0);//MiniCL shouldn't get here
#endif
	}

	/**
	 * context is the OpenCL context this interop buffer will work in.
	 * queue is the command queue that kernels and data movement will be enqueued into.
	 * openGLVBO is the OpenGL vertex buffer data will be copied into.
	 * vertexOffset is the offset in floats to the first vertex.
	 * vertexStride is the stride in floats between vertices.
	 * normalOffset is the offset in floats to the first normal.
	 * normalStride is the stride in floats between normals.
	 */
	btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
		m_context( context ),
		m_commandQueue( cqCommandQue ),
		m_buffer( 0 ),	// stays 0 under USE_MINICL so the destructor has nothing to release
		m_openGLVBO( openGLVBO )
	{
#ifndef USE_MINICL
		cl_int ciErrNum = CL_SUCCESS;

		m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
		if( ciErrNum != CL_SUCCESS )
		{
			m_buffer = 0;
			btAssert( 0 && "clCreateFromGLBuffer(btOpenGLInteropVertexBufferDescriptor)");
		}

		m_vertexOffset = vertexOffset;
		m_vertexStride = vertexStride;
		m_hasVertexPositions = true;

		m_normalOffset = normalOffset;
		m_normalStride = normalStride;
		m_hasNormals = true;
#else
		btAssert(0);
#endif //USE_MINICL
	}

	/** Release the interop buffer, if one was created. */
	virtual ~btOpenGLInteropVertexBufferDescriptor()
	{
		if( m_buffer )
		{
			clReleaseMemObject( m_buffer );
			m_buffer = 0;
		}
	}

	/**
	 * Return the type of the vertex buffer descriptor.
	 */
	virtual BufferTypes getBufferType() const
	{
		return OPENGL_BUFFER;
	}

	/** OpenCL context passed to the constructor. */
	virtual cl_context getContext() const
	{
		return m_context;
	}

	/** OpenCL interop buffer wrapping the VBO (0 if creation failed). */
	virtual cl_mem getBuffer() const
	{
		return m_buffer;
	}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H

View file

@ -0,0 +1,52 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
#define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
/**
 * Vertex data for the OpenCL soft body solver: btSoftBodyVertexData plus one
 * OpenCL buffer per vertex attribute.
 */
class btSoftBodyVertexDataOpenCL : public btSoftBodyVertexData
{
protected:
	bool				m_onGPU;
	cl_command_queue	m_queue;

public:
	// OpenCL buffers, one per vertex attribute.
	btOpenCLBuffer<int>							m_clClothIdentifier;
	btOpenCLBuffer<Vectormath::Aos::Point3>		m_clVertexPosition;
	btOpenCLBuffer<Vectormath::Aos::Point3>		m_clVertexPreviousPosition;
	btOpenCLBuffer<Vectormath::Aos::Vector3>	m_clVertexVelocity;
	btOpenCLBuffer<Vectormath::Aos::Vector3>	m_clVertexForceAccumulator;
	btOpenCLBuffer<Vectormath::Aos::Vector3>	m_clVertexNormal;
	btOpenCLBuffer<float>						m_clVertexInverseMass;
	btOpenCLBuffer<float>						m_clVertexArea;
	btOpenCLBuffer<int>							m_clVertexTriangleCount;

	btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx);

	virtual ~btSoftBodyVertexDataOpenCL();

	virtual bool onAccelerator();
	virtual bool moveToAccelerator();

	// bCopy defaults to false and bCopyMinimum to true; their exact effect is
	// defined by the implementation, which is not part of this header.
	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H

View file

@ -0,0 +1,527 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
#define BT_SOFT_BODY_SOLVER_OPENCL_H
#include "stddef.h" //for size_t
#include "vectormath/vmInclude.h"
#include "BulletSoftBody/btSoftBodySolvers.h"
#include "BulletSoftBody/btSoftBody.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#include "btSoftBodySolverLinkData_OpenCL.h"
#include "btSoftBodySolverVertexData_OpenCL.h"
#include "btSoftBodySolverTriangleData_OpenCL.h"
/**
 * Helper that compiles OpenCL kernels for a given queue/context and counts
 * compilation failures. compileCLKernelFromString is virtual, so subclasses
 * can replace the compilation strategy.
 */
class CLFunctions
{
protected:
	cl_command_queue	m_cqCommandQue;
	cl_context			m_cxMainContext;

	/** Failure counter; starts at 0 and can be reset with clearKernelCompilationFailures. */
	int	m_kernelCompilationFailures;

public:
	CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) :
		m_cqCommandQue( cqCommandQue ),
		m_cxMainContext( cxMainContext ),
		m_kernelCompilationFailures(0)
	{
	}

	/**
	 * Virtual because the class has a virtual method and is handed around by
	 * base pointer; deleting a subclass through CLFunctions* must be well defined.
	 */
	virtual ~CLFunctions()
	{
	}

	/** Current value of the failure counter. */
	int getKernelCompilationFailures() const
	{
		return m_kernelCompilationFailures;
	}

	/**
	 * Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
	 */
	virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros, const char* srcFileNameForCaching);

	/** Reset the failure counter to zero. */
	void clearKernelCompilationFailures()
	{
		m_kernelCompilationFailures=0;
	}
};
/**
* Entry in the collision shape array.
* Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
*/
struct CollisionShapeDescription
{
	Vectormath::Aos::Transform3 shapeTransform;
	Vectormath::Aos::Vector3 linearVelocity;
	Vectormath::Aos::Vector3 angularVelocity;

	int softBodyIdentifier;
	int collisionShapeType;

	// Both needed for capsule
	float radius;
	float halfHeight;
	int upAxis;

	float margin;
	float friction;

	/**
	 * Zero every scalar field so that no indeterminate values are stored in
	 * arrays of this struct. The Vectormath members are left to their own
	 * default constructors.
	 */
	CollisionShapeDescription() :
		softBodyIdentifier( 0 ),
		collisionShapeType( 0 ),
		radius( 0 ),
		halfHeight( 0 ),
		upAxis( 0 ),
		margin( 0 ),
		friction( 0 )
	{
	}
};
/**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
/**
 * Per-soft-body bookkeeping inside a solver: counts, capacities and first
 * indices of this cloth's vertices, triangles and links in the solver arrays.
 */
class btOpenCLAcceleratedSoftBodyInterface
{
protected:
	/** Current number of vertices that are part of this cloth */
	int m_numVertices;
	/** Maximum number of vertices allocated to be part of this cloth */
	int m_maxVertices;
	/** Current number of triangles that are part of this cloth */
	int m_numTriangles;
	/** Maximum number of triangles allocated to be part of this cloth */
	int m_maxTriangles;
	/** Index of first vertex in the world allocated to this cloth */
	int m_firstVertex;
	/** Index of first triangle in the world allocated to this cloth */
	int m_firstTriangle;
	/** Index of first link in the world allocated to this cloth */
	int m_firstLink;
	/** Maximum number of links allocated to this cloth */
	int m_maxLinks;
	/** Current number of links allocated to this cloth */
	int m_numLinks;

	/** The actual soft body this data represents */
	btSoftBody *m_softBody;

public:
	/** Start with all counts and indices at zero for the given soft body. */
	btOpenCLAcceleratedSoftBodyInterface( btSoftBody *softBody )
		: m_numVertices( 0 )
		, m_maxVertices( 0 )
		, m_numTriangles( 0 )
		, m_maxTriangles( 0 )
		, m_firstVertex( 0 )
		, m_firstTriangle( 0 )
		, m_firstLink( 0 )
		, m_maxLinks( 0 )
		, m_numLinks( 0 )
		, m_softBody( softBody )
	{
	}

	// --- Vertex / triangle accessors ---
	int getNumVertices()	{ return m_numVertices; }
	int getNumTriangles()	{ return m_numTriangles; }
	int getMaxVertices()	{ return m_maxVertices; }
	int getMaxTriangles()	{ return m_maxTriangles; }
	int getFirstVertex()	{ return m_firstVertex; }
	int getFirstTriangle()	{ return m_firstTriangle; }

	/**
	 * Update the bounds in the btSoftBody object
	 */
	void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );

	// TODO: All of these set functions will have to do checks and
	// update the world because restructuring of the arrays will be necessary
	// Reasonable use of "friend"?
	void setNumVertices( int numVertices )		{ m_numVertices = numVertices; }
	void setNumTriangles( int numTriangles )	{ m_numTriangles = numTriangles; }
	void setMaxVertices( int maxVertices )		{ m_maxVertices = maxVertices; }
	void setMaxTriangles( int maxTriangles )	{ m_maxTriangles = maxTriangles; }
	void setFirstVertex( int firstVertex )		{ m_firstVertex = firstVertex; }
	void setFirstTriangle( int firstTriangle )	{ m_firstTriangle = firstTriangle; }
	void setMaxLinks( int maxLinks )			{ m_maxLinks = maxLinks; }
	void setNumLinks( int numLinks )			{ m_numLinks = numLinks; }
	void setFirstLink( int firstLink )			{ m_firstLink = firstLink; }

	// --- Link accessors ---
	int getMaxLinks()	{ return m_maxLinks; }
	int getNumLinks()	{ return m_numLinks; }
	int getFirstLink()	{ return m_firstLink; }

	/** The soft body passed to the constructor. */
	btSoftBody* getSoftBody()	{ return m_softBody; }
};
/**
 * OpenCL implementation of btSoftBodySolver.
 * Holds the link/vertex/triangle data, the per-cloth parameter arrays with
 * their OpenCL buffers, and the kernel handles used by the solver.
 */
class btOpenCLSoftBodySolver : public btSoftBodySolver
{
public:
	/** Three unsigned ints plus one padding word; everything defaults to zero. */
	struct UIntVector3
	{
		UIntVector3()
			: x( 0 ), y( 0 ), z( 0 ), _padding( 0 )
		{
		}

		UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ )
			: x( x_ ), y( y_ ), z( z_ ), _padding( 0 )
		{
		}

		unsigned int x;
		unsigned int y;
		unsigned int z;
		unsigned int _padding;
	};

	/** Index pair (firstObject, endObject) into the collision object array. */
	struct CollisionObjectIndices
	{
		CollisionObjectIndices( int f, int e )
			: firstObject( f ), endObject( e )
		{
		}

		int firstObject;
		int endObject;
	};

	btSoftBodyLinkDataOpenCL		m_linkData;
	btSoftBodyVertexDataOpenCL		m_vertexData;
	btSoftBodyTriangleDataOpenCL	m_triangleData;

protected:
	/** Built-in kernel compiler, used whenever setCLFunctions is given a null pointer. */
	CLFunctions		m_defaultCLFunctions;
	/** Kernel compiler currently in use. */
	CLFunctions*	m_currentCLFunctions;

	/** Variable to define whether we need to update solver constants on the next iteration */
	bool m_updateSolverConstants;

	bool m_shadersInitialized;

	/**
	 * Cloths owned by this solver.
	 * Only our cloths are in this array.
	 */
	btAlignedObjectArray< btOpenCLAcceleratedSoftBodyInterface * > m_softBodySet;

	/** Acceleration value to be applied to all non-static vertices in the solver.
	 * Index n is cloth n, array sized by number of cloths in the world not the solver.
	 */
	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothAcceleration;
	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clPerClothAcceleration;

	/** Wind velocity to be applied normal to all non-static vertices in the solver.
	 * Index n is cloth n, array sized by number of cloths in the world not the solver.
	 */
	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothWindVelocity;
	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clPerClothWindVelocity;

	/** Velocity damping factor */
	btAlignedObjectArray< float >	m_perClothDampingFactor;
	btOpenCLBuffer<float>			m_clPerClothDampingFactor;

	/** Velocity correction coefficient */
	btAlignedObjectArray< float >	m_perClothVelocityCorrectionCoefficient;
	btOpenCLBuffer<float>			m_clPerClothVelocityCorrectionCoefficient;

	/** Lift parameter for wind effect on cloth. */
	btAlignedObjectArray< float >	m_perClothLiftFactor;
	btOpenCLBuffer<float>			m_clPerClothLiftFactor;

	/** Drag parameter for wind effect on cloth. */
	btAlignedObjectArray< float >	m_perClothDragFactor;
	btOpenCLBuffer<float>			m_clPerClothDragFactor;

	/** Density of the medium in which each cloth sits */
	btAlignedObjectArray< float >	m_perClothMediumDensity;
	btOpenCLBuffer<float>			m_clPerClothMediumDensity;

	/**
	 * Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
	 */
	btAlignedObjectArray< CollisionObjectIndices >	m_perClothCollisionObjects;
	btOpenCLBuffer<CollisionObjectIndices>			m_clPerClothCollisionObjects;

	/**
	 * Collision shapes being passed across to the cloths in this solver.
	 */
	btAlignedObjectArray< CollisionShapeDescription >	m_collisionObjectDetails;
	btOpenCLBuffer< CollisionShapeDescription >			m_clCollisionObjectDetails;

	/**
	 * Friction coefficient for each cloth
	 */
	btAlignedObjectArray< float >	m_perClothFriction;
	btOpenCLBuffer< float >			m_clPerClothFriction;

	/** Anchor node info: a vertex index paired with the btSoftBody node pointer. */
	struct AnchorNodeInfoCL
	{
		int clVertexIndex;
		btSoftBody::Node* pNode;
	};

	btAlignedObjectArray<AnchorNodeInfoCL>			m_anchorNodeInfoArray;
	btAlignedObjectArray<Vectormath::Aos::Point3>	m_anchorPosition;
	btOpenCLBuffer<Vectormath::Aos::Point3>			m_clAnchorPosition;
	btAlignedObjectArray<int>						m_anchorIndex;
	btOpenCLBuffer<int>								m_clAnchorIndex;

	bool m_bUpdateAnchoredNodePos;

	// Kernel handles.
	cl_kernel	m_prepareLinksKernel;
	cl_kernel	m_solvePositionsFromLinksKernel;
	cl_kernel	m_updateConstantsKernel;
	cl_kernel	m_integrateKernel;
	cl_kernel	m_addVelocityKernel;
	cl_kernel	m_updatePositionsFromVelocitiesKernel;
	cl_kernel	m_updateVelocitiesFromPositionsWithoutVelocitiesKernel;
	cl_kernel	m_updateVelocitiesFromPositionsWithVelocitiesKernel;
	cl_kernel	m_vSolveLinksKernel;
	cl_kernel	m_solveCollisionsAndUpdateVelocitiesKernel;
	cl_kernel	m_resetNormalsAndAreasKernel;
	cl_kernel	m_normalizeNormalsAndAreasKernel;
	cl_kernel	m_updateSoftBodiesKernel;
	cl_kernel	m_outputToVertexArrayKernel;
	cl_kernel	m_applyForcesKernel;
	cl_kernel	m_updateFixedVertexPositionsKernel;

	cl_command_queue	m_cqCommandQue;
	cl_context			m_cxMainContext;

	size_t m_defaultWorkGroupSize;

	virtual bool buildShaders();

	void resetNormalsAndAreas( int numVertices );

	void normalizeNormalsAndAreas( int numVertices );

	void executeUpdateSoftBodies( int firstTriangle, int numTriangles );

	void prepareCollisionConstraints();

	Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );

	void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );

	int findSoftBodyIndex( const btSoftBody* const softBody );

	virtual void applyForces( float solverdt );

	void updateFixedVertexPositions();

	/**
	 * Integrate motion on the solver.
	 */
	virtual void integrate( float solverdt );

	virtual void updateConstants( float timeStep );

	float computeTriangleArea(
		const Vectormath::Aos::Point3 &vertex0,
		const Vectormath::Aos::Point3 &vertex1,
		const Vectormath::Aos::Point3 &vertex2 );

	//////////////////////////////////////
	// Kernel dispatches
	void prepareLinks();

	void solveLinksForVelocity( int startLink, int numLinks, float kst );

	void updatePositionsFromVelocities( float solverdt );

	virtual void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );

	void updateVelocitiesFromPositionsWithVelocities( float isolverdt );

	void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );

	virtual void solveCollisionsAndUpdateVelocities( float isolverdt );
	// End kernel dispatches
	/////////////////////////////////////

	void updateBounds();

	void releaseKernels();

public:
	btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false);

	virtual ~btOpenCLSoftBodySolver();

	btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );

	virtual btSoftBodyLinkData &getLinkData();

	virtual btSoftBodyVertexData &getVertexData();

	virtual btSoftBodyTriangleData &getTriangleData();

	/** This solver reports itself as CL_SOLVER. */
	virtual SolverTypes getSolverType() const
	{
		return CL_SOLVER;
	}

	virtual bool checkInitialized();

	virtual void updateSoftBodies( );

	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);

	virtual void copyBackToSoftBodies(bool bMove = true);

	virtual void solveConstraints( float solverdt );

	virtual void predictMotion( float solverdt );

	virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );

	virtual void processCollision( btSoftBody*, btSoftBody* );

	/** Store the work-group size returned by getDefaultWorkGroupSize. */
	virtual void setDefaultWorkgroupSize(size_t workGroupSize)
	{
		m_defaultWorkGroupSize = workGroupSize;
	}

	virtual size_t getDefaultWorkGroupSize() const
	{
		return m_defaultWorkGroupSize;
	}

	/** Select the kernel compiler to use; a null pointer restores the built-in default. */
	void setCLFunctions(CLFunctions* funcs)
	{
		m_currentCLFunctions = funcs ? funcs : &m_defaultCLFunctions;
	}

}; // btOpenCLSoftBodySolver
/**
* Class to manage movement of data from a solver to a given target.
* This version is the CL to CPU version.
*/
/**
 * Solver output for the CL to CPU path; it holds no state of its own.
 */
class btSoftBodySolverOutputCLtoCPU : public btSoftBodySolverOutput
{
public:
	btSoftBodySolverOutputCLtoCPU() {}

	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H

View file

@ -0,0 +1,81 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
#define BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
#include "stddef.h" //for size_t
#include "vectormath/vmInclude.h"
#include "btSoftBodySolver_OpenCL.h"
#include "btSoftBodySolverBuffer_OpenCL.h"
#include "btSoftBodySolverLinkData_OpenCLSIMDAware.h"
#include "btSoftBodySolverVertexData_OpenCL.h"
#include "btSoftBodySolverTriangleData_OpenCL.h"
/**
 * Variant of btOpenCLSoftBodySolver that stores its links in a
 * btSoftBodyLinkDataOpenCLSIMDAware container and reports itself as CL_SIMD_SOLVER.
 * All non-inline members are defined out of line.
 */
class btOpenCLSoftBodySolverSIMDAware : public btOpenCLSoftBodySolver
{
protected:
// Link storage used by this solver variant.
btSoftBodyLinkDataOpenCLSIMDAware m_linkData;
// NOTE(review): presumably compiles the CL kernels this variant needs — confirm in the .cpp.
virtual bool buildShaders();
void updateConstants( float timeStep );
// Area of the triangle spanned by the three given points.
float computeTriangleArea(
const Vectormath::Aos::Point3 &vertex0,
const Vectormath::Aos::Point3 &vertex1,
const Vectormath::Aos::Point3 &vertex2 );
//////////////////////////////////////
// Kernel dispatches
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
// Same signature as the virtual declared in btOpenCLSoftBodySolver.
void solveCollisionsAndUpdateVelocities( float isolverdt );
// End kernel dispatches
/////////////////////////////////////
public:
// queue/ctx: OpenCL command queue and context handed to the solver.
// bUpdateAchchoredNodePos defaults to false; its effect is implemented out of line.
btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false);
virtual ~btOpenCLSoftBodySolverSIMDAware();
// Identifies this solver as the SIMD-aware OpenCL implementation.
virtual SolverTypes getSolverType() const
{
return CL_SIMD_SOLVER;
}
virtual btSoftBodyLinkData &getLinkData();
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void solveConstraints( float solverdt );
}; // btOpenCLSoftBodySolverSIMDAware
#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H

View file

@ -0,0 +1,748 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_SOFT_BODY_SOLVER_DATA_H
#define BT_SOFT_BODY_SOLVER_DATA_H
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
#include "vectormath/vmInclude.h"
class btSoftBodyLinkData
{
public:
/**
 * Class representing a link as a pair of indices into the vertex array.
 */
class LinkNodePair
{
public:
	int vertex0;
	int vertex1;

	/** Both endpoints start out as vertex index 0. */
	LinkNodePair()
		: vertex0( 0 ), vertex1( 0 )
	{
	}

	/** Link between vertex indices v0 and v1. */
	LinkNodePair( int v0, int v1 )
		: vertex0( v0 ), vertex1( v1 )
	{
	}
};
/**
 * Input description of one link: its two endpoint vertex indices,
 * a linear stiffness and a strength.
 */
class LinkDescription
{
protected:
	int m_vertex0;
	int m_vertex1;
	float m_linkLinearStiffness;
	float m_linkStrength;
public:
	/** Link between vertex 0 and vertex 0 with stiffness 1 and strength 1. */
	LinkDescription()
		: m_vertex0( 0 ), m_vertex1( 0 ), m_linkLinearStiffness( 1.0f ), m_linkStrength( 1.0f )
	{
	}

	/** Link between the two given vertices with the given stiffness; strength is fixed at 1. */
	LinkDescription( int newVertex0, int newVertex1, float linkLinearStiffness )
		: m_vertex0( newVertex0 ), m_vertex1( newVertex1 ), m_linkLinearStiffness( linkLinearStiffness ), m_linkStrength( 1.0f )
	{
	}

	/** Both endpoint indices packaged as a LinkNodePair. */
	LinkNodePair getVertexPair() const
	{
		return LinkNodePair( m_vertex0, m_vertex1 );
	}

	// Setters

	void setVertex0( int vertex ) { m_vertex0 = vertex; }

	void setVertex1( int vertex ) { m_vertex1 = vertex; }

	void setLinkLinearStiffness( float linearStiffness ) { m_linkLinearStiffness = linearStiffness; }

	void setLinkStrength( float strength ) { m_linkStrength = strength; }

	// Getters

	int getVertex0() const { return m_vertex0; }

	int getVertex1() const { return m_vertex1; }

	float getLinkStrength() const { return m_linkStrength; }

	float getLinkLinearStiffness() const { return m_linkLinearStiffness; }
};
protected:
// NOTE:
// Vertex reference data is stored relative to global array, not relative to individual cloth.
// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
// to another.
btAlignedObjectArray< LinkNodePair > m_links; // Vertex pair for the link
btAlignedObjectArray< float > m_linkStrength; // Strength of each link
// (inverseMassA + inverseMassB)/ linear stiffness coefficient
btAlignedObjectArray< float > m_linksMassLSC;
btAlignedObjectArray< float > m_linksRestLengthSquared;
// Current vector length of link
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_linksCLength;
// 1/(current length * current length * massLSC)
btAlignedObjectArray< float > m_linksLengthRatio;
btAlignedObjectArray< float > m_linksRestLength;
btAlignedObjectArray< float > m_linksMaterialLinearStiffnessCoefficient;
public:
btSoftBodyLinkData()
{
}
virtual ~btSoftBodyLinkData()
{
}
virtual void clear()
{
m_links.resize(0);
m_linkStrength.resize(0);
m_linksMassLSC.resize(0);
m_linksRestLengthSquared.resize(0);
m_linksLengthRatio.resize(0);
m_linksRestLength.resize(0);
m_linksMaterialLinearStiffnessCoefficient.resize(0);
}
int getNumLinks()
{
return m_links.size();
}
/** Grow every link-related array by numLinks entries, keeping the links already stored. */
virtual void createLinks( int numLinks )
{
	// All per-link arrays are kept at the same length as m_links.
	const int grownSize = m_links.size() + numLinks;

	m_links.resize( grownSize );
	m_linkStrength.resize( grownSize );
	m_linksMassLSC.resize( grownSize );
	m_linksRestLengthSquared.resize( grownSize );
	m_linksCLength.resize( grownSize );
	m_linksLengthRatio.resize( grownSize );
	m_linksRestLength.resize( grownSize );
	m_linksMaterialLinearStiffnessCoefficient.resize( grownSize );
}
/**
 * Store the described link at slot linkIndex.
 * The slot must already exist (see createLinks). Values taken from the description
 * are the vertex pair, strength and linear stiffness; every other per-link value is zeroed.
 */
virtual void setLinkAt( const LinkDescription &link, int linkIndex )
{
	// Values copied from the description
	m_links[linkIndex] = link.getVertexPair();
	m_linkStrength[linkIndex] = link.getLinkStrength();
	m_linksMaterialLinearStiffnessCoefficient[linkIndex] = link.getLinkLinearStiffness();

	// Remaining per-link values start at zero
	const float zero = 0.f;
	m_linksMassLSC[linkIndex] = zero;
	m_linksRestLengthSquared[linkIndex] = zero;
	m_linksCLength[linkIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
	m_linksLengthRatio[linkIndex] = zero;
	m_linksRestLength[linkIndex] = zero;
}
/**
* Return true if data is on the accelerator.
* The CPU version of this class will return true here because
* the CPU is the same as the accelerator.
*/
virtual bool onAccelerator()
{
return true;
}
/**
* Move data from host memory to the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveToAccelerator()
{
return true;
}
/**
* Move data to host memory from the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveFromAccelerator()
{
return true;
}
/**
* Return reference to the vertex index pair for link linkIndex as stored on the host.
*/
LinkNodePair &getVertexPair( int linkIndex )
{
return m_links[linkIndex];
}
/**
* Return reference to strength of link linkIndex as stored on the host.
*/
float &getStrength( int linkIndex )
{
return m_linkStrength[linkIndex];
}
/**
* Return a reference to the strength of the link corrected for link sorting.
* This is important if we are using data on an accelerator which has the data sorted in some fashion.
*/
virtual float &getStrengthCorrected( int linkIndex )
{
return getStrength( linkIndex );
}
/**
* Return reference to the rest length of link linkIndex as stored on the host.
*/
float &getRestLength( int linkIndex )
{
return m_linksRestLength[linkIndex];
}
/**
* Return reference to linear stiffness coefficient for link linkIndex as stored on the host.
*/
float &getLinearStiffnessCoefficient( int linkIndex )
{
return m_linksMaterialLinearStiffnessCoefficient[linkIndex];
}
/**
* Return reference to the MassLSC value for link linkIndex as stored on the host.
*/
float &getMassLSC( int linkIndex )
{
return m_linksMassLSC[linkIndex];
}
/**
* Return reference to rest length squared for link linkIndex as stored on the host.
*/
float &getRestLengthSquared( int linkIndex )
{
return m_linksRestLengthSquared[linkIndex];
}
/**
* Return reference to current length of link linkIndex as stored on the host.
*/
Vectormath::Aos::Vector3 &getCurrentLength( int linkIndex )
{
return m_linksCLength[linkIndex];
}
/**
* Return reference to the link length ratio for link linkIndex as stored on the host.
*/
float &getLinkLengthRatio( int linkIndex )
{
return m_linksLengthRatio[linkIndex];
}
};
/**
* Wrapper for vertex data information.
* By wrapping it like this we stand a good chance of being able to optimise for storage format easily.
* It should also help us make sure all the data structures remain consistent.
*/
class btSoftBodyVertexData
{
public:
/**
* Class describing a vertex for input into the system.
*/
class VertexDescription
{
private:
Vectormath::Aos::Point3 m_position;
/** Inverse mass. If this is 0f then the mass was 0 because that simplifies calculations. */
float m_inverseMass;
public:
VertexDescription()
{
m_position = Vectormath::Aos::Point3( 0.f, 0.f, 0.f );
m_inverseMass = 0.f;
}
VertexDescription( const Vectormath::Aos::Point3 &position, float mass )
{
m_position = position;
if( mass > 0.f )
m_inverseMass = 1.0f/mass;
else
m_inverseMass = 0.f;
}
void setPosition( const Vectormath::Aos::Point3 &position )
{
m_position = position;
}
void setInverseMass( float inverseMass )
{
m_inverseMass = inverseMass;
}
void setMass( float mass )
{
if( mass > 0.f )
m_inverseMass = 1.0f/mass;
else
m_inverseMass = 0.f;
}
Vectormath::Aos::Point3 getPosition() const
{
return m_position;
}
float getInverseMass() const
{
return m_inverseMass;
}
float getMass() const
{
if( m_inverseMass == 0.f )
return 0.f;
else
return 1.0f/m_inverseMass;
}
};
protected:
// Identifier of the cloth each vertex belongs to (createVertices writes -1 for padding vertices).
// For the CPU we don't really need this as we can grab the cloths and iterate over only their vertices.
// For a parallel accelerator, knowing on a per-vertex basis which cloth we're part of will help for obtaining
// per-cloth data.
// For sorting etc. it might also be helpful to be able to use in-array data such as this.
btAlignedObjectArray< int > m_clothIdentifier;
btAlignedObjectArray< Vectormath::Aos::Point3 > m_vertexPosition; // Current vertex positions
btAlignedObjectArray< Vectormath::Aos::Point3 > m_vertexPreviousPosition; // Previous vertex positions
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_vertexVelocity; // Velocity
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_vertexForceAccumulator; // Force accumulator
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_vertexNormal; // Normals
btAlignedObjectArray< float > m_vertexInverseMass; // Inverse mass
btAlignedObjectArray< float > m_vertexArea; // Area controlled by the vertex
btAlignedObjectArray< int > m_vertexTriangleCount; // Number of triangles touching this vertex
public:
btSoftBodyVertexData()
{
}
virtual ~btSoftBodyVertexData()
{
}
virtual void clear()
{
m_clothIdentifier.resize(0);
m_vertexPosition.resize(0);
m_vertexPreviousPosition.resize(0);
m_vertexVelocity.resize(0);
m_vertexForceAccumulator.resize(0);
m_vertexNormal.resize(0);
m_vertexInverseMass.resize(0);
m_vertexArea.resize(0);
m_vertexTriangleCount.resize(0);
}
int getNumVertices()
{
return m_vertexPosition.size();
}
int getClothIdentifier( int vertexIndex )
{
return m_clothIdentifier[vertexIndex];
}
void setVertexAt( const VertexDescription &vertex, int vertexIndex )
{
m_vertexPosition[vertexIndex] = vertex.getPosition();
m_vertexPreviousPosition[vertexIndex] = vertex.getPosition();
m_vertexVelocity[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
m_vertexForceAccumulator[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
m_vertexNormal[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
m_vertexInverseMass[vertexIndex] = vertex.getInverseMass();
m_vertexArea[vertexIndex] = 0.f;
m_vertexTriangleCount[vertexIndex] = 0;
}
/**
 * Create numVertices new vertices for cloth clothIdentifier.
 * maxVertices allows a buffer zone of extra vertices for alignment or tearing reasons;
 * 0 (the default) means no extra slots. A non-zero maxVertices smaller than numVertices
 * is raised to numVertices so that every requested vertex gets storage.
 * The first numVertices new slots are tagged with clothIdentifier, any extra slots with -1.
 */
void createVertices( int numVertices, int clothIdentifier, int maxVertices = 0 )
{
	const int previousSize = m_vertexPosition.size();
	// Never reserve fewer slots than vertices requested: the caller goes on to fill
	// numVertices entries, which would otherwise index past the end of the arrays.
	if( maxVertices == 0 || maxVertices < numVertices )
		maxVertices = numVertices;
	const int newSize = previousSize + maxVertices;
	const int firstPaddingIndex = previousSize + numVertices;

	// Resize all the arrays that store vertex data
	m_clothIdentifier.resize( newSize );
	m_vertexPosition.resize( newSize );
	m_vertexPreviousPosition.resize( newSize );
	m_vertexVelocity.resize( newSize );
	m_vertexForceAccumulator.resize( newSize );
	m_vertexNormal.resize( newSize );
	m_vertexInverseMass.resize( newSize );
	m_vertexArea.resize( newSize );
	m_vertexTriangleCount.resize( newSize );

	// Real vertices carry the cloth id ...
	for( int vertexIndex = previousSize; vertexIndex < firstPaddingIndex; ++vertexIndex )
		m_clothIdentifier[vertexIndex] = clothIdentifier;
	// ... padding vertices are marked as belonging to no cloth.
	for( int vertexIndex = firstPaddingIndex; vertexIndex < newSize; ++vertexIndex )
		m_clothIdentifier[vertexIndex] = -1;
}
// Get and set methods in header so they can be inlined
/**
* Return a reference to the position of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Point3 &getPosition( int vertexIndex )
{
return m_vertexPosition[vertexIndex];
}
Vectormath::Aos::Point3 getPosition( int vertexIndex ) const
{
return m_vertexPosition[vertexIndex];
}
/**
* Return a reference to the previous position of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Point3 &getPreviousPosition( int vertexIndex )
{
return m_vertexPreviousPosition[vertexIndex];
}
/**
* Return a reference to the velocity of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Vector3 &getVelocity( int vertexIndex )
{
return m_vertexVelocity[vertexIndex];
}
/**
* Return a reference to the force accumulator of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Vector3 &getForceAccumulator( int vertexIndex )
{
return m_vertexForceAccumulator[vertexIndex];
}
/**
* Return a reference to the normal of vertex vertexIndex as stored on the host.
*/
Vectormath::Aos::Vector3 &getNormal( int vertexIndex )
{
return m_vertexNormal[vertexIndex];
}
Vectormath::Aos::Vector3 getNormal( int vertexIndex ) const
{
return m_vertexNormal[vertexIndex];
}
/**
* Return a reference to the inverse mass of vertex vertexIndex as stored on the host.
*/
float &getInverseMass( int vertexIndex )
{
return m_vertexInverseMass[vertexIndex];
}
/**
* Get access to the area controlled by this vertex.
*/
float &getArea( int vertexIndex )
{
return m_vertexArea[vertexIndex];
}
/**
* Get access to the array of how many triangles touch each vertex.
*/
int &getTriangleCount( int vertexIndex )
{
return m_vertexTriangleCount[vertexIndex];
}
/**
* Return true if data is on the accelerator.
* The CPU version of this class will return true here because
* the CPU is the same as the accelerator.
*/
virtual bool onAccelerator()
{
return true;
}
/**
* Move data from host memory to the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveToAccelerator()
{
return true;
}
/**
* Move data to host memory from the accelerator if bCopy is false.
* If bCopy is true, copy data to host memory from the accelerator so that data
* won't be moved to accelerator when moveToAccelerator() is called next time.
* If bCopyMinimum is true, only vertex position and normal are copied.
* bCopyMinimum will be meaningful only if bCopy is true.
* The CPU version will always return that it has moved it.
*/
virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true)
{
return true;
}
btAlignedObjectArray< Vectormath::Aos::Point3 > &getVertexPositions()
{
return m_vertexPosition;
}
};
class btSoftBodyTriangleData
{
public:
/**
 * Class representing a triangle as a set of three indices into the
 * vertex array, followed by one padding int.
 * Both constructors set _padding to -1 so that copies of the struct never
 * carry an indeterminate value.
 */
class TriangleNodeSet
{
public:
	int vertex0;
	int vertex1;
	int vertex2;
	int _padding;

	/** All three indices 0, padding -1. */
	TriangleNodeSet( )
	{
		vertex0 = 0;
		vertex1 = 0;
		vertex2 = 0;
		_padding = -1;
	}

	/** Triangle over the three given vertex indices, padding -1. */
	TriangleNodeSet( int newVertex0, int newVertex1, int newVertex2 )
	{
		vertex0 = newVertex0;
		vertex1 = newVertex1;
		vertex2 = newVertex2;
		// Previously left uninitialised by this constructor.
		_padding = -1;
	}
};
/**
 * Input description of one triangle as three vertex indices.
 */
class TriangleDescription
{
protected:
	int m_vertex0;
	int m_vertex1;
	int m_vertex2;
public:
	/** Triangle whose three indices are all 0. */
	TriangleDescription()
		: m_vertex0( 0 ), m_vertex1( 0 ), m_vertex2( 0 )
	{
	}

	TriangleDescription( int newVertex0, int newVertex1, int newVertex2 )
		: m_vertex0( newVertex0 ), m_vertex1( newVertex1 ), m_vertex2( newVertex2 )
	{
	}

	/** The three indices packaged as a TriangleNodeSet. */
	TriangleNodeSet getVertexSet() const
	{
		// Start from a default-constructed set, then fill in the indices.
		TriangleNodeSet indexSet;
		indexSet.vertex0 = m_vertex0;
		indexSet.vertex1 = m_vertex1;
		indexSet.vertex2 = m_vertex2;
		return indexSet;
	}
};
protected:
// NOTE:
// Vertex reference data is stored relative to global array, not relative to individual cloth.
// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
// to another.
btAlignedObjectArray< TriangleNodeSet > m_vertexIndices;
btAlignedObjectArray< float > m_area;
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_normal;
public:
btSoftBodyTriangleData()
{
}
virtual ~btSoftBodyTriangleData()
{
}
virtual void clear()
{
m_vertexIndices.resize(0);
m_area.resize(0);
m_normal.resize(0);
}
int getNumTriangles()
{
return m_vertexIndices.size();
}
virtual void setTriangleAt( const TriangleDescription &triangle, int triangleIndex )
{
m_vertexIndices[triangleIndex] = triangle.getVertexSet();
}
virtual void createTriangles( int numTriangles )
{
int previousSize = m_vertexIndices.size();
int newSize = previousSize + numTriangles;
// Resize all the arrays that store triangle data
m_vertexIndices.resize( newSize );
m_area.resize( newSize );
m_normal.resize( newSize );
}
/**
* Return the vertex index set for triangle triangleIndex as stored on the host.
*/
const TriangleNodeSet &getVertexSet( int triangleIndex )
{
return m_vertexIndices[triangleIndex];
}
/**
* Get access to the triangle area.
*/
float &getTriangleArea( int triangleIndex )
{
return m_area[triangleIndex];
}
/**
* Get access to the normal vector for this triangle.
*/
Vectormath::Aos::Vector3 &getNormal( int triangleIndex )
{
return m_normal[triangleIndex];
}
/**
* Return true if data is on the accelerator.
* The CPU version of this class will return true here because
* the CPU is the same as the accelerator.
*/
virtual bool onAccelerator()
{
return true;
}
/**
* Move data from host memory to the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveToAccelerator()
{
return true;
}
/**
* Move data to host memory from the accelerator.
* The CPU version will always return that it has moved it.
*/
virtual bool moveFromAccelerator()
{
return true;
}
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_DATA_H

View file

@ -0,0 +1,117 @@
/*
Copyright (C) 2009 Sony Computer Entertainment Inc.
All rights reserved.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_HEAP_MANAGER_H__
#define BT_HEAP_MANAGER_H__
#ifdef __SPU__
#define HEAP_STACK_SIZE 32
#else
#define HEAP_STACK_SIZE 64
#endif
#define MIN_ALLOC_SIZE 16
/**
 * Stack-style allocator over a caller-supplied byte buffer.
 *
 * mPoolStack[i] records where the i-th allocation level begins; allocate() pushes
 * a new level and deallocate() pops the most recent one, so blocks must be released
 * in reverse order of allocation. The buffer is not owned: the destructor does nothing.
 * At most HEAP_STACK_SIZE-1 allocations can be outstanding at once.
 */
class HeapManager
{
private:
	ATTRIBUTE_ALIGNED16(unsigned char *mHeap);
	ATTRIBUTE_ALIGNED16(unsigned int mHeapBytes);
	ATTRIBUTE_ALIGNED16(unsigned char *mPoolStack[HEAP_STACK_SIZE]);
	ATTRIBUTE_ALIGNED16(unsigned int mCurStack);

public:
	enum {ALIGN16,ALIGN128};

	/** Manage the bytes-long buffer starting at buf; the heap starts out empty. */
	HeapManager(unsigned char *buf,int bytes)
	{
		mHeap = buf;
		mHeapBytes = bytes;
		clear();
	}

	~HeapManager()
	{
	}

	/** Number of bytes between the start of the buffer and the current top of the stack. */
	int getAllocated()
	{
		return (int)(mPoolStack[mCurStack]-mHeap);
	}

	/** Number of bytes still available above the current top of the stack. */
	int getRest()
	{
		return mHeapBytes-getAllocated();
	}

	/**
	 * Push a new block of at least `bytes` bytes and return its address.
	 * The size is rounded up to a multiple of 16 (ALIGN16) or 128 (ALIGN128).
	 * Only ALIGN128 also rounds the start address up; for ALIGN16 the block
	 * starts exactly at the current top of the stack.
	 * Running out of stack levels or buffer space is only caught by btAssert.
	 */
	void *allocate(size_t bytes,int alignment = ALIGN16)
	{
		// size_t is unsigned, so zero is the only degenerate request.
		if(bytes == 0) bytes = MIN_ALLOC_SIZE;
		btAssert(mCurStack < (HEAP_STACK_SIZE-1));

#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__)
		unsigned long long p = (unsigned long long )mPoolStack[mCurStack];
		if(alignment == ALIGN128) {
			p = ((p+127) & 0xffffffffffffff80);
			bytes = (bytes+127) & 0xffffffffffffff80;
		}
		else {
			bytes = (bytes+15) & 0xfffffffffffffff0;
		}
		btAssert(bytes <=(mHeapBytes-(p-(unsigned long long )mHeap)) );
#else
		unsigned long p = (unsigned long )mPoolStack[mCurStack];
		if(alignment == ALIGN128) {
			p = ((p+127) & 0xffffff80);
			bytes = (bytes+127) & 0xffffff80;
		}
		else {
			bytes = (bytes+15) & 0xfffffff0;
		}
		btAssert(bytes <=(mHeapBytes-(p-(unsigned long)mHeap)) );
#endif
		// The end of this block becomes the start of the next level.
		unsigned char * bla = (unsigned char *)(p + bytes);
		mPoolStack[++mCurStack] = bla;
		return (void*)p;
	}

	/**
	 * Pop the most recent allocation. The pointer argument is ignored,
	 * so callers must release blocks in reverse order of allocation.
	 */
	void deallocate(void *p)
	{
		(void) p;
		// Popping an empty stack would wrap the unsigned index and make every
		// later mPoolStack[mCurStack] access read out of bounds.
		btAssert(mCurStack > 0);
		mCurStack--;
	}

	/** Release everything: the stack is reset to a single level at the start of the buffer. */
	void clear()
	{
		mPoolStack[0] = mHeap;
		mCurStack = 0;
	}

//	void printStack()
//	{
//		for(unsigned int i=0;i<=mCurStack;i++) {
//			PRINTF("memStack %2d 0x%x\n",i,(uint32_t)mPoolStack[i]);
//		}
//	}
};
#endif //BT_HEAP_MANAGER_H__

View file

@ -1,14 +0,0 @@
# Jam build description for the bulletmultithreaded library.
SubDir TOP src BulletMultiThreaded ;
#IncludeDir src/BulletMultiThreaded ;
# Sources: every .h/.cpp in this directory plus the MiniCLTask and
# SpuNarrowPhaseCollisionTask subdirectories; the library is marked noinstall.
Library bulletmultithreaded : [ Wildcard . : *.h *.cpp ] [ Wildcard MiniCLTask : *.h *.cpp ] [ Wildcard SpuNarrowPhaseCollisionTask : *.h *.cpp ] : noinstall ;
# Include paths: this directory and the scalar C++ vectormath headers.
CFlags bulletmultithreaded : [ FIncludes $(TOP)/src/BulletMultiThreaded ] [ FIncludes $(TOP)/src/BulletMultiThreaded/vectormath/scalar/cpp ] ;
# No library dependencies are listed.
LibDepends bulletmultithreaded : ;
# The same two include directories for the MSVC project files.
MsvcIncDirs bulletmultithreaded :
"../../src/BulletMultiThreaded"
"../../src/BulletMultiThreaded/vectormath/scalar/cpp"
;
InstallHeader [ Wildcard *.h ] : bulletmultithreaded ;

View file

@ -1,187 +0,0 @@
# Target word size; "64" switches the PPU compiler and adds -DUSE_ADDR64 further down.
__ARCH_BITS__ := 32
# define macros
NARROWPHASEDIR=./SpuNarrowPhaseCollisionTask
# Source file (without extension) compiled into SpuTaskFile.o
SPU_TASKFILE=$(NARROWPHASEDIR)/SpuGatheringCollisionTask
# The SDK is treated as version 3.0 when /opt/cell exists; otherwise the variable is empty.
IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
# CELL_TOP uses ?= so it can be overridden from the environment or command line.
CELL_TOP ?= /opt/cell/sdk
CELL_SYSROOT := /opt/cell/sysroot
else
# Fallback layout: the sysroot lives underneath CELL_TOP.
CELL_TOP ?= /opt/ibm/cell-sdk/prototype
CELL_SYSROOT := $(CELL_TOP)/sysroot
endif
USE_CCACHE=ccache
RM=rm -f
OUTDIR=./out
DEBUGFLAG=-DNDEBUG
LIBOUTDIR=../../lib/ibmsdk
COLLISIONDIR=../../src/BulletCollision
MATHDIR=../../src/LinearMath
ARCHITECTUREFLAG=-m$(__ARCH_BITS__)
ifeq "$(__ARCH_BITS__)" "64"
SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__ -DUSE_ADDR64
else
SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__
endif
SPU_DEFFLAGS+=-DUSE_PE_BOX_BOX
SPU_GCC=$(USE_CCACHE) /usr/bin/spu-gcc
SPU_INCLUDEDIR= -Ivectormath/scalar/cpp -I. -I$(CELL_SYSROOT)/usr/spu/include -I../../src -I$(NARROWPHASEDIR)
#SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -Os -c -include spu_intrinsics.h -include stdbool.h
SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -O3 -mbranch-hints -fomit-frame-pointer -ftree-vectorize -finline-functions -ftree-vect-loop-version -ftree-loop-optimize -ffast-math -fno-rtti -fno-exceptions -c -include spu_intrinsics.h -include stdbool.h
SPU_LFLAGS= -Wl,-N
SPU_LIBRARIES=-lstdc++
SPU_EMBED=/usr/bin/ppu-embedspu
SPU_AR=/usr/bin/ar
SYMBOLNAME=spu_program
ifeq "$(__ARCH_BITS__)" "64"
PPU_DEFFLAGS= -DUSE_LIBSPE2 -DUSE_ADDR64
PPU_GCC=$(USE_CCACHE) /usr/bin/ppu-gcc
else
PPU_DEFFLAGS= -DUSE_LIBSPE2
PPU_GCC=$(USE_CCACHE) /usr/bin/ppu32-gcc
endif
PPU_CFLAGS= $(ARCHITECTUREFLAG) $(DEBUGFLAG) -W -Wall -Winline -O3 -c -mabi=altivec -maltivec -include altivec.h -include stdbool.h
PPU_INCLUDEDIR= -I. -I$(CELL_SYSROOT)/usr/include -I../../src -I$(NARROWPHASEDIR)
PPU_LFLAGS= $(ARCHITECTUREFLAG) -Wl,-m,elf$(__ARCH_BITS__)ppc
PPU_LIBRARIES= -lstdc++ -lsupc++ -lgcc -lgcov -lspe2 -lpthread -L../../lib/ibmsdk -lbulletcollision -lbulletdynamics -lbulletmath -L$(CELL_SYSROOT)/usr/lib$(__ARCH_BITS__) -R$(CELL_SYSROOT)/usr/lib
PPU_AR=/usr/bin/ar
MakeOut :
# rm -f -R $(OUTDIR) ; mkdir $(OUTDIR)
@echo "usage: make spu, make ppu, make all, or make clean"
# SPU
SpuTaskFile : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/SpuTaskFile.o $(SPU_TASKFILE).cpp
boxBoxDistance : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuFakeDma : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuContactManifoldCollisionAlgorithm_spu : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuContactManifoldCollisionAlgorithm.cpp
SpuCollisionShapes : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuContactResult : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
#SpuGatheringCollisionTask : MakeOut
# $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuGjkPairDetector: MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuMinkowskiPenetrationDepthSolver : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuVoronoiSimplexSolver : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
#SpuLibspe2Support_spu : MakeOut
# $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuLibspe2Support.cpp
## SPU-Bullet
btPersistentManifold : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/NarrowPhaseCollision/$@.cpp
btOptimizedBvh : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
btCollisionObject : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionDispatch/$@.cpp
btTriangleCallback : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
btTriangleIndexVertexArray : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
btStridingMeshInterface : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
btAlignedAllocator : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(MATHDIR)/$@.cpp
# PPU
SpuGatheringCollisionDispatcher : MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SequentialThreadSupport: MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuLibspe2Support: MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
btThreadSupportInterface: MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuCollisionTaskProcess : MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuContactManifoldCollisionAlgorithm : MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuSampleTaskProcess : MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
# Link all SPU objects into $(OUTDIR)/spuCollision.elf.
# NOTE(review): btOptimizedBvh and btCollisionObject are prerequisites of this target,
# but their objects are not on the link line below - confirm whether that is intentional.
spu : boxBoxDistance SpuFakeDma SpuContactManifoldCollisionAlgorithm_spu SpuContactResult SpuTaskFile \
SpuGjkPairDetector SpuMinkowskiPenetrationDepthSolver SpuVoronoiSimplexSolver SpuCollisionShapes \
btPersistentManifold btOptimizedBvh btCollisionObject btTriangleCallback btTriangleIndexVertexArray \
btStridingMeshInterface btAlignedAllocator
$(SPU_GCC) -o $(OUTDIR)/spuCollision.elf \
$(OUTDIR)/SpuTaskFile.o \
$(OUTDIR)/SpuFakeDma.o \
$(OUTDIR)/boxBoxDistance.o \
$(OUTDIR)/SpuContactManifoldCollisionAlgorithm_spu.o \
$(OUTDIR)/SpuContactResult.o \
$(OUTDIR)/SpuCollisionShapes.o \
$(OUTDIR)/SpuGjkPairDetector.o \
$(OUTDIR)/SpuMinkowskiPenetrationDepthSolver.o \
$(OUTDIR)/SpuVoronoiSimplexSolver.o \
$(OUTDIR)/btPersistentManifold.o \
$(OUTDIR)/btTriangleCallback.o \
$(OUTDIR)/btTriangleIndexVertexArray.o \
$(OUTDIR)/btStridingMeshInterface.o \
$(OUTDIR)/btAlignedAllocator.o \
$(SPU_LFLAGS) $(SPU_LIBRARIES)
spu-embed : spu
$(SPU_EMBED) $(ARCHITECTUREFLAG) $(SYMBOLNAME) $(OUTDIR)/spuCollision.elf $(OUTDIR)/$@.o
$(SPU_AR) -qcs $(LIBOUTDIR)/libspu.a $(OUTDIR)/$@.o
ppu : SpuGatheringCollisionDispatcher SpuCollisionTaskProcess btThreadSupportInterface \
SpuLibspe2Support SpuContactManifoldCollisionAlgorithm SpuSampleTaskProcess
$(PPU_AR) -qcs $(LIBOUTDIR)/bulletmultithreaded.a \
$(OUTDIR)/SpuCollisionTaskProcess.o \
$(OUTDIR)/SpuSampleTaskProcess.o \
$(OUTDIR)/SpuGatheringCollisionDispatcher.o \
$(OUTDIR)/SpuLibspe2Support.o \
$(OUTDIR)/btThreadSupportInterface.o \
$(OUTDIR)/SpuContactManifoldCollisionAlgorithm.o
all : spu-embed ppu
clean:
$(RM) $(OUTDIR)/* ; $(RM) $(LIBOUTDIR)/libspu.a ; $(RM) $(LIBOUTDIR)/bulletmultithreaded.a

View file

@ -1,116 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "MiniCLTask.h"
#include "../PlatformDefinitions.h"
#include "../SpuFakeDma.h"
#include "LinearMath/btMinMax.h"
#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h"
#ifdef __SPU__
#include <spu_printf.h>
#else
#include <stdio.h>
#define spu_printf printf
#endif
#define __kernel
#define __global
#define get_global_id(a) guid
struct MiniCLTask_LocalStoreMemory
{
};
///////////////////////////////////////////////////
// OpenCL-style kernel: component-wise sum of one float8 element, c[guid] = a[guid] + b[guid].
// In this file get_global_id() is a macro that expands to the guid parameter.
__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int guid)
{
	// index of the oct-float this work item handles
	int workItem = get_global_id(0);

	// fetch both operands
	float8 lhs = a[workItem];
	float8 rhs = b[workItem];
	float8 sum = (float8)0.0f;

	// add the eight lanes one by one
	sum.s0 = lhs.s0 + rhs.s0;
	sum.s1 = lhs.s1 + rhs.s1;
	sum.s2 = lhs.s2 + rhs.s2;
	sum.s3 = lhs.s3 + rhs.s3;
	sum.s4 = lhs.s4 + rhs.s4;
	sum.s5 = lhs.s5 + rhs.s5;
	sum.s6 = lhs.s6 + rhs.s6;
	sum.s7 = lhs.s7 + rhs.s7;

	// store the result
	c[workItem] = sum;
}
///////////////////////////////////////////////////
//-- MAIN METHOD
/// Entry point executed by a worker for one MiniCL task.
/// @param userPtr   points to the MiniCLTaskDesc describing the work range,
///                  kernel id and packed kernel arguments.
/// @param lsMemory  local-store block for the worker; not used by the kernels
///                  handled here.
/// Runs the selected kernel once per work unit in
/// [m_firstWorkUnit, m_lastWorkUnit); unknown kernel ids are reported on stdout.
void processMiniCLTask(void* userPtr, void* lsMemory)
{
	//	BT_PROFILE("processSampleTask");

	// no kernel below needs scratch memory; silence the unused-parameter warning
	(void)lsMemory;

	MiniCLTaskDesc& taskDesc = *(MiniCLTaskDesc*)userPtr;

	// all descriptor fields printed here are uint32_t, hence %u
	printf("Compute Unit[%u] executed kernel %u work items [%u..%u)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);

	switch (taskDesc.m_kernelProgramId)
	{
	case CMD_MINICL_ADDVECTOR:
		{
			// argument slots 0..2 each hold one pointer value (a, b, c)
			for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
			{
				VectorAdd(*(const float8**)&taskDesc.m_argData[0][0],*(const float8**)&taskDesc.m_argData[1][0],*(float8**)&taskDesc.m_argData[2][0],i);
			}
			break;
		}

	default:
		{
			printf("error in processMiniCLTask: unknown command id: %u\n",taskDesc.m_kernelProgramId);
			break;
		}
	};
}
#if defined(__CELLOS_LV2__) || defined (LIBSPE2)

// Single statically allocated instance, declared through ATTRIBUTE_ALIGNED16.
ATTRIBUTE_ALIGNED16(MiniCLTask_LocalStoreMemory	gLocalStoreMemory);

/// Returns the address of the static local-store block.
void* createMiniCLLocalStoreMemory()
{
	return &gLocalStoreMemory;
}
#else
/// Returns a newly heap-allocated local-store block.
/// Nothing in this file deletes it; ownership passes to the caller.
void* createMiniCLLocalStoreMemory()
{
	return new MiniCLTask_LocalStoreMemory;
}
#endif

View file

@ -1,81 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef MINICL__TASK_H
#define MINICL__TASK_H
#include "../PlatformDefinitions.h"
#include "LinearMath/btScalar.h"
#include "LinearMath/btAlignedAllocator.h"
/// Kernel ids stored in MiniCLTaskDesc::m_kernelProgramId.
enum
{
CMD_MINICL_1= 1,
CMD_MINICL_ADDVECTOR // selects the VectorAdd kernel in processMiniCLTask
};
/// Eight-lane float vector mirroring the OpenCL float8 component names s0..s7.
struct float8
{
	float s0;
	float s1;
	float s2;
	float s3;
	float s4;
	float s5;
	float s6;
	float s7;

	/// Broadcast constructor: every lane receives `scalar`.
	float8(float scalar)
		: s0(scalar), s1(scalar), s2(scalar), s3(scalar),
		  s4(scalar), s5(scalar), s6(scalar), s7(scalar)
	{
	}
};
#define MINICL_MAX_ARGLENGTH 128
#define MINI_CL_MAX_ARG 8
/// Description of one MiniCL task: which kernel to run, over which range of
/// work units, and the packed kernel arguments.
ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
{
	BT_DECLARE_ALIGNED_ALLOCATOR();

	/// Starts with zeroed ids/range and all argument sizes at 0 ("argument unset").
	/// The argument payload bytes themselves are left untouched.
	MiniCLTaskDesc()
		: m_taskId(0),
		  m_kernelProgramId(0),
		  m_firstWorkUnit(0),
		  m_lastWorkUnit(0)
	{
		for (int i=0;i<MINI_CL_MAX_ARG;i++)
		{
			m_argSizes[i]=0;
		}
	}

	uint32_t		m_taskId;			// slot index used to recognise the finished task
	uint32_t		m_kernelProgramId;	// one of the CMD_MINICL_* values
	uint32_t		m_firstWorkUnit;	// first work unit (inclusive)
	uint32_t		m_lastWorkUnit;		// last work unit (exclusive)

	// raw bytes of each argument, at most MINICL_MAX_ARGLENGTH per slot
	char			m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH];
	// byte count stored in each slot; 0 means the slot is unused
	int				m_argSizes[MINI_CL_MAX_ARG];
};
/// Runs the kernel described by the MiniCLTaskDesc behind userPtr.
void processMiniCLTask(void* userPtr, void* lsMemory);
/// Returns the local-store block that is passed to processMiniCLTask as lsMemory.
void* createMiniCLLocalStoreMemory();
#endif //MINICL__TASK_H

View file

@ -1,227 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//#define __CELLOS_LV2__ 1
#define USE_SAMPLE_PROCESS 1
#ifdef USE_SAMPLE_PROCESS
#include "MiniCLTaskScheduler.h"
#include <stdio.h>
#ifdef __SPU__
/// SPU-side sample task body: ignores both arguments and only prints a greeting.
void SampleThreadFunc(void* userPtr,void* lsMemory)
{
//placeholder: no real work, just a trace message
printf("hello world\n");
}
/// SPU-side local-store setup for the sample task; always returns a null pointer.
void* SamplelsMemoryFunc()
{
//don't create local store memory, just return 0
return 0;
}
#else
#include "btThreadSupportInterface.h"
//# include "SPUAssert.h"
#include <string.h>
extern "C" {
extern char SPU_SAMPLE_ELF_SYMBOL[];
}
/// Builds a scheduler with `maxNumOutstandingTasks` task slots, all idle, and
/// starts the thread backend. The scheduler is not marked initialized until
/// initialize() is called.
MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface*	threadInterface,  int maxNumOutstandingTasks)
	: m_numBusyTasks(0),
	  m_currentTask(0),
	  m_initialized(false),
	  m_threadInterface(threadInterface),
	  m_maxNumOutstandingTasks(maxNumOutstandingTasks)
{
	// one busy flag and one task description per slot
	m_taskBusy.resize(m_maxNumOutstandingTasks);
	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);

	int slot = 0;
	while (slot < m_maxNumOutstandingTasks)
	{
		m_taskBusy[slot] = false;
		++slot;
	}

	m_threadInterface->startSPU();
}
/// Stops the thread backend that the constructor started.
/// The backend object itself is not deleted here.
MiniCLTaskScheduler::~MiniCLTaskScheduler()
{
m_threadInterface->stopSPU();
}
/// Resets the slot bookkeeping: every slot idle, no busy tasks, slot 0 current,
/// and marks the scheduler as initialized.
void MiniCLTaskScheduler::initialize()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
	printf("MiniCLTaskScheduler::initialize()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING

	int slot = 0;
	while (slot < m_maxNumOutstandingTasks)
	{
		m_taskBusy[slot] = false;
		++slot;
	}

	m_numBusyTasks = 0;
	m_currentTask = 0;
	m_initialized = true;
}
/// Fills the current task slot and sends it to the thread backend.
/// @param firstWorkUnit    first work unit of the range (inclusive)
/// @param lastWorkUnit     end of the range (exclusive)
/// @param kernelProgramId  kernel to run (CMD_MINICL_* value)
/// @param argData          MINI_CL_MAX_ARG argument slots of
///                         MINICL_MAX_ARGLENGTH bytes each, stored back to back
/// @param argSizes         MINI_CL_MAX_ARG byte counts; 0 marks an unused slot
/// If every slot is busy after sending, waits for one task to complete before
/// choosing the next free slot.
void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes)
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
	printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d)\n", m_currentTask);
#endif //DEBUG_SPU_TASK_SCHEDULING

	m_taskBusy[m_currentTask] = true;
	m_numBusyTasks++;

	MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
	{
		// send task description in event message
		taskDesc.m_firstWorkUnit = firstWorkUnit;
		taskDesc.m_lastWorkUnit = lastWorkUnit;
		taskDesc.m_kernelProgramId = kernelProgramId;
		//some bookkeeping to recognize finished tasks
		taskDesc.m_taskId = m_currentTask;

		for (int i=0;i<MINI_CL_MAX_ARG;i++)
		{
			taskDesc.m_argSizes[i] = argSizes[i];
			if (taskDesc.m_argSizes[i])
			{
				// a slot holds at most MINICL_MAX_ARGLENGTH bytes; anything larger
				// (or negative) would overrun the destination slot
				assert(taskDesc.m_argSizes[i] > 0 && taskDesc.m_argSizes[i] <= MINICL_MAX_ARGLENGTH);
				memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]);
			}
		}
	}

	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);

	// if all tasks busy, wait for spu event to clear the task.
	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
	{
		// initialised so no indeterminate value is ever handed to the backend
		unsigned int taskId = 0;
		unsigned int outputSize = 0;

		// seed taskId with the first busy slot
		for (int i=0;i<m_maxNumOutstandingTasks;i++)
		{
			if (m_taskBusy[i])
			{
				taskId = i;
				break;
			}
		}
		m_threadInterface->waitForResponse(&taskId, &outputSize);

		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);

		postProcess(taskId, outputSize);

		m_taskBusy[taskId] = false;
		m_numBusyTasks--;
	}

	// find new task buffer
	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
	{
		if (!m_taskBusy[i])
		{
			m_currentTask = i;
			break;
		}
	}
}
///Optional PPU-side post processing for each task.
///Called by issueTask() and flush() for every completed task; currently a no-op.
void MiniCLTaskScheduler::postProcess(int taskId, int outputSize)
{
}
/// Blocks until every outstanding task has completed.
/// For each completion reported by the thread backend, runs postProcess() and
/// releases the task's slot.
void MiniCLTaskScheduler::flush()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
	printf("\nMiniCLTaskScheduler::flush()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING

	// all tasks are issued, wait for all tasks to be complete
	while(m_numBusyTasks > 0)
	{
		// Consolidating SPU code
		// initialised so no indeterminate value is ever handed to the backend
		unsigned int taskId = 0;
		unsigned int outputSize = 0;

		// seed taskId with the first busy slot
		for (int i=0;i<m_maxNumOutstandingTasks;i++)
		{
			if (m_taskBusy[i])
			{
				taskId = i;
				break;
			}
		}

		m_threadInterface->waitForResponse(&taskId, &outputSize);

		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);

		postProcess(taskId, outputSize);

		m_taskBusy[taskId] = false;
		m_numBusyTasks--;
	}
}
#endif
#endif //USE_SAMPLE_PROCESS

View file

@ -1,181 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef MINICL_TASK_SCHEDULER_H
#define MINICL_TASK_SCHEDULER_H
#include <assert.h>
#include "PlatformDefinitions.h"
#include <stdlib.h>
#include "LinearMath/btAlignedObjectArray.h"
#include "MiniCLTask/MiniCLTask.h"
//just add your commands here, try to keep them globally unique for debugging purposes
#define CMD_SAMPLE_TASK_COMMAND 10
/// MiniCLTaskScheduler handles SPU processing of collision pairs.
/// When PPU issues a task, it will look for completed task buffers
/// PPU will do postprocessing, dependent on workunit output (not likely)
class MiniCLTaskScheduler
{
// track task buffers that are being used, and total busy tasks
btAlignedObjectArray<bool> m_taskBusy;
// one task description per slot; filled in by issueTask()
btAlignedObjectArray<MiniCLTaskDesc> m_spuSampleTaskDesc;
int m_numBusyTasks;
// the current task and the current entry to insert a new work unit
int m_currentTask;
// false after construction, true once initialize() has run
bool m_initialized;
// hook run by issueTask() and flush() for each completed task
void postProcess(int taskId, int outputSize);
// thread backend: started in the constructor, stopped (not deleted) in the destructor
class btThreadSupportInterface* m_threadInterface;
// number of task slots
int m_maxNumOutstandingTasks;
public:
/// Creates maxNumOutstandingTasks idle slots and starts the thread backend.
MiniCLTaskScheduler(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks);
~MiniCLTaskScheduler();
///call initialize at the beginning of the frame, before issuing tasks; it resets the slot bookkeeping
void initialize();
/// Sends one task covering work units [firstWorkUnit, lastWorkUnit) to the backend.
/// argData/argSizes hold MINI_CL_MAX_ARG argument slots and their byte counts.
void issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes);
///call flush to wait until all outstanding tasks have finished
void flush();
class btThreadSupportInterface* getThreadSupportInterface()
{
return m_threadInterface;
}
/// Ignores programName and always returns CMD_MINICL_ADDVECTOR.
int findProgramCommandIdByName(const char* programName) const
{
return CMD_MINICL_ADDVECTOR;//hardcoded temp value, todo: implement multi-program support
}
int getMaxNumOutstandingTasks() const
{
return m_maxNumOutstandingTasks;
}
};
/// Kernel handle: the scheduler to run on, the kernel id, and the packed arguments.
/// The argument arrays have the same dimensions as those in MiniCLTaskDesc.
struct MiniCLKernel
{
MiniCLTaskScheduler* m_scheduler;
int m_kernelProgramCommandId;
// raw bytes of each argument, at most MINICL_MAX_ARGLENGTH per slot
char m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH];
// byte count per argument slot
int m_argSizes[MINI_CL_MAX_ARG];
};
#if defined(USE_LIBSPE2) && defined(__SPU__)
////////////////////MAIN/////////////////////////////
#include "../SpuLibspe2Support.h"
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <SpuFakeDma.h>
void * SamplelsMemoryFunc();
void SampleThreadFunc(void* userPtr,void* lsMemory);
//#define DEBUG_LIBSPE2_MAINLOOP
/// SPU program entry point (compiled only with USE_LIBSPE2 and __SPU__).
/// Publishes a "free" status block at argp, then loops on the inbound mailbox:
/// a Task event fetches the task description and runs SampleThreadFunc, a
/// Shutdown event ends the loop. The status is set back to free after every event.
/// NOTE(review): this uses SpuSampleTaskDesc and taskDesc.m_mainMemoryPtr, neither of
/// which is declared by the headers visible here (MiniCLTaskDesc has no such member) -
/// presumably carried over from the sample task process; confirm before enabling.
int main(unsigned long long speid, addr64 argp, addr64 envp)
{
printf("SPU is up \n");
ATTRIBUTE_ALIGNED128(btSpuStatus status);
ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ;
unsigned int received_message = Spu_Mailbox_Event_Nothing;
bool shutdown = false;
// fetch the status block from argp, mark this SPU free and record its local-store pointer
cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
status.m_status = Spu_Status_Free;
status.m_lsMemory.p = SamplelsMemoryFunc();
// write the updated status block back to argp
cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
while (!shutdown)
{
received_message = spu_read_in_mbox();
switch(received_message)
{
case Spu_Mailbox_Event_Shutdown:
shutdown = true;
break;
case Spu_Mailbox_Event_Task:
// refresh the status
#ifdef DEBUG_LIBSPE2_MAINLOOP
printf("SPU recieved Task \n");
#endif //DEBUG_LIBSPE2_MAINLOOP
cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
btAssert(status.m_status==Spu_Status_Occupied);
// fetch the task description referenced by the status block, then run the task
cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) );
break;
case Spu_Mailbox_Event_Nothing:
default:
break;
}
// set to status free and wait for next task
status.m_status = Spu_Status_Free;
cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
}
return 0;
}
//////////////////////////////////////////////////////
#endif
#endif // MINICL_TASK_SCHEDULER_H

View file

@ -1,9 +1,26 @@
#ifndef TYPE_DEFINITIONS_H
#define TYPE_DEFINITIONS_H
#ifndef BT_TYPE_DEFINITIONS_H
#define BT_TYPE_DEFINITIONS_H
///This file provides some platform/compiler checks for common definitions
#include "LinearMath/btScalar.h"
#include "LinearMath/btMinMax.h"
#ifdef WIN32
#ifdef PFX_USE_FREE_VECTORMATH
#include "physics_effects/base_level/base/pfx_vectormath_include.win32.h"
typedef Vectormath::Aos::Vector3 vmVector3;
typedef Vectormath::Aos::Quat vmQuat;
typedef Vectormath::Aos::Matrix3 vmMatrix3;
typedef Vectormath::Aos::Transform3 vmTransform3;
typedef Vectormath::Aos::Point3 vmPoint3;
#else
#include "vectormath/vmInclude.h"
#endif//PFX_USE_FREE_VECTORMATH
#ifdef _WIN32
typedef union
{
@ -19,7 +36,11 @@ typedef union
typedef unsigned char uint8_t;
#ifndef __PHYSICS_COMMON_H__
#ifndef PFX_USE_FREE_VECTORMATH
#ifndef __BT_SKIP_UINT64_H
typedef unsigned long int uint64_t;
#endif //__BT_SKIP_UINT64_H
#endif //PFX_USE_FREE_VECTORMATH
typedef unsigned int uint32_t;
#endif //__PHYSICS_COMMON_H__
typedef unsigned short uint16_t;
@ -52,31 +73,27 @@ typedef union
#include <stdio.h>
#define spu_printf printf
#define DWORD unsigned int
typedef union
{
unsigned long long ull;
unsigned int ui[2];
void *p;
} addr64;
#else
#include <stdio.h>
#define spu_printf printf
#endif // USE_LIBSPE2
#endif //__CELLOS_LV2__
#endif
#ifdef __SPU__
#include <stdio.h>
#define printf spu_printf
#endif
/* Included here because we need uint*_t typedefs */
#include "PpuAddressSpace.h"
#endif //TYPE_DEFINITIONS_H
#endif //BT_TYPE_DEFINITIONS_H

View file

@ -48,7 +48,7 @@ PosixThreadSupport::~PosixThreadSupport()
#endif
// this semaphore will signal, if and how many threads are finished with their work
static sem_t* mainSemaphore;
static sem_t* mainSemaphore=0;
static sem_t* createSem(const char* baseName)
{
@ -58,9 +58,10 @@ static sem_t* createSem(const char* baseName)
char name[32];
snprintf(name, 32, "/%s-%d-%4.4d", baseName, getpid(), semCount++);
sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0);
if (tempSem != reinterpret_cast<sem_t *>(SEM_FAILED))
{
//printf("Created \"%s\" Semaphore %x\n", name, tempSem);
// printf("Created \"%s\" Semaphore %p\n", name, tempSem);
}
else
{
@ -172,7 +173,7 @@ void PosixThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned in
// get at least one thread which has finished
size_t last = -1;
for(size_t t=0; t < m_activeSpuStatus.size(); ++t) {
for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) {
if(2 == m_activeSpuStatus[t].m_status) {
last = t;
break;
@ -199,7 +200,8 @@ void PosixThreadSupport::startThreads(ThreadConstructionInfo& threadConstruction
m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
mainSemaphore = createSem("main");
//checkPThreadFunction(sem_wait(mainSemaphore));
for (int i=0;i < threadConstructionInfo.m_numThreads;i++)
{
printf("starting thread %d\n",i);
@ -233,17 +235,175 @@ void PosixThreadSupport::startSPU()
///tell the task scheduler we are done with the SPU tasks
void PosixThreadSupport::stopSPU()
{
for(size_t t=0; t < m_activeSpuStatus.size(); ++t) {
for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t)
{
btSpuStatus& spuStatus = m_activeSpuStatus[t];
printf("%s: Thread %i used: %ld\n", __FUNCTION__, t, spuStatus.threadUsed);
destroySem(spuStatus.startSemaphore);
checkPThreadFunction(pthread_cancel(spuStatus.thread));
}
destroySem(mainSemaphore);
printf("%s: Thread %i used: %ld\n", __FUNCTION__, int(t), spuStatus.threadUsed);
spuStatus.m_userPtr = 0;
checkPThreadFunction(sem_post(spuStatus.startSemaphore));
checkPThreadFunction(sem_wait(mainSemaphore));
printf("destroy semaphore\n");
destroySem(spuStatus.startSemaphore);
printf("semaphore destroyed\n");
checkPThreadFunction(pthread_join(spuStatus.thread,0));
}
printf("destroy main semaphore\n");
destroySem(mainSemaphore);
printf("main semaphore destroyed\n");
m_activeSpuStatus.clear();
}
class PosixCriticalSection : public btCriticalSection
{
pthread_mutex_t m_mutex;
public:
PosixCriticalSection()
{
pthread_mutex_init(&m_mutex, NULL);
}
virtual ~PosixCriticalSection()
{
pthread_mutex_destroy(&m_mutex);
}
ATTRIBUTE_ALIGNED16(unsigned int mCommonBuff[32]);
virtual unsigned int getSharedParam(int i)
{
return mCommonBuff[i];
}
virtual void setSharedParam(int i,unsigned int p)
{
mCommonBuff[i] = p;
}
virtual void lock()
{
pthread_mutex_lock(&m_mutex);
}
virtual void unlock()
{
pthread_mutex_unlock(&m_mutex);
}
};
#if defined(_POSIX_BARRIERS) && (_POSIX_BARRIERS - 20012L) >= 0
/* OK to use barriers on this platform */
/// btBarrier implemented on top of pthread_barrier_t.
class PosixBarrier : public btBarrier
{
	pthread_barrier_t m_barr;
	int m_numThreads;
	// true only while m_barr holds a successfully initialised barrier
	bool m_barrierReady;

	// destroy m_barr if (and only if) it was initialised
	void destroyBarrier()
	{
		if (m_barrierReady)
		{
			pthread_barrier_destroy(&m_barr);
			m_barrierReady = false;
		}
	}

public:
	PosixBarrier()
	:m_numThreads(0),
	m_barrierReady(false)
	{
	}

	virtual ~PosixBarrier()
	{
		// never touch a barrier that setMaxCount() did not create
		destroyBarrier();
	}

	/// Blocks until getMaxCount() threads have called sync().
	/// Terminates the process if the wait itself fails.
	virtual void sync()
	{
		int rc = pthread_barrier_wait(&m_barr);
		if(rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD)
		{
			printf("Could not wait on barrier\n");
			exit(-1);
		}
	}

	/// (Re)creates the barrier for numThreads participants.
	virtual void setMaxCount(int numThreads)
	{
		// release a previously created barrier before re-initialising
		destroyBarrier();
		int result = pthread_barrier_init(&m_barr, NULL, numThreads);
		m_numThreads = numThreads;
		m_barrierReady = (result == 0);
		btAssert(result==0);
		(void)result; // btAssert may compile to nothing
	}

	virtual int getMaxCount()
	{
		return m_numThreads;
	}
};
#else
/* Not OK to use barriers on this platform - insert alternate code here */
/// btBarrier fallback built from a mutex and a condition variable, for
/// platforms without pthread barriers.
class PosixBarrier : public btBarrier
{
	pthread_mutex_t m_mutex;
	pthread_cond_t m_cond;

	int m_numThreads;
	int	m_called;
	// incremented each time the barrier releases; waiters use it as their wake-up predicate
	unsigned int m_generation;

public:
	PosixBarrier()
	:m_numThreads(0),
	m_called(0),
	m_generation(0)
	{
	}

	virtual ~PosixBarrier()
	{
		// mutex/cond exist only after setMaxCount() ran with a positive count
		if (m_numThreads>0)
		{
			pthread_mutex_destroy(&m_mutex);
			pthread_cond_destroy(&m_cond);
		}
	}

	/// Blocks until getMaxCount() threads have called sync().
	virtual void sync()
	{
		pthread_mutex_lock(&m_mutex);
		const unsigned int arrivalGeneration = m_generation;
		m_called++;
		if (m_called == m_numThreads) {
			// last arrival: start a new generation and release everyone
			m_called = 0;
			++m_generation;
			pthread_cond_broadcast(&m_cond);
		} else {
			// pthread_cond_wait may return spuriously, so wait until the
			// generation actually changes
			while (arrivalGeneration == m_generation)
			{
				pthread_cond_wait(&m_cond,&m_mutex);
			}
		}
		pthread_mutex_unlock(&m_mutex);
	}

	/// (Re)creates the mutex and condition variable for numThreads participants.
	virtual void setMaxCount(int numThreads)
	{
		if (m_numThreads>0)
		{
			pthread_mutex_destroy(&m_mutex);
			pthread_cond_destroy(&m_cond);
		}
		m_called = 0;
		pthread_mutex_init(&m_mutex,NULL);
		pthread_cond_init(&m_cond,NULL);
		m_numThreads = numThreads;
	}

	virtual int getMaxCount()
	{
		return m_numThreads;
	}
};
#endif//_POSIX_BARRIERS
/// Allocates a PosixBarrier sized for getNumTasks() participants.
/// Release it with deleteBarrier().
btBarrier* PosixThreadSupport::createBarrier()
{
	const int participants = getNumTasks();
	PosixBarrier* result = new PosixBarrier();
	result->setMaxCount(participants);
	return result;
}
/// Allocates a new PosixCriticalSection; release it with deleteCriticalSection().
btCriticalSection* PosixThreadSupport::createCriticalSection()
{
return new PosixCriticalSection();
}
/// Deletes a barrier obtained from createBarrier().
void PosixThreadSupport::deleteBarrier(btBarrier* barrier)
{
delete barrier;
}
/// Deletes a critical section obtained from createCriticalSection().
void PosixThreadSupport::deleteCriticalSection(btCriticalSection* cs)
{
delete cs;
}
#endif // USE_PTHREADS

View file

@ -13,16 +13,22 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_POSIX_THREAD_SUPPORT_H
#define BT_POSIX_THREAD_SUPPORT_H
#include "LinearMath/btScalar.h"
#include "PlatformDefinitions.h"
#ifdef USE_PTHREADS //platform specific defines are defined in PlatformDefinitions.h
#ifdef USE_PTHREADS //platform specific defines are defined in PlatformDefinitions.h
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
#endif //_XOPEN_SOURCE
#include <pthread.h>
#include <semaphore.h>
#ifndef POSIX_THREAD_SUPPORT_H
#define POSIX_THREAD_SUPPORT_H
#include "LinearMath/btAlignedObjectArray.h"
@ -68,7 +74,7 @@ public:
struct ThreadConstructionInfo
{
ThreadConstructionInfo(char* uniqueName,
ThreadConstructionInfo(const char* uniqueName,
PosixThreadFunc userThreadFunc,
PosixlsMemorySetupFunc lsMemoryFunc,
int numThreads=1,
@ -83,7 +89,7 @@ public:
}
char* m_uniqueName;
const char* m_uniqueName;
PosixThreadFunc m_userThreadFunc;
PosixlsMemorySetupFunc m_lsMemoryFunc;
int m_numThreads;
@ -117,8 +123,25 @@ public:
{
return m_activeSpuStatus.size();
}
virtual btBarrier* createBarrier();
virtual btCriticalSection* createCriticalSection();
virtual void deleteBarrier(btBarrier* barrier);
virtual void deleteCriticalSection(btCriticalSection* criticalSection);
virtual void* getThreadLocalMemory(int taskId)
{
return m_activeSpuStatus[taskId].m_lsMemory;
}
};
#endif // POSIX_THREAD_SUPPORT_H
#endif // USE_PTHREADS
#endif // BT_POSIX_THREAD_SUPPORT_H

View file

@ -1,18 +1,37 @@
#ifndef __PPU_ADDRESS_SPACE_H
#define __PPU_ADDRESS_SPACE_H
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2010 Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifdef WIN32
#ifndef BT_PPU_ADDRESS_SPACE_H
#define BT_PPU_ADDRESS_SPACE_H
#ifdef _WIN32
//stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions
#pragma warning (disable: 4311)
#pragma warning (disable: 4312)
#endif //WIN32
#endif //_WIN32
#ifdef USE_ADDR64
typedef uint64_t ppu_address_t;
#if defined(_WIN64)
typedef unsigned __int64 ppu_address_t;
#elif defined(__LP64__) || defined(__x86_64__)
typedef uint64_t ppu_address_t;
#else
typedef uint32_t ppu_address_t;
#endif
typedef uint32_t ppu_address_t;
#endif //defined(_WIN64)
#endif
#endif //BT_PPU_ADDRESS_SPACE_H

View file

@ -91,3 +91,91 @@ void SequentialThreadSupport::setNumTasks(int numTasks)
{
printf("SequentialThreadSupport::setNumTasks(%d) is not implemented and has no effect\n",numTasks);
}
/// Barrier used by the sequential (single-task) backend: nothing to wait for,
/// so every operation is a no-op and the reported participant count is 1.
class btDummyBarrier : public btBarrier
{
public:
	btDummyBarrier() {}

	virtual ~btDummyBarrier() {}

	/// Returns immediately.
	void sync() {}

	/// The requested count is ignored.
	virtual void setMaxCount(int n)
	{
	}

	/// Always 1.
	virtual int getMaxCount()
	{
		return 1;
	}
};
/// Critical section used by the sequential backend. No real locking happens:
/// entry 0 of mCommonBuff records the lock state and shared parameter i lives
/// in entry i+1, so valid parameter indices are 0..30.
/// NOTE(review): mCommonBuff is not declared in this class - presumably it is
/// inherited from btCriticalSection; confirm against that header.
class btDummyCriticalSection : public btCriticalSection
{
public:
	btDummyCriticalSection() {}

	virtual ~btDummyCriticalSection() {}

	/// Reads shared parameter i (stored one entry past the lock flag).
	unsigned int getSharedParam(int i)
	{
		btAssert(i>=0&&i<31);
		const int entry = i+1;
		return mCommonBuff[entry];
	}

	/// Writes shared parameter i (stored one entry past the lock flag).
	void setSharedParam(int i,unsigned int p)
	{
		btAssert(i>=0&&i<31);
		const int entry = i+1;
		mCommonBuff[entry] = p;
	}

	/// Marks the section as held by setting entry 0 to 1.
	void lock()
	{
		mCommonBuff[0] = 1;
	}

	/// Marks the section as released by setting entry 0 to 0.
	void unlock()
	{
		mCommonBuff[0] = 0;
	}
};
/// Allocates a no-op btDummyBarrier; release it with deleteBarrier().
btBarrier* SequentialThreadSupport::createBarrier()
{
return new btDummyBarrier();
}
/// Allocates a btDummyCriticalSection; release it with deleteCriticalSection().
btCriticalSection* SequentialThreadSupport::createCriticalSection()
{
return new btDummyCriticalSection();
}
/// Deletes a barrier obtained from createBarrier().
void SequentialThreadSupport::deleteBarrier(btBarrier* barrier)
{
delete barrier;
}
/// Deletes a critical section obtained from createCriticalSection().
void SequentialThreadSupport::deleteCriticalSection(btCriticalSection* criticalSection)
{
delete criticalSection;
}

View file

@ -17,8 +17,8 @@ subject to the following restrictions:
#include "PlatformDefinitions.h"
#ifndef SEQUENTIAL_THREAD_SUPPORT_H
#define SEQUENTIAL_THREAD_SUPPORT_H
#ifndef BT_SEQUENTIAL_THREAD_SUPPORT_H
#define BT_SEQUENTIAL_THREAD_SUPPORT_H
#include "LinearMath/btAlignedObjectArray.h"
@ -51,7 +51,7 @@ private:
public:
struct SequentialThreadConstructionInfo
{
SequentialThreadConstructionInfo (char* uniqueName,
SequentialThreadConstructionInfo (const char* uniqueName,
SequentialThreadFunc userThreadFunc,
SequentiallsMemorySetupFunc lsMemoryFunc
)
@ -62,7 +62,7 @@ public:
}
char* m_uniqueName;
const char* m_uniqueName;
SequentialThreadFunc m_userThreadFunc;
SequentiallsMemorySetupFunc m_lsMemoryFunc;
};
@ -85,8 +85,16 @@ public:
{
return 1;
}
virtual btBarrier* createBarrier();
virtual btCriticalSection* createCriticalSection();
virtual void deleteBarrier(btBarrier* barrier);
virtual void deleteCriticalSection(btCriticalSection* criticalSection);
};
#endif //SEQUENTIAL_THREAD_SUPPORT_H
#endif //BT_SEQUENTIAL_THREAD_SUPPORT_H

View file

@ -13,8 +13,8 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_COLLISION_OBJECT_WRAPPER_H
#define SPU_COLLISION_OBJECT_WRAPPER_H
#ifndef BT_SPU_COLLISION_OBJECT_WRAPPER_H
#define BT_SPU_COLLISION_OBJECT_WRAPPER_H
#include "PlatformDefinitions.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
@ -37,4 +37,4 @@ public:
};
#endif //SPU_COLLISION_OBJECT_WRAPPER_H
#endif //BT_SPU_COLLISION_OBJECT_WRAPPER_H

View file

@ -26,7 +26,7 @@ subject to the following restrictions:
void SpuCollisionTaskProcess::setNumTasks(int maxNumTasks)
{
if (m_maxNumOutstandingTasks != maxNumTasks)
if (int(m_maxNumOutstandingTasks) != maxNumTasks)
{
m_maxNumOutstandingTasks = maxNumTasks;
m_taskBusy.resize(m_maxNumOutstandingTasks);
@ -44,7 +44,6 @@ void SpuCollisionTaskProcess::setNumTasks(int maxNumTasks)
}
m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*m_maxNumOutstandingTasks, 128);
m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*6, 128);
}
}
@ -69,7 +68,7 @@ m_maxNumOutstandingTasks(0)
m_threadInterface->startSPU();
//printf("sizeof vec_float4: %d\n", sizeof(vec_float4));
printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", sizeof(SpuGatherAndProcessWorkUnitInput));
printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", int(sizeof(SpuGatherAndProcessWorkUnitInput)));
}

View file

@ -13,12 +13,12 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_COLLISION_TASK_PROCESS_H
#define SPU_COLLISION_TASK_PROCESS_H
#ifndef BT_SPU_COLLISION_TASK_PROCESS_H
#define BT_SPU_COLLISION_TASK_PROCESS_H
#include <assert.h>
#include <LinearMath/btScalar.h>
#include "LinearMath/btScalar.h"
#include "PlatformDefinitions.h"
#include "LinearMath/btAlignedObjectArray.h"
@ -35,7 +35,7 @@ subject to the following restrictions:
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
#include "BulletCollision/CollisionShapes/btConvexShape.h"
#include <LinearMath/btAlignedAllocator.h>
#include "LinearMath/btAlignedAllocator.h"
#include <stdio.h>
@ -159,5 +159,5 @@ public:
#define MIDPHASE_NUM_WORKUNITS_PER_TASK (MIDPHASE_NUM_WORKUNITS_PER_PAGE*MIDPHASE_NUM_WORKUNIT_PAGES)
#endif // SPU_COLLISION_TASK_PROCESS_H
#endif // BT_SPU_COLLISION_TASK_PROCESS_H

View file

@ -22,7 +22,7 @@ subject to the following restrictions:
void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
void SpuContactManifoldCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
{
btAssert(0);
}
@ -34,7 +34,7 @@ btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollision
}
#ifndef __SPU__
SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1)
SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObject* body0,const btCollisionObject* body1)
:btCollisionAlgorithm(ci)
#ifdef USE_SEPDISTANCE_UTIL
,m_sepDistance(body0->getCollisionShape()->getAngularMotionDisc(),body1->getCollisionShape()->getAngularMotionDisc())

View file

@ -13,14 +13,15 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
#define SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
#ifndef BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
#define BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
#include "LinearMath/btTransformUtil.h"
#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
class btPersistentManifold;
@ -37,20 +38,20 @@ ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btColli
float m_collisionMargin0;
float m_collisionMargin1;
btCollisionObject* m_collisionObject0;
btCollisionObject* m_collisionObject1;
const btCollisionObject* m_collisionObject0;
const btCollisionObject* m_collisionObject1;
public:
virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObject* body0,const btCollisionObject* body1);
#ifdef USE_SEPDISTANCE_UTIL
btConvexSeparatingDistanceUtil m_sepDistance;
#endif //USE_SEPDISTANCE_UTIL
@ -68,12 +69,12 @@ public:
return m_manifoldPtr;
}
btCollisionObject* getCollisionObject0()
const btCollisionObject* getCollisionObject0()
{
return m_collisionObject0;
}
btCollisionObject* getCollisionObject1()
const btCollisionObject* getCollisionObject1()
{
return m_collisionObject1;
}
@ -108,13 +109,13 @@ public:
struct CreateFunc :public btCollisionAlgorithmCreateFunc
{
virtual btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
{
virtual btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
{
void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(SpuContactManifoldCollisionAlgorithm));
return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0,body1);
return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0Wrap->getCollisionObject(),body1Wrap->getCollisionObject());
}
};
};
#endif //SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
#endif //BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H

View file

@ -1,8 +1,24 @@
#ifndef DOUBLE_BUFFER_H
#define DOUBLE_BUFFER_H
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_DOUBLE_BUFFER_H
#define BT_DOUBLE_BUFFER_H
#include "SpuFakeDma.h"
#include <LinearMath/btScalar.h>
#include "LinearMath/btScalar.h"
///DoubleBuffer

View file

@ -30,7 +30,7 @@ void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag
cellDmaLargeGet(ls,ea,size,tag,tid,rid);
return ls;
#else
return (void*)(uint32_t)ea;
return (void*)(ppu_address_t)ea;
#endif
}
@ -40,7 +40,7 @@ void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag
mfc_get(ls,ea,size,tag,0,0);
return ls;
#else
return (void*)(uint32_t)ea;
return (void*)(ppu_address_t)ea;
#endif
}
@ -53,7 +53,7 @@ void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uin
cellDmaGet(ls,ea,size,tag,tid,rid);
return ls;
#else
return (void*)(uint32_t)ea;
return (void*)(ppu_address_t)ea;
#endif
}
@ -174,6 +174,9 @@ int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid,
{
char* mainMem = (char*)ea;
char* localStore = (char*)ls;
// printf("mainMem=%x, localStore=%x",mainMem,localStore);
#ifdef USE_MEMCPY
memcpy(localStore,mainMem,size);
#else
@ -182,6 +185,7 @@ int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid,
localStore[i] = mainMem[i];
}
#endif //#ifdef USE_MEMCPY
// printf(" finished\n");
return 0;
}

View file

@ -13,8 +13,8 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef FAKE_DMA_H
#define FAKE_DMA_H
#ifndef BT_FAKE_DMA_H
#define BT_FAKE_DMA_H
#include "PlatformDefinitions.h"
@ -132,4 +132,4 @@ void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uin
void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
#endif //FAKE_DMA_H
#endif //BT_FAKE_DMA_H

View file

@ -23,6 +23,8 @@ subject to the following restrictions:
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
#include "LinearMath/btQuickprof.h"
#include "BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h"
#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
@ -48,6 +50,7 @@ bool SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,i
// (proxyType0 == CONE_SHAPE_PROXYTYPE) ||
(proxyType0 == TRIANGLE_MESH_SHAPE_PROXYTYPE) ||
(proxyType0 == CONVEX_HULL_SHAPE_PROXYTYPE)||
(proxyType0 == STATIC_PLANE_PROXYTYPE)||
(proxyType0 == COMPOUND_SHAPE_PROXYTYPE)
);
@ -60,9 +63,11 @@ bool SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,i
// (proxyType1 == CONE_SHAPE_PROXYTYPE) ||
(proxyType1 == TRIANGLE_MESH_SHAPE_PROXYTYPE) ||
(proxyType1 == CONVEX_HULL_SHAPE_PROXYTYPE) ||
(proxyType1 == STATIC_PLANE_PROXYTYPE) ||
(proxyType1 == COMPOUND_SHAPE_PROXYTYPE)
);
return supported0 && supported1;
}
@ -124,8 +129,33 @@ public:
{
int proxyType0 = colObj0->getCollisionShape()->getShapeType();
int proxyType1 = colObj1->getCollisionShape()->getShapeType();
if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1))
bool supportsSpuDispatch = m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1)
&& ((colObj0->getCollisionFlags() & btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) == 0)
&& ((colObj1->getCollisionFlags() & btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) == 0);
if (proxyType0 == COMPOUND_SHAPE_PROXYTYPE)
{
btCompoundShape* compound = (btCompoundShape*)colObj0->getCollisionShape();
if (compound->getNumChildShapes()>MAX_SPU_COMPOUND_SUBSHAPES)
{
//printf("PPU fallback, compound->getNumChildShapes(%d)>%d\n",compound->getNumChildShapes(),MAX_SPU_COMPOUND_SUBSHAPES);
supportsSpuDispatch = false;
}
}
if (proxyType1 == COMPOUND_SHAPE_PROXYTYPE)
{
btCompoundShape* compound = (btCompoundShape*)colObj1->getCollisionShape();
if (compound->getNumChildShapes()>MAX_SPU_COMPOUND_SUBSHAPES)
{
//printf("PPU fallback, compound->getNumChildShapes(%d)>%d\n",compound->getNumChildShapes(),MAX_SPU_COMPOUND_SUBSHAPES);
supportsSpuDispatch = false;
}
}
if (supportsSpuDispatch)
{
int so = sizeof(SpuContactManifoldCollisionAlgorithm);
#ifdef ALLOCATE_SEPARATELY
void* mem = btAlignedAlloc(so,16);//m_dispatcher->allocateCollisionAlgorithm(so);
@ -136,7 +166,10 @@ public:
collisionPair.m_internalTmpValue = 2;
} else
{
collisionPair.m_algorithm = m_dispatcher->findAlgorithm(colObj0,colObj1);
btCollisionObjectWrapper ob0(0,colObj0->getCollisionShape(),colObj0,colObj0->getWorldTransform());
btCollisionObjectWrapper ob1(0,colObj1->getCollisionShape(),colObj1,colObj1->getWorldTransform());
collisionPair.m_algorithm = m_dispatcher->findAlgorithm(&ob0,&ob1);
collisionPair.m_internalTmpValue = 3;
}
}
@ -175,48 +208,60 @@ void SpuGatheringCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPai
//send one big batch
int numTotalPairs = pairCache->getNumOverlappingPairs();
btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr();
int i;
if (numTotalPairs)
{
BT_PROFILE("addWorkToTask");
for (i=0;i<numTotalPairs;)
btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr();
int i;
{
//Performance Hint: tweak this number during benchmarking
static const int pairRange = SPU_BATCHSIZE_BROADPHASE_PAIRS;
int endIndex = (i+pairRange) < numTotalPairs ? i+pairRange : numTotalPairs;
m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
i = endIndex;
}
}
{
BT_PROFILE("PPU fallback");
//handle PPU fallback pairs
for (i=0;i<numTotalPairs;i++)
{
btBroadphasePair& collisionPair = pairPtr[i];
if (collisionPair.m_internalTmpValue == 3)
int pairRange = SPU_BATCHSIZE_BROADPHASE_PAIRS;
if (numTotalPairs < (m_spuCollisionTaskProcess->getNumTasks()*SPU_BATCHSIZE_BROADPHASE_PAIRS))
{
if (collisionPair.m_algorithm)
pairRange = (numTotalPairs/m_spuCollisionTaskProcess->getNumTasks())+1;
}
BT_PROFILE("addWorkToTask");
for (i=0;i<numTotalPairs;)
{
//Performance Hint: tweak this number during benchmarking
int endIndex = (i+pairRange) < numTotalPairs ? i+pairRange : numTotalPairs;
m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
i = endIndex;
}
}
{
BT_PROFILE("PPU fallback");
//handle PPU fallback pairs
for (i=0;i<numTotalPairs;i++)
{
btBroadphasePair& collisionPair = pairPtr[i];
if (collisionPair.m_internalTmpValue == 3)
{
btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
if (dispatcher->needsCollision(colObj0,colObj1))
if (collisionPair.m_algorithm)
{
btManifoldResult contactPointResult(colObj0,colObj1);
if (dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE)
btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
if (dispatcher->needsCollision(colObj0,colObj1))
{
//discrete collision detection query
collisionPair.m_algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult);
} else
{
//continuous collision detection query, time of impact (toi)
btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
if (dispatchInfo.m_timeOfImpact > toi)
dispatchInfo.m_timeOfImpact = toi;
//discrete collision detection query
btCollisionObjectWrapper ob0(0,colObj0->getCollisionShape(),colObj0,colObj0->getWorldTransform());
btCollisionObjectWrapper ob1(0,colObj1->getCollisionShape(),colObj1,colObj1->getWorldTransform());
btManifoldResult contactPointResult(&ob0,&ob1);
if (dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE)
{
collisionPair.m_algorithm->processCollision(&ob0,&ob1,dispatchInfo,&contactPointResult);
} else
{
//continuous collision detection query, time of impact (toi)
btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
if (dispatchInfo.m_timeOfImpact > toi)
dispatchInfo.m_timeOfImpact = toi;
}
}
}
}

View file

@ -12,8 +12,8 @@ subject to the following restrictions:
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_GATHERING_COLLISION__DISPATCHER_H
#define SPU_GATHERING_COLLISION__DISPATCHER_H
#ifndef BT_SPU_GATHERING_COLLISION__DISPATCHER_H
#define BT_SPU_GATHERING_COLLISION__DISPATCHER_H
#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
@ -23,9 +23,10 @@ subject to the following restrictions:
///Too big a value might leave some SPUs idle, while a few other SPUs are doing all the work.
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 8
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16
#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64
#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 512
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 1024
@ -66,4 +67,6 @@ public:
#endif //SPU_GATHERING_COLLISION__DISPATCHER_H
#endif //BT_SPU_GATHERING_COLLISION__DISPATCHER_H

View file

@ -14,8 +14,8 @@ subject to the following restrictions:
*/
#ifndef SPU_LIBSPE2_SUPPORT_H
#define SPU_LIBSPE2_SUPPORT_H
#ifndef BT_SPU_LIBSPE2_SUPPORT_H
#define BT_SPU_LIBSPE2_SUPPORT_H
#include <LinearMath/btScalar.h> //for uint32_t etc.
@ -173,7 +173,7 @@ private:
#endif //USE_LIBSPE2
#endif //SPU_LIBSPE2_SUPPORT_H
#endif //BT_SPU_LIBSPE2_SUPPORT_H

View file

@ -24,11 +24,11 @@ subject to the following restrictions:
#include <math.h>
//#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h"
#include <vectormath_aos.h>
#include "../PlatformDefinitions.h"
using namespace Vectormath::Aos;
enum FeatureType { F, E, V };
@ -39,21 +39,21 @@ enum FeatureType { F, E, V };
class Box
{
public:
Vector3 half;
vmVector3 mHalf;
inline Box()
{}
inline Box(PE_REF(Vector3) half_);
inline Box(PE_REF(vmVector3) half_);
inline Box(float hx, float hy, float hz);
inline void Set(PE_REF(Vector3) half_);
inline void Set(PE_REF(vmVector3) half_);
inline void Set(float hx, float hy, float hz);
inline Vector3 GetAABB(const Matrix3& rotation) const;
inline vmVector3 GetAABB(const vmMatrix3& rotation) const;
};
inline
Box::Box(PE_REF(Vector3) half_)
Box::Box(PE_REF(vmVector3) half_)
{
Set(half_);
}
@ -66,23 +66,23 @@ Box::Box(float hx, float hy, float hz)
inline
void
Box::Set(PE_REF(Vector3) half_)
Box::Set(PE_REF(vmVector3) half_)
{
half = half_;
mHalf = half_;
}
inline
void
Box::Set(float hx, float hy, float hz)
{
half = Vector3(hx, hy, hz);
mHalf = vmVector3(hx, hy, hz);
}
inline
Vector3
Box::GetAABB(const Matrix3& rotation) const
vmVector3
Box::GetAABB(const vmMatrix3& rotation) const
{
return absPerElem(rotation) * half;
return absPerElem(rotation) * mHalf;
}
//-------------------------------------------------------------------------------------------------
@ -95,7 +95,7 @@ class BoxPoint
public:
BoxPoint() : localPoint(0.0f) {}
Point3 localPoint;
vmPoint3 localPoint;
FeatureType featureType;
int featureIdx;

View file

@ -44,7 +44,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape*
const btTransform& t = xform;
btMatrix3x3 abs_b = t.getBasis().absolute();
btVector3 center = t.getOrigin();
btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
btVector3 extent = halfExtents.dot3( abs_b[0], abs_b[1], abs_b[2] );
aabbMin = center - extent;
aabbMax = center + extent;
@ -67,7 +67,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape*
const btTransform& t = xform;
btMatrix3x3 abs_b = t.getBasis().absolute();
btVector3 center = t.getOrigin();
btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
btVector3 extent = halfExtents.dot3( abs_b[0], abs_b[1], abs_b[2] );
aabbMin = center - extent;
aabbMax = center + extent;
@ -198,6 +198,12 @@ int getShapeTypeSize(int shapeType)
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
case STATIC_PLANE_PROXYTYPE:
{
int shapeSize = sizeof(btStaticPlaneShape);
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
default:
btAssert(0);
@ -225,6 +231,7 @@ void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionSha
{
register int dmaSize = getShapeTypeSize(shapeType);
cellDmaGet(collisionShapeLocation, collisionShapePtr , dmaSize, DMA_TAG(dmaTag), 0, 0);
//cellDmaGetReadOnly(collisionShapeLocation, collisionShapePtr , dmaSize, DMA_TAG(dmaTag), 0, 0);
//cellDmaWaitTagStatusAll(DMA_MASK(dmaTag));
}

View file

@ -20,6 +20,7 @@ subject to the following restrictions:
#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
#include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
#include "BulletCollision/CollisionShapes/btCylinderShape.h"
#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
@ -32,7 +33,9 @@ subject to the following restrictions:
#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
#include "BulletCollision/CollisionShapes/btCompoundShape.h"
#define MAX_NUM_SPU_CONVEX_POINTS 128
#define MAX_NUM_SPU_CONVEX_POINTS 128 //@fallback to PPU if a btConvexHullShape has more than MAX_NUM_SPU_CONVEX_POINTS points
#define MAX_SPU_COMPOUND_SUBSHAPES 16 //@fallback on PPU if compound has more than MAX_SPU_COMPOUND_SUBSHAPES child shapes
#define MAX_SHAPE_SIZE 256 //@todo: assert on this
ATTRIBUTE_ALIGNED16(struct) SpuConvexPolyhedronVertexData
{
@ -43,7 +46,7 @@ ATTRIBUTE_ALIGNED16(struct) SpuConvexPolyhedronVertexData
ATTRIBUTE_ALIGNED16(btVector3 g_convexPointBuffer[MAX_NUM_SPU_CONVEX_POINTS]);
};
#define MAX_SHAPE_SIZE 256
ATTRIBUTE_ALIGNED16(struct) CollisionShape_LocalStoreMemory
{
@ -53,7 +56,7 @@ ATTRIBUTE_ALIGNED16(struct) CollisionShape_LocalStoreMemory
ATTRIBUTE_ALIGNED16(struct) CompoundShape_LocalStoreMemory
{
// Compound data
#define MAX_SPU_COMPOUND_SUBSHAPES 16
ATTRIBUTE_ALIGNED16(btCompoundShapeChild gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES]);
ATTRIBUTE_ALIGNED16(char gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES][MAX_SHAPE_SIZE]);
};

View file

@ -17,6 +17,12 @@ subject to the following restrictions:
//#define DEBUG_SPU_COLLISION_DETECTION 1
#ifdef DEBUG_SPU_COLLISION_DETECTION
#ifndef __SPU__
#include <stdio.h>
#define spu_printf printf
#endif
#endif //DEBUG_SPU_COLLISION_DETECTION
SpuContactResult::SpuContactResult()
{
@ -99,50 +105,50 @@ bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld,
if (depth > manifoldPtr->getContactBreakingThreshold())
return false;
//provide inverses or just calculate?
btTransform transAInv = transA.inverse();//m_body0->m_cachedInvertedWorldTransform;
btTransform transBInv= transB.inverse();//m_body1->m_cachedInvertedWorldTransform;
//if (depth > manifoldPtr->getContactProcessingThreshold())
// return false;
btVector3 pointA;
btVector3 localA;
btVector3 localB;
btVector3 normal;
if (isSwapped)
{
normal = normalOnBInWorld * -1;
pointA = pointInWorld + normal * depth;
localA = transAInv(pointA );
localB = transBInv(pointInWorld);
/*localA = transBInv(pointA );
localB = transAInv(pointInWorld);*/
localA = transA.invXform(pointA );
localB = transB.invXform(pointInWorld);
}
else
{
normal = normalOnBInWorld;
pointA = pointInWorld + normal * depth;
localA = transAInv(pointA );
localB = transBInv(pointInWorld);
localA = transA.invXform(pointA );
localB = transB.invXform(pointInWorld);
}
btManifoldPoint newPt(localA,localB,normal,depth);
newPt.m_positionWorldOnA = pointA;
newPt.m_positionWorldOnB = pointInWorld;
newPt.m_combinedFriction = combinedFriction;
newPt.m_combinedRestitution = combinedRestitution;
int insertIndex = manifoldPtr->getCacheEntry(newPt);
if (insertIndex >= 0)
{
// manifoldPtr->replaceContactPoint(newPt,insertIndex);
// return true;
#ifdef DEBUG_SPU_COLLISION_DETECTION
spu_printf("SPU: same contact detected, nothing done\n");
#endif //DEBUG_SPU_COLLISION_DETECTION
// This is not needed, just use the old info! saves a DMA transfer as well
// we need to replace the current contact point, otherwise small errors will accumulate (spheres start rolling etc)
manifoldPtr->replaceContactPoint(newPt,insertIndex);
return true;
} else
{
newPt.m_combinedFriction = combinedFriction;
newPt.m_combinedRestitution = combinedRestitution;
/*
///@todo: SPU callbacks, either immediate (local on the SPU), or deferred
//User can override friction and/or restitution
@ -155,6 +161,7 @@ bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld,
(*gContactAddedCallback)(newPt,m_body0,m_partId0,m_index0,m_body1,m_partId1,m_index1);
}
*/
manifoldPtr->addManifoldPoint(newPt);
return true;
@ -181,7 +188,12 @@ void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManif
void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
{
//spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth);
#ifdef DEBUG_SPU_COLLISION_DETECTION
spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth);
spu_printf("*** normal = %f,%f,%f\n",normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
spu_printf("*** position = %f,%f,%f\n",pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
#endif //DEBUG_SPU_COLLISION_DETECTION
#ifdef DEBUG_SPU_COLLISION_DETECTION
// int sman = sizeof(rage::phManifold);

View file

@ -17,7 +17,7 @@ subject to the following restrictions:
#define SPU_CONTACT_RESULT2_H
#ifndef WIN32
#ifndef _WIN32
#include <stdint.h>
#endif

View file

@ -24,7 +24,7 @@ class btStackAlloc;
class btIDebugDraw;
#include "BulletCollision/NarrowphaseCollision/btConvexPenetrationDepthSolver.h"
#include <LinearMath/btTransform.h>
#include "LinearMath/btTransform.h"
///ConvexPenetrationDepthSolver provides an interface for penetration depth calculation.

View file

@ -52,7 +52,7 @@ subject to the following restrictions:
#ifdef __SPU__
///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases
#ifndef USE_LIBSPE2
#define USE_SOFTWARE_CACHE 1
//#define USE_SOFTWARE_CACHE 1
#endif
#endif //__SPU__
@ -190,10 +190,27 @@ void* createCollisionLocalStoreMemory()
{
return &gLocalStoreMemory;
}
///Nothing to free in this configuration: createCollisionLocalStoreMemory() above
///returns &gLocalStoreMemory instead of allocating, so this is intentionally a no-op.
void deleteCollisionLocalStoreMemory()
{
}
#else
btAlignedObjectArray<CollisionTask_LocalStoreMemory*> sLocalStorePointers;
void* createCollisionLocalStoreMemory()
{
return new CollisionTask_LocalStoreMemory;
CollisionTask_LocalStoreMemory* localStore = new CollisionTask_LocalStoreMemory;
sLocalStorePointers.push_back(localStore);
return localStore;
}
void deleteCollisionLocalStoreMemory()
{
for (int i=0;i<sLocalStorePointers.size();i++)
{
delete sLocalStorePointers[i];
}
sLocalStorePointers.clear();
}
#endif
@ -372,6 +389,151 @@ public:
};
///Single-contact test between a convex shape (slot 0) and a static plane (slot 1).
///Takes the convex support vertex in the direction of the negated plane normal, measures its
///signed distance to the plane and, when that distance is below the manifold's contact
///breaking threshold, reports one contact point through spuContacts.
///@param wuInput      pair input; m_spuCollisionShapes[0] is read as btConvexShape, [1] as btStaticPlaneShape
///@param lsMemPtr     local store memory; only its contact manifold's breaking threshold is read here
///@param spuContacts  receives the contact (normal on B, point on B, distance)
void btConvexPlaneCollideSingleContact (SpuCollisionPairInput* wuInput,CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult& spuContacts)
{
	btConvexShape* convex = (btConvexShape*) wuInput->m_spuCollisionShapes[0];
	btStaticPlaneShape* plane = (btStaticPlaneShape*) wuInput->m_spuCollisionShapes[1];

	const btVector3& planeNormal = plane->getPlaneNormal();
	const btScalar& planeConstant = plane->getPlaneConstant();

	const btTransform& convexXform = wuInput->m_worldTransform0;
	const btTransform& planeXform = wuInput->m_worldTransform1;

	// Relative transforms in both directions: convex expressed in plane space and vice versa.
	const btTransform convexInPlane = planeXform.inverse() * convexXform;
	const btTransform planeInConvex = convexXform.inverse() * planeXform;

	// Support query uses localGetSupportVertexNonVirtual; the ...WithoutMarginNonVirtual
	// variant was left disabled in the original code.
	const btVector3 supportDir = planeInConvex.getBasis()*-planeNormal;
	const btVector3 supportLocal = convex->localGetSupportVertexNonVirtual(supportDir);
	const btVector3 supportInPlane = convexInPlane(supportLocal);

	const btScalar distance = (planeNormal.dot(supportInPlane) - planeConstant);

	// No contact unless the vertex is closer to the plane than the breaking threshold.
	if (!(distance < lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold()))
	{
		return;
	}

	// Project the vertex onto the plane (plane space), then move it to world space.
	const btVector3 projectedInPlane = supportInPlane - distance*planeNormal;
	const btVector3 pOnB = planeXform * projectedInPlane;
	const btVector3 normalOnSurfaceB = planeXform.getBasis() * planeNormal;

	/// report a contact. internally this will be kept persistent, and contact reduction is done
	spuContacts.addContactPoint(normalOnSurfaceB,pOnB,distance);
}
///Handles one convex-versus-static-plane pair.
///Steps, in order:
///  1. For each shape slot whose type is CONVEX_HULL_SHAPE_PROXYTYPE, fetch the btConvexHullShape
///     object with cellDmaGet into a local aligned buffer, then call dmaConvexVertexData for it.
///  2. For each such slot, wrap the fetched points in a local btConvexPointCloudShape and point
///     wuInput->m_spuCollisionShapes[slot] at that wrapper.
///  3. Configure spuContacts via setContactInfo and run btConvexPlaneCollideSingleContact.
///@param wuInput      pair description; its m_spuCollisionShapes entries may be overwritten (see step 2)
///@param lsMemPtr     local store memory providing convexVertexData, the manifold and the collision objects
///@param spuContacts  contact sink that receives the contact info and any generated contact
///NOTE(review): the caller visible in this file invokes this only when m_shapeType1 is
///STATIC_PLANE_PROXYTYPE, so the shape-1 convex-hull branches look unreachable from there - confirm.
void ProcessConvexPlaneSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
{
register int dmaSize = 0;
register ppu_address_t dmaPpuAddress2;
// Main-memory manifold pointer; only its address is used below (manifoldAddress).
btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr;
///DMA in the vertices for convex shapes
// Raw, 16-byte-aligned buffers sized for a btConvexHullShape; filled by cellDmaGet below.
ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]);
// Start both hull-object transfers on tag 1 before waiting on either.
if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) )
{
// spu_printf("SPU: DMA btConvexHullShape\n");
dmaSize = sizeof(btConvexHullShape);
dmaPpuAddress2 = wuInput->m_collisionShapes[0];
cellDmaGet(&convexHullShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
//cellDmaWaitTagStatusAll(DMA_MASK(1));
}
if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
{
// spu_printf("SPU: DMA btConvexHullShape\n");
dmaSize = sizeof(btConvexHullShape);
dmaPpuAddress2 = wuInput->m_collisionShapes[1];
cellDmaGet(&convexHullShape1, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
//cellDmaWaitTagStatusAll(DMA_MASK(1));
}
// Wait for the tag-1 transfer, then hand the fetched hull object to dmaConvexVertexData.
// presumably dmaConvexVertexData starts the vertex transfer on tag 2 (the waits on DMA_MASK(2)
// further down suggest so) - TODO confirm against its definition.
if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
{
cellDmaWaitTagStatusAll(DMA_MASK(1));
dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0);
lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0];
}
if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
{
cellDmaWaitTagStatusAll(DMA_MASK(1));
dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1);
lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1];
}
// Stack-local point-cloud wrappers. wuInput->m_spuCollisionShapes[] is pointed at them below,
// so those pointers are only valid until this function returns.
btConvexPointCloudShape cpc0,cpc1;
if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
{
cellDmaWaitTagStatusAll(DMA_MASK(2));
lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0];
// Read the local scaling from the hull shape before the slot is redirected to cpc0.
btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0];
const btVector3& localScaling = ch->getLocalScalingNV();
cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling);
wuInput->m_spuCollisionShapes[0] = &cpc0;
}
if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
{
cellDmaWaitTagStatusAll(DMA_MASK(2));
lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0];
// Read the local scaling from the hull shape before the slot is redirected to cpc1.
btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1];
const btVector3& localScaling = ch->getLocalScalingNV();
cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling);
wuInput->m_spuCollisionShapes[1] = &cpc1;
}
// const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0];
// const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1];
// int shapeType0 = wuInput->m_shapeType0;
// int shapeType1 = wuInput->m_shapeType1;
// NOTE(review): marginA, marginB, sumMargin and cpInput are filled in here but are not read
// again anywhere in this function (btConvexPlaneCollideSingleContact does not take cpInput).
float marginA = wuInput->m_collisionMargin0;
float marginB = wuInput->m_collisionMargin1;
SpuClosestPointInput cpInput;
cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0];
cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1];
cpInput.m_transformA = wuInput->m_worldTransform0;
cpInput.m_transformB = wuInput->m_worldTransform1;
float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold());
cpInput.m_maximumDistanceSquared = sumMargin * sumMargin;
ppu_address_t manifoldAddress = (ppu_address_t)manifold;
btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr();
//spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
// Contact info uses the collision objects' world transforms, restitution and friction from
// lsMemPtr (the disabled call above used wuInput's transforms instead).
spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(),
lsMemPtr->getColObj1()->getWorldTransform(),
lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(),
lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
wuInput->m_isSwapped);
// Run the actual convex-vs-plane test; it reads the shapes from wuInput->m_spuCollisionShapes.
btConvexPlaneCollideSingleContact(wuInput,lsMemPtr,spuContacts);
}
////////////////////////
/// Convex versus Concave triangle mesh collision detection (handles concave triangle mesh versus sphere, box, cylinder, triangle, cone, convex polyhedron etc)
///////////////////
@ -476,8 +638,9 @@ void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT
}
int stats[11]={0,0,0,0,0,0,0,0,0,0,0};
int degenerateStats[11]={0,0,0,0,0,0,0,0,0,0,0};
#define MAX_DEGENERATE_STATS 15
int stats[MAX_DEGENERATE_STATS]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
int degenerateStats[MAX_DEGENERATE_STATS]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
////////////////////////
@ -613,8 +776,10 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa
{
btGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&simplexSolver,penetrationSolver);//&vsSolver,penetrationSolver);
gjk.getClosestPoints(cpInput,spuContacts,0);//,debugDraw);
btAssert(gjk.m_lastUsedMethod <MAX_DEGENERATE_STATS);
stats[gjk.m_lastUsedMethod]++;
btAssert(gjk.m_degenerateSimplex <MAX_DEGENERATE_STATS);
degenerateStats[gjk.m_degenerateSimplex]++;
#ifdef USE_SEPDISTANCE_UTIL
@ -719,16 +884,21 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
cellDmaWaitTagStatusAll(DMA_MASK(1));
int childShapeCount0 = spuCompoundShape0->getNumChildShapes();
btAssert(childShapeCount0< MAX_SPU_COMPOUND_SUBSHAPES);
int childShapeCount1 = spuCompoundShape1->getNumChildShapes();
btAssert(childShapeCount1< MAX_SPU_COMPOUND_SUBSHAPES);
// Start the N^2
for (int i = 0; i < childShapeCount0; ++i)
{
btCompoundShapeChild& childShape0 = lsMem.compoundShapeData[0].gSubshapes[i];
btAssert(!btBroadphaseProxy::isCompound(childShape0.m_childShapeType));
for (int j = 0; j < childShapeCount1; ++j)
{
btCompoundShapeChild& childShape1 = lsMem.compoundShapeData[1].gSubshapes[j];
btAssert(!btBroadphaseProxy::isCompound(childShape1.m_childShapeType));
/* Create a new collision pair input struct using the two child shapes */
SpuCollisionPairInput cinput (collisionPairInput);
@ -741,9 +911,10 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
cinput.m_shapeType1 = childShape1.m_childShapeType;
cinput.m_collisionMargin1 = childShape1.m_childMargin;
/* Recursively call handleCollisionPair () with new collision pair input */
handleCollisionPair(cinput, lsMem, spuContacts,
(ppu_address_t)childShape0.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i],
(ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false); // bug fix: changed index to j.
(ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false);
}
}
}
@ -761,11 +932,12 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
cellDmaWaitTagStatusAll(DMA_MASK(1));
int childShapeCount = spuCompoundShape->getNumChildShapes();
btAssert(childShapeCount< MAX_SPU_COMPOUND_SUBSHAPES);
for (int i = 0; i < childShapeCount; ++i)
{
btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
// Dma the child shape
dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
cellDmaWaitTagStatusAll(DMA_MASK(1));
@ -793,10 +965,13 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
cellDmaWaitTagStatusAll(DMA_MASK(1));
int childShapeCount = spuCompoundShape->getNumChildShapes();
btAssert(childShapeCount< MAX_SPU_COMPOUND_SUBSHAPES);
for (int i = 0; i < childShapeCount; ++i)
{
btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
// Dma the child shape
dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
cellDmaWaitTagStatusAll(DMA_MASK(1));
@ -845,17 +1020,33 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
}
btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)collisionShape1Loc;
if (collisionPairInput.m_shapeType1 == STATIC_PLANE_PROXYTYPE)
{
btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
btStaticPlaneShape* planeShape= (btStaticPlaneShape*)collisionShape1Loc;
btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
collisionPairInput.m_primitiveDimensions0 = dim0;
collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
collisionPairInput.m_spuCollisionShapes[1] = trimeshShape;
btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
collisionPairInput.m_primitiveDimensions0 = dim0;
collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
collisionPairInput.m_spuCollisionShapes[1] = planeShape;
ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts);
ProcessConvexPlaneSpuCollision(&collisionPairInput,&lsMem,spuContacts);
} else
{
btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)collisionShape1Loc;
btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
collisionPairInput.m_primitiveDimensions0 = dim0;
collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
collisionPairInput.m_spuCollisionShapes[1] = trimeshShape;
ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts);
}
}
}
@ -1033,7 +1224,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
collisionPairInput.m_isSwapped);
float distance=0.f;
//float distance=0.f;
btVector3 normalInB;
@ -1054,38 +1245,64 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
/*
//Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
vmVector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin());
vmVector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin());
vmMatrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis());
vmMatrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis());
Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin());
Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin());
Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis());
Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis());
Transform3 transformA(vmMatrix0,vmPos0);
vmTransform3 transformA(vmMatrix0,vmPos0);
Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ());
Transform3 transformB(vmMatrix1,vmPos1);
vmTransform3 transformB(vmMatrix1,vmPos1);
BoxPoint resultClosestBoxPointA;
BoxPoint resultClosestBoxPointB;
Vector3 resultNormal;
vmVector3 resultNormal;
*/
#ifdef USE_SEPDISTANCE_UTIL
float distanceThreshold = FLT_MAX
#else
float distanceThreshold = 0.f;
//float distanceThreshold = 0.f;
#endif
distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, boxA, transformA, boxB,transformB,distanceThreshold);
vmVector3 n;
Box boxA;
vmVector3 hA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
vmVector3 hB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ());
boxA.mHalf= hA;
vmTransform3 trA;
trA.setTranslation(getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()));
trA.setUpper3x3(getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis()));
Box boxB;
boxB.mHalf = hB;
vmTransform3 trB;
trB.setTranslation(getVmVector3(collisionPairInput.m_worldTransform1.getOrigin()));
trB.setUpper3x3(getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis()));
normalInB = -getBtVector3(resultNormal);
float distanceThreshold = spuManifold->getContactBreakingThreshold();//0.001f;
if(distance < spuManifold->getContactBreakingThreshold())
BoxPoint ptA,ptB;
float dist = boxBoxDistance(n, ptA, ptB,
boxA, trA, boxB, trB,
distanceThreshold );
// float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, boxA, transformA, boxB,transformB,distanceThreshold);
normalInB = -getBtVector3(n);//resultNormal);
//if(dist < distanceThreshold)//spuManifold->getContactBreakingThreshold())
if(dist < spuManifold->getContactBreakingThreshold())
{
btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint));
btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(ptB.localPoint));
spuContacts.addContactPoint(
normalInB,
pointOnB,
distance);
dist);
}
}
#else
@ -1163,7 +1380,9 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
#endif //USE_SEPDISTANCE_UTIL
)
{
handleCollisionPair(collisionPairInput, lsMem, spuContacts, (ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape, (ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape);
handleCollisionPair(collisionPairInput, lsMem, spuContacts,
(ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape,
(ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape);
} else
{
//spu_printf("boxbox dist = %f\n",distance);
@ -1209,3 +1428,5 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
return;
}

View file

@ -47,7 +47,7 @@ __attribute__ ((aligned (128)))
void processCollisionTask(void* userPtr, void* lsMemory);
void* createCollisionLocalStoreMemory();
void deleteCollisionLocalStoreMemory();
#if defined(USE_LIBSPE2) && defined(__SPU__)
#include "../SpuLibspe2Support.h"

Some files were not shown because too many files have changed in this diff Show more