mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-04-27 15:25:40 +00:00
Bullet 2.82 update
This commit is contained in:
parent
d0a64026b0
commit
416c50690e
146 changed files with 12202 additions and 1422 deletions
|
|
@ -54,7 +54,10 @@ IF (INSTALL_LIBS)
|
|||
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
|
||||
INSTALL(TARGETS LinearMath DESTINATION .)
|
||||
ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
|
||||
INSTALL(TARGETS LinearMath DESTINATION lib${LIB_SUFFIX})
|
||||
INSTALL(TARGETS LinearMath
|
||||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION lib${LIB_SUFFIX}
|
||||
ARCHIVE DESTINATION lib${LIB_SUFFIX})
|
||||
INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h" PATTERN
|
||||
".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
|
||||
|
|
|
|||
|
|
@ -1931,11 +1931,15 @@ void btConvexHullInternal::merge(IntermediateHull& h0, IntermediateHull& h1)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static bool pointCmp(const btConvexHullInternal::Point32& p, const btConvexHullInternal::Point32& q)
|
||||
class pointCmp
|
||||
{
|
||||
return (p.y < q.y) || ((p.y == q.y) && ((p.x < q.x) || ((p.x == q.x) && (p.z < q.z))));
|
||||
}
|
||||
public:
|
||||
|
||||
bool operator() ( const btConvexHullInternal::Point32& p, const btConvexHullInternal::Point32& q ) const
|
||||
{
|
||||
return (p.y < q.y) || ((p.y == q.y) && ((p.x < q.x) || ((p.x == q.x) && (p.z < q.z))));
|
||||
}
|
||||
};
|
||||
|
||||
void btConvexHullInternal::compute(const void* coords, bool doubleCoords, int stride, int count)
|
||||
{
|
||||
|
|
@ -2026,7 +2030,7 @@ void btConvexHullInternal::compute(const void* coords, bool doubleCoords, int st
|
|||
points[i].index = i;
|
||||
}
|
||||
}
|
||||
points.quickSort(pointCmp);
|
||||
points.quickSort(pointCmp());
|
||||
|
||||
vertexPool.reset();
|
||||
vertexPool.setArraySize(count);
|
||||
|
|
|
|||
|
|
@ -62,29 +62,17 @@ class btIDebugDraw
|
|||
|
||||
virtual void drawSphere(btScalar radius, const btTransform& transform, const btVector3& color)
|
||||
{
|
||||
btVector3 start = transform.getOrigin();
|
||||
|
||||
const btVector3 xoffs = transform.getBasis() * btVector3(radius,0,0);
|
||||
const btVector3 yoffs = transform.getBasis() * btVector3(0,radius,0);
|
||||
const btVector3 zoffs = transform.getBasis() * btVector3(0,0,radius);
|
||||
|
||||
// XY
|
||||
drawLine(start-xoffs, start+yoffs, color);
|
||||
drawLine(start+yoffs, start+xoffs, color);
|
||||
drawLine(start+xoffs, start-yoffs, color);
|
||||
drawLine(start-yoffs, start-xoffs, color);
|
||||
|
||||
// XZ
|
||||
drawLine(start-xoffs, start+zoffs, color);
|
||||
drawLine(start+zoffs, start+xoffs, color);
|
||||
drawLine(start+xoffs, start-zoffs, color);
|
||||
drawLine(start-zoffs, start-xoffs, color);
|
||||
|
||||
// YZ
|
||||
drawLine(start-yoffs, start+zoffs, color);
|
||||
drawLine(start+zoffs, start+yoffs, color);
|
||||
drawLine(start+yoffs, start-zoffs, color);
|
||||
drawLine(start-zoffs, start-yoffs, color);
|
||||
|
||||
btVector3 center = transform.getOrigin();
|
||||
btVector3 up = transform.getBasis().getColumn(1);
|
||||
btVector3 axis = transform.getBasis().getColumn(0);
|
||||
btScalar minTh = -SIMD_HALF_PI;
|
||||
btScalar maxTh = SIMD_HALF_PI;
|
||||
btScalar minPs = -SIMD_HALF_PI;
|
||||
btScalar maxPs = SIMD_HALF_PI;
|
||||
btScalar stepDegrees = 30.f;
|
||||
drawSpherePatch(center, up, axis, radius,minTh, maxTh, minPs, maxPs, color, stepDegrees ,false);
|
||||
drawSpherePatch(center, up, -axis, radius,minTh, maxTh, minPs, maxPs, color, stepDegrees,false );
|
||||
}
|
||||
|
||||
virtual void drawSphere (const btVector3& p, btScalar radius, const btVector3& color)
|
||||
|
|
@ -179,7 +167,7 @@ class btIDebugDraw
|
|||
}
|
||||
}
|
||||
virtual void drawSpherePatch(const btVector3& center, const btVector3& up, const btVector3& axis, btScalar radius,
|
||||
btScalar minTh, btScalar maxTh, btScalar minPs, btScalar maxPs, const btVector3& color, btScalar stepDegrees = btScalar(10.f))
|
||||
btScalar minTh, btScalar maxTh, btScalar minPs, btScalar maxPs, const btVector3& color, btScalar stepDegrees = btScalar(10.f),bool drawCenter = true)
|
||||
{
|
||||
btVector3 vA[74];
|
||||
btVector3 vB[74];
|
||||
|
|
@ -261,18 +249,22 @@ class btIDebugDraw
|
|||
{
|
||||
drawLine(npole, pvB[j], color);
|
||||
}
|
||||
if(isClosed)
|
||||
|
||||
if (drawCenter)
|
||||
{
|
||||
if(j == (n_vert-1))
|
||||
if(isClosed)
|
||||
{
|
||||
drawLine(arcStart, pvB[j], color);
|
||||
if(j == (n_vert-1))
|
||||
{
|
||||
drawLine(arcStart, pvB[j], color);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(((!i) || (i == (n_hor-1))) && ((!j) || (j == (n_vert-1))))
|
||||
else
|
||||
{
|
||||
drawLine(center, pvB[j], color);
|
||||
if(((!i) || (i == (n_hor-1))) && ((!j) || (j == (n_vert-1))))
|
||||
{
|
||||
drawLine(center, pvB[j], color);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -314,6 +306,8 @@ class btIDebugDraw
|
|||
|
||||
virtual void drawCapsule(btScalar radius, btScalar halfHeight, int upAxis, const btTransform& transform, const btVector3& color)
|
||||
{
|
||||
int stepDegrees = 30;
|
||||
|
||||
btVector3 capStart(0.f,0.f,0.f);
|
||||
capStart[upAxis] = -halfHeight;
|
||||
|
||||
|
|
@ -325,34 +319,47 @@ class btIDebugDraw
|
|||
|
||||
btTransform childTransform = transform;
|
||||
childTransform.getOrigin() = transform * capStart;
|
||||
drawSphere(radius, childTransform, color);
|
||||
{
|
||||
btVector3 center = childTransform.getOrigin();
|
||||
btVector3 up = childTransform.getBasis().getColumn((upAxis+1)%3);
|
||||
btVector3 axis = -childTransform.getBasis().getColumn(upAxis);
|
||||
btScalar minTh = -SIMD_HALF_PI;
|
||||
btScalar maxTh = SIMD_HALF_PI;
|
||||
btScalar minPs = -SIMD_HALF_PI;
|
||||
btScalar maxPs = SIMD_HALF_PI;
|
||||
|
||||
drawSpherePatch(center, up, axis, radius,minTh, maxTh, minPs, maxPs, color, btScalar(stepDegrees) ,false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
btTransform childTransform = transform;
|
||||
childTransform.getOrigin() = transform * capEnd;
|
||||
drawSphere(radius, childTransform, color);
|
||||
{
|
||||
btVector3 center = childTransform.getOrigin();
|
||||
btVector3 up = childTransform.getBasis().getColumn((upAxis+1)%3);
|
||||
btVector3 axis = childTransform.getBasis().getColumn(upAxis);
|
||||
btScalar minTh = -SIMD_HALF_PI;
|
||||
btScalar maxTh = SIMD_HALF_PI;
|
||||
btScalar minPs = -SIMD_HALF_PI;
|
||||
btScalar maxPs = SIMD_HALF_PI;
|
||||
drawSpherePatch(center, up, axis, radius,minTh, maxTh, minPs, maxPs, color, btScalar(stepDegrees) ,false);
|
||||
}
|
||||
}
|
||||
|
||||
// Draw some additional lines
|
||||
btVector3 start = transform.getOrigin();
|
||||
|
||||
capStart[(upAxis+1)%3] = radius;
|
||||
capEnd[(upAxis+1)%3] = radius;
|
||||
drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
|
||||
capStart[(upAxis+1)%3] = -radius;
|
||||
capEnd[(upAxis+1)%3] = -radius;
|
||||
drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
|
||||
|
||||
capStart[(upAxis+1)%3] = 0.f;
|
||||
capEnd[(upAxis+1)%3] = 0.f;
|
||||
|
||||
capStart[(upAxis+2)%3] = radius;
|
||||
capEnd[(upAxis+2)%3] = radius;
|
||||
drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
|
||||
capStart[(upAxis+2)%3] = -radius;
|
||||
capEnd[(upAxis+2)%3] = -radius;
|
||||
drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
|
||||
for (int i=0;i<360;i+=stepDegrees)
|
||||
{
|
||||
capEnd[(upAxis+1)%3] = capStart[(upAxis+1)%3] = btSin(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
|
||||
capEnd[(upAxis+2)%3] = capStart[(upAxis+2)%3] = btCos(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
|
||||
drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
virtual void drawCylinder(btScalar radius, btScalar halfHeight, int upAxis, const btTransform& transform, const btVector3& color)
|
||||
|
|
@ -360,11 +367,18 @@ class btIDebugDraw
|
|||
btVector3 start = transform.getOrigin();
|
||||
btVector3 offsetHeight(0,0,0);
|
||||
offsetHeight[upAxis] = halfHeight;
|
||||
btVector3 offsetRadius(0,0,0);
|
||||
offsetRadius[(upAxis+1)%3] = radius;
|
||||
drawLine(start+transform.getBasis() * (offsetHeight+offsetRadius),start+transform.getBasis() * (-offsetHeight+offsetRadius),color);
|
||||
drawLine(start+transform.getBasis() * (offsetHeight-offsetRadius),start+transform.getBasis() * (-offsetHeight-offsetRadius),color);
|
||||
int stepDegrees=30;
|
||||
btVector3 capStart(0.f,0.f,0.f);
|
||||
capStart[upAxis] = -halfHeight;
|
||||
btVector3 capEnd(0.f,0.f,0.f);
|
||||
capEnd[upAxis] = halfHeight;
|
||||
|
||||
for (int i=0;i<360;i+=stepDegrees)
|
||||
{
|
||||
capEnd[(upAxis+1)%3] = capStart[(upAxis+1)%3] = btSin(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
|
||||
capEnd[(upAxis+2)%3] = capStart[(upAxis+2)%3] = btCos(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
|
||||
drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
|
||||
}
|
||||
// Drawing top and bottom caps of the cylinder
|
||||
btVector3 yaxis(0,0,0);
|
||||
yaxis[upAxis] = btScalar(1.0);
|
||||
|
|
@ -376,16 +390,28 @@ class btIDebugDraw
|
|||
|
||||
virtual void drawCone(btScalar radius, btScalar height, int upAxis, const btTransform& transform, const btVector3& color)
|
||||
{
|
||||
|
||||
int stepDegrees = 30;
|
||||
btVector3 start = transform.getOrigin();
|
||||
|
||||
btVector3 offsetHeight(0,0,0);
|
||||
offsetHeight[upAxis] = height * btScalar(0.5);
|
||||
btScalar halfHeight = height * btScalar(0.5);
|
||||
offsetHeight[upAxis] = halfHeight;
|
||||
btVector3 offsetRadius(0,0,0);
|
||||
offsetRadius[(upAxis+1)%3] = radius;
|
||||
btVector3 offset2Radius(0,0,0);
|
||||
offset2Radius[(upAxis+2)%3] = radius;
|
||||
|
||||
|
||||
btVector3 capEnd(0.f,0.f,0.f);
|
||||
capEnd[upAxis] = -halfHeight;
|
||||
|
||||
for (int i=0;i<360;i+=stepDegrees)
|
||||
{
|
||||
capEnd[(upAxis+1)%3] = btSin(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
|
||||
capEnd[(upAxis+2)%3] = btCos(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
|
||||
drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * capEnd, color);
|
||||
}
|
||||
|
||||
drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight+offsetRadius),color);
|
||||
drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight-offsetRadius),color);
|
||||
drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight+offset2Radius),color);
|
||||
|
|
|
|||
|
|
@ -22,10 +22,15 @@ subject to the following restrictions:
|
|||
|
||||
#ifdef BT_USE_SSE
|
||||
//const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
|
||||
const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
|
||||
//const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
|
||||
#define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
|
||||
#endif
|
||||
|
||||
#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
|
||||
#if defined(BT_USE_SSE)
|
||||
#define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
|
||||
#define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
|
||||
#define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
|
||||
#elif defined(BT_USE_NEON)
|
||||
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
|
||||
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
|
||||
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
|
||||
|
|
@ -207,7 +212,7 @@ public:
|
|||
btFullAssert(d != btScalar(0.0));
|
||||
btScalar s = btScalar(2.0) / d;
|
||||
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vs, Q = q.get128();
|
||||
__m128i Qi = btCastfTo128i(Q);
|
||||
__m128 Y, Z;
|
||||
|
|
@ -341,7 +346,7 @@ public:
|
|||
* @param m The array to be filled */
|
||||
void getOpenGLSubMatrix(btScalar *m) const
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 v0 = m_el[0].mVec128;
|
||||
__m128 v1 = m_el[1].mVec128;
|
||||
__m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
|
||||
|
|
@ -362,7 +367,7 @@ public:
|
|||
vm[2] = v2;
|
||||
#elif defined(BT_USE_NEON)
|
||||
// note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
|
||||
static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
|
||||
static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
|
||||
float32x4_t *vm = (float32x4_t *)m;
|
||||
float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
|
||||
float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
|
||||
|
|
@ -740,7 +745,7 @@ public:
|
|||
SIMD_FORCE_INLINE btMatrix3x3&
|
||||
btMatrix3x3::operator*=(const btMatrix3x3& m)
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 rv00, rv01, rv02;
|
||||
__m128 rv10, rv11, rv12;
|
||||
__m128 rv20, rv21, rv22;
|
||||
|
|
@ -953,7 +958,7 @@ btMatrix3x3::determinant() const
|
|||
SIMD_FORCE_INLINE btMatrix3x3
|
||||
btMatrix3x3::absolute() const
|
||||
{
|
||||
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
return btMatrix3x3(
|
||||
_mm_and_ps(m_el[0].mVec128, btvAbsfMask),
|
||||
_mm_and_ps(m_el[1].mVec128, btvAbsfMask),
|
||||
|
|
@ -974,7 +979,7 @@ btMatrix3x3::absolute() const
|
|||
SIMD_FORCE_INLINE btMatrix3x3
|
||||
btMatrix3x3::transpose() const
|
||||
{
|
||||
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
__m128 v0 = m_el[0].mVec128;
|
||||
__m128 v1 = m_el[1].mVec128;
|
||||
__m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
|
||||
|
|
@ -993,7 +998,7 @@ btMatrix3x3::transpose() const
|
|||
return btMatrix3x3( v0, v1, v2 );
|
||||
#elif defined(BT_USE_NEON)
|
||||
// note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
|
||||
static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
|
||||
static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
|
||||
float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
|
||||
float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
|
||||
float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
|
||||
|
|
@ -1031,7 +1036,7 @@ btMatrix3x3::inverse() const
|
|||
SIMD_FORCE_INLINE btMatrix3x3
|
||||
btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
|
||||
{
|
||||
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
// zeros w
|
||||
// static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
|
||||
__m128 row = m_el[0].mVec128;
|
||||
|
|
@ -1053,7 +1058,7 @@ btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
|
|||
|
||||
#elif defined BT_USE_NEON
|
||||
// zeros w
|
||||
static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
|
||||
static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
|
||||
float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
|
||||
float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
|
||||
float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
|
||||
|
|
@ -1151,7 +1156,7 @@ operator*(const btMatrix3x3& m, const btVector3& v)
|
|||
SIMD_FORCE_INLINE btVector3
|
||||
operator*(const btVector3& v, const btMatrix3x3& m)
|
||||
{
|
||||
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
|
||||
const __m128 vv = v.mVec128;
|
||||
|
||||
|
|
@ -1191,7 +1196,7 @@ operator*(const btVector3& v, const btMatrix3x3& m)
|
|||
SIMD_FORCE_INLINE btMatrix3x3
|
||||
operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
|
||||
{
|
||||
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
|
||||
__m128 m10 = m1[0].mVec128;
|
||||
__m128 m11 = m1[1].mVec128;
|
||||
|
|
|
|||
504
Engine/lib/bullet/src/LinearMath/btMatrixX.h
Normal file
504
Engine/lib/bullet/src/LinearMath/btMatrixX.h
Normal file
|
|
@ -0,0 +1,504 @@
|
|||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
///original version written by Erwin Coumans, October 2013
|
||||
|
||||
#ifndef BT_MATRIX_X_H
|
||||
#define BT_MATRIX_X_H
|
||||
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "LinearMath/btAlignedObjectArray.h"
|
||||
|
||||
/// Ascending "less than" comparison functor for ints.
/// Passed to quickSort() on the per-row / per-column non-zero index lists of btMatrixX.
class btIntSortPredicate
{
public:
	/// @return true when a must be ordered before b (strictly smaller).
	bool operator() ( const int& a, const int& b ) const
	{
		return b > a;
	}
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct btMatrixX
|
||||
{
|
||||
int m_rows;
|
||||
int m_cols;
|
||||
int m_operations;
|
||||
int m_resizeOperations;
|
||||
int m_setElemOperations;
|
||||
|
||||
btAlignedObjectArray<T> m_storage;
|
||||
btAlignedObjectArray< btAlignedObjectArray<int> > m_rowNonZeroElements1;
|
||||
btAlignedObjectArray< btAlignedObjectArray<int> > m_colNonZeroElements;
|
||||
|
||||
T* getBufferPointerWritable()
|
||||
{
|
||||
return m_storage.size() ? &m_storage[0] : 0;
|
||||
}
|
||||
|
||||
const T* getBufferPointer() const
|
||||
{
|
||||
return m_storage.size() ? &m_storage[0] : 0;
|
||||
}
|
||||
btMatrixX()
|
||||
:m_rows(0),
|
||||
m_cols(0),
|
||||
m_operations(0),
|
||||
m_resizeOperations(0),
|
||||
m_setElemOperations(0)
|
||||
{
|
||||
}
|
||||
btMatrixX(int rows,int cols)
|
||||
:m_rows(rows),
|
||||
m_cols(cols),
|
||||
m_operations(0),
|
||||
m_resizeOperations(0),
|
||||
m_setElemOperations(0)
|
||||
{
|
||||
resize(rows,cols);
|
||||
}
|
||||
void resize(int rows, int cols)
|
||||
{
|
||||
m_resizeOperations++;
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
{
|
||||
BT_PROFILE("m_storage.resize");
|
||||
m_storage.resize(rows*cols);
|
||||
}
|
||||
clearSparseInfo();
|
||||
}
|
||||
int cols() const
|
||||
{
|
||||
return m_cols;
|
||||
}
|
||||
int rows() const
|
||||
{
|
||||
return m_rows;
|
||||
}
|
||||
///we don't want this read/write operator(), because we cannot keep track of non-zero elements, use setElem instead
|
||||
/*T& operator() (int row,int col)
|
||||
{
|
||||
return m_storage[col*m_rows+row];
|
||||
}
|
||||
*/
|
||||
|
||||
void addElem(int row,int col, T val)
|
||||
{
|
||||
if (val)
|
||||
{
|
||||
if (m_storage[col+row*m_cols]==0.f)
|
||||
{
|
||||
setElem(row,col,val);
|
||||
} else
|
||||
{
|
||||
m_storage[row*m_cols+col] += val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void copyLowerToUpperTriangle()
|
||||
{
|
||||
int count=0;
|
||||
for (int row=0;row<m_rowNonZeroElements1.size();row++)
|
||||
{
|
||||
for (int j=0;j<m_rowNonZeroElements1[row].size();j++)
|
||||
{
|
||||
int col = m_rowNonZeroElements1[row][j];
|
||||
setElem(col,row, (*this)(row,col));
|
||||
count++;
|
||||
|
||||
}
|
||||
}
|
||||
//printf("copyLowerToUpperTriangle copied %d elements out of %dx%d=%d\n", count,rows(),cols(),cols()*rows());
|
||||
}
|
||||
void setElem(int row,int col, T val)
|
||||
{
|
||||
m_setElemOperations++;
|
||||
if (val)
|
||||
{
|
||||
if (m_storage[col+row*m_cols]==0.f)
|
||||
{
|
||||
m_rowNonZeroElements1[row].push_back(col);
|
||||
m_colNonZeroElements[col].push_back(row);
|
||||
}
|
||||
m_storage[row*m_cols+col] = val;
|
||||
}
|
||||
}
|
||||
const T& operator() (int row,int col) const
|
||||
{
|
||||
return m_storage[col+row*m_cols];
|
||||
}
|
||||
|
||||
void clearSparseInfo()
|
||||
{
|
||||
BT_PROFILE("clearSparseInfo=0");
|
||||
m_rowNonZeroElements1.resize(m_rows);
|
||||
m_colNonZeroElements.resize(m_cols);
|
||||
for (int i=0;i<m_rows;i++)
|
||||
m_rowNonZeroElements1[i].resize(0);
|
||||
for (int j=0;j<m_cols;j++)
|
||||
m_colNonZeroElements[j].resize(0);
|
||||
}
|
||||
|
||||
void setZero()
|
||||
{
|
||||
{
|
||||
BT_PROFILE("storage=0");
|
||||
btSetZero(&m_storage[0],m_storage.size());
|
||||
//memset(&m_storage[0],0,sizeof(T)*m_storage.size());
|
||||
//for (int i=0;i<m_storage.size();i++)
|
||||
// m_storage[i]=0;
|
||||
}
|
||||
{
|
||||
BT_PROFILE("clearSparseInfo=0");
|
||||
clearSparseInfo();
|
||||
}
|
||||
}
|
||||
|
||||
void printMatrix(const char* msg)
|
||||
{
|
||||
printf("%s ---------------------\n",msg);
|
||||
for (int i=0;i<rows();i++)
|
||||
{
|
||||
printf("\n");
|
||||
for (int j=0;j<cols();j++)
|
||||
{
|
||||
printf("%2.1f\t",(*this)(i,j));
|
||||
}
|
||||
}
|
||||
printf("\n---------------------\n");
|
||||
|
||||
}
|
||||
void printNumZeros(const char* msg)
|
||||
{
|
||||
printf("%s: ",msg);
|
||||
int numZeros = 0;
|
||||
for (int i=0;i<m_storage.size();i++)
|
||||
if (m_storage[i]==0)
|
||||
numZeros++;
|
||||
int total = m_cols*m_rows;
|
||||
int computedNonZero = total-numZeros;
|
||||
int nonZero = 0;
|
||||
for (int i=0;i<m_colNonZeroElements.size();i++)
|
||||
nonZero += m_colNonZeroElements[i].size();
|
||||
btAssert(computedNonZero==nonZero);
|
||||
if(computedNonZero!=nonZero)
|
||||
{
|
||||
printf("Error: computedNonZero=%d, but nonZero=%d\n",computedNonZero,nonZero);
|
||||
}
|
||||
//printf("%d numZeros out of %d (%f)\n",numZeros,m_cols*m_rows,numZeros/(m_cols*m_rows));
|
||||
printf("total %d, %d rows, %d cols, %d non-zeros (%f %)\n", total, rows(),cols(), nonZero,100.f*(T)nonZero/T(total));
|
||||
}
|
||||
/*
|
||||
void rowComputeNonZeroElements()
|
||||
{
|
||||
m_rowNonZeroElements1.resize(rows());
|
||||
for (int i=0;i<rows();i++)
|
||||
{
|
||||
m_rowNonZeroElements1[i].resize(0);
|
||||
for (int j=0;j<cols();j++)
|
||||
{
|
||||
if ((*this)(i,j)!=0.f)
|
||||
{
|
||||
m_rowNonZeroElements1[i].push_back(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
btMatrixX transpose() const
|
||||
{
|
||||
//transpose is optimized for sparse matrices
|
||||
btMatrixX tr(m_cols,m_rows);
|
||||
tr.setZero();
|
||||
#if 0
|
||||
for (int i=0;i<m_cols;i++)
|
||||
for (int j=0;j<m_rows;j++)
|
||||
{
|
||||
T v = (*this)(j,i);
|
||||
if (v)
|
||||
{
|
||||
tr.setElem(i,j,v);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i=0;i<m_colNonZeroElements.size();i++)
|
||||
for (int h=0;h<m_colNonZeroElements[i].size();h++)
|
||||
{
|
||||
int j = m_colNonZeroElements[i][h];
|
||||
T v = (*this)(j,i);
|
||||
tr.setElem(i,j,v);
|
||||
}
|
||||
#endif
|
||||
return tr;
|
||||
}
|
||||
|
||||
void sortRowIndexArrays()
|
||||
{
|
||||
for (int i=0;i<m_rowNonZeroElements1[i].size();i++)
|
||||
{
|
||||
m_rowNonZeroElements1[i].quickSort(btIntSortPredicate());
|
||||
}
|
||||
}
|
||||
|
||||
void sortColIndexArrays()
|
||||
{
|
||||
for (int i=0;i<m_colNonZeroElements[i].size();i++)
|
||||
{
|
||||
m_colNonZeroElements[i].quickSort(btIntSortPredicate());
|
||||
}
|
||||
}
|
||||
|
||||
btMatrixX operator*(const btMatrixX& other)
|
||||
{
|
||||
//btMatrixX*btMatrixX implementation, optimized for sparse matrices
|
||||
btAssert(cols() == other.rows());
|
||||
|
||||
btMatrixX res(rows(),other.cols());
|
||||
res.setZero();
|
||||
// BT_PROFILE("btMatrixX mul");
|
||||
for (int j=0; j < res.cols(); ++j)
|
||||
{
|
||||
//int numZero=other.m_colNonZeroElements[j].size();
|
||||
//if (numZero)
|
||||
{
|
||||
for (int i=0; i < res.rows(); ++i)
|
||||
//for (int g = 0;g<m_colNonZeroElements[j].size();g++)
|
||||
{
|
||||
T dotProd=0;
|
||||
T dotProd2=0;
|
||||
int waste=0,waste2=0;
|
||||
|
||||
bool doubleWalk = false;
|
||||
if (doubleWalk)
|
||||
{
|
||||
int numRows = m_rowNonZeroElements1[i].size();
|
||||
int numOtherCols = other.m_colNonZeroElements[j].size();
|
||||
for (int ii=0;ii<numRows;ii++)
|
||||
{
|
||||
int vThis=m_rowNonZeroElements1[i][ii];
|
||||
}
|
||||
|
||||
for (int ii=0;ii<numOtherCols;ii++)
|
||||
{
|
||||
int vOther = other.m_colNonZeroElements[j][ii];
|
||||
}
|
||||
|
||||
|
||||
int indexRow = 0;
|
||||
int indexOtherCol = 0;
|
||||
while (indexRow < numRows && indexOtherCol < numOtherCols)
|
||||
{
|
||||
int vThis=m_rowNonZeroElements1[i][indexRow];
|
||||
int vOther = other.m_colNonZeroElements[j][indexOtherCol];
|
||||
if (vOther==vThis)
|
||||
{
|
||||
dotProd += (*this)(i,vThis) * other(vThis,j);
|
||||
}
|
||||
if (vThis<vOther)
|
||||
{
|
||||
indexRow++;
|
||||
} else
|
||||
{
|
||||
indexOtherCol++;
|
||||
}
|
||||
}
|
||||
|
||||
} else
|
||||
{
|
||||
bool useOtherCol = true;
|
||||
if (other.m_colNonZeroElements[j].size() <m_rowNonZeroElements1[i].size())
|
||||
{
|
||||
useOtherCol=true;
|
||||
}
|
||||
if (!useOtherCol )
|
||||
{
|
||||
for (int q=0;q<other.m_colNonZeroElements[j].size();q++)
|
||||
{
|
||||
int v = other.m_colNonZeroElements[j][q];
|
||||
T w = (*this)(i,v);
|
||||
if (w!=0.f)
|
||||
{
|
||||
dotProd+=w*other(v,j);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int q=0;q<m_rowNonZeroElements1[i].size();q++)
|
||||
{
|
||||
int v=m_rowNonZeroElements1[i][q];
|
||||
T w = (*this)(i,v);
|
||||
if (other(v,j)!=0.f)
|
||||
{
|
||||
dotProd+=w*other(v,j);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dotProd)
|
||||
res.setElem(i,j,dotProd);
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// this assumes the 4th and 8th rows of B and C are zero.
|
||||
void multiplyAdd2_p8r (const btScalar *B, const btScalar *C, int numRows, int numRowsOther ,int row, int col)
|
||||
{
|
||||
const btScalar *bb = B;
|
||||
for ( int i = 0;i<numRows;i++)
|
||||
{
|
||||
const btScalar *cc = C;
|
||||
for ( int j = 0;j<numRowsOther;j++)
|
||||
{
|
||||
btScalar sum;
|
||||
sum = bb[0]*cc[0];
|
||||
sum += bb[1]*cc[1];
|
||||
sum += bb[2]*cc[2];
|
||||
sum += bb[4]*cc[4];
|
||||
sum += bb[5]*cc[5];
|
||||
sum += bb[6]*cc[6];
|
||||
addElem(row+i,col+j,sum);
|
||||
cc += 8;
|
||||
}
|
||||
bb += 8;
|
||||
}
|
||||
}
|
||||
|
||||
void multiply2_p8r (const btScalar *B, const btScalar *C, int numRows, int numRowsOther, int row, int col)
|
||||
{
|
||||
btAssert (numRows>0 && numRowsOther>0 && B && C);
|
||||
const btScalar *bb = B;
|
||||
for ( int i = 0;i<numRows;i++)
|
||||
{
|
||||
const btScalar *cc = C;
|
||||
for ( int j = 0;j<numRowsOther;j++)
|
||||
{
|
||||
btScalar sum;
|
||||
sum = bb[0]*cc[0];
|
||||
sum += bb[1]*cc[1];
|
||||
sum += bb[2]*cc[2];
|
||||
sum += bb[4]*cc[4];
|
||||
sum += bb[5]*cc[5];
|
||||
sum += bb[6]*cc[6];
|
||||
setElem(row+i,col+j,sum);
|
||||
cc += 8;
|
||||
}
|
||||
bb += 8;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct btVectorX
|
||||
{
|
||||
btAlignedObjectArray<T> m_storage;
|
||||
|
||||
btVectorX()
|
||||
{
|
||||
}
|
||||
btVectorX(int numRows)
|
||||
{
|
||||
m_storage.resize(numRows);
|
||||
}
|
||||
|
||||
void resize(int rows)
|
||||
{
|
||||
m_storage.resize(rows);
|
||||
}
|
||||
int cols() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
int rows() const
|
||||
{
|
||||
return m_storage.size();
|
||||
}
|
||||
int size() const
|
||||
{
|
||||
return rows();
|
||||
}
|
||||
void setZero()
|
||||
{
|
||||
// for (int i=0;i<m_storage.size();i++)
|
||||
// m_storage[i]=0;
|
||||
//memset(&m_storage[0],0,sizeof(T)*m_storage.size());
|
||||
btSetZero(&m_storage[0],m_storage.size());
|
||||
}
|
||||
const T& operator[] (int index) const
|
||||
{
|
||||
return m_storage[index];
|
||||
}
|
||||
|
||||
T& operator[] (int index)
|
||||
{
|
||||
return m_storage[index];
|
||||
}
|
||||
|
||||
T* getBufferPointerWritable()
|
||||
{
|
||||
return m_storage.size() ? &m_storage[0] : 0;
|
||||
}
|
||||
|
||||
const T* getBufferPointer() const
|
||||
{
|
||||
return m_storage.size() ? &m_storage[0] : 0;
|
||||
}
|
||||
|
||||
};
|
||||
/*
|
||||
template <typename T>
|
||||
void setElem(btMatrixX<T>& mat, int row, int col, T val)
|
||||
{
|
||||
mat.setElem(row,col,val);
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
typedef btMatrixX<float> btMatrixXf;
|
||||
typedef btVectorX<float> btVectorXf;
|
||||
|
||||
typedef btMatrixX<double> btMatrixXd;
|
||||
typedef btVectorX<double> btVectorXd;
|
||||
|
||||
|
||||
|
||||
/// Free-function form of btMatrixXd::setElem: forwards (row, col, val) to mat unchanged.
inline void setElem(btMatrixXd& mat, int row, int col, double val)
{
	mat.setElem(row, col, val);
}
|
||||
|
||||
/// Free-function form of btMatrixXf::setElem: forwards (row, col, val) to mat unchanged.
inline void setElem(btMatrixXf& mat, int row, int col, float val)
{
	mat.setElem(row, col, val);
}
|
||||
|
||||
#ifdef BT_USE_DOUBLE_PRECISION
|
||||
#define btVectorXu btVectorXd
|
||||
#define btMatrixXu btMatrixXd
|
||||
#else
|
||||
#define btVectorXu btVectorXf
|
||||
#define btMatrixXu btMatrixXf
|
||||
#endif //BT_USE_DOUBLE_PRECISION
|
||||
|
||||
|
||||
|
||||
#endif //BT_MATRIX_X_H
|
||||
|
|
@ -60,10 +60,10 @@ unsigned int btPolarDecomposition::decompose(const btMatrix3x3& a, btMatrix3x3&
|
|||
break;
|
||||
|
||||
const btScalar gamma = btPow(h_norm / u_norm, 0.25f);
|
||||
const btScalar inv_gamma = 1.0 / gamma;
|
||||
const btScalar inv_gamma = btScalar(1.0) / gamma;
|
||||
|
||||
// Determine the delta to 'u'
|
||||
const btMatrix3x3 delta = (u * (gamma - 2.0) + h.transpose() * inv_gamma) * 0.5;
|
||||
const btMatrix3x3 delta = (u * (gamma - btScalar(2.0)) + h.transpose() * inv_gamma) * btScalar(0.5);
|
||||
|
||||
// Update the matrices
|
||||
u += delta;
|
||||
|
|
|
|||
|
|
@ -27,11 +27,17 @@ subject to the following restrictions:
|
|||
|
||||
#ifdef BT_USE_SSE
|
||||
|
||||
const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
//const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
#define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
|
||||
#if defined(BT_USE_SSE)
|
||||
|
||||
#define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
|
||||
#define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
|
||||
|
||||
#elif defined(BT_USE_NEON)
|
||||
|
||||
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
|
||||
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
|
||||
|
|
@ -285,7 +291,7 @@ public:
|
|||
* @param q The other quaternion */
|
||||
btScalar dot(const btQuaternion& q) const
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vd;
|
||||
|
||||
vd = _mm_mul_ps(mVec128, q.mVec128);
|
||||
|
|
@ -384,7 +390,7 @@ public:
|
|||
{
|
||||
return *this / length();
|
||||
}
|
||||
/**@brief Return the angle between this quaternion and the other
|
||||
/**@brief Return the ***half*** angle between this quaternion and the other
|
||||
* @param q The other quaternion */
|
||||
btScalar angle(const btQuaternion& q) const
|
||||
{
|
||||
|
|
@ -392,6 +398,19 @@ public:
|
|||
btAssert(s != btScalar(0.0));
|
||||
return btAcos(dot(q) / s);
|
||||
}
|
||||
|
||||
/**@brief Return the angle between this quaternion and the other along the shortest path
|
||||
* @param q The other quaternion */
|
||||
btScalar angleShortestPath(const btQuaternion& q) const
|
||||
{
|
||||
btScalar s = btSqrt(length2() * q.length2());
|
||||
btAssert(s != btScalar(0.0));
|
||||
if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
|
||||
return btAcos(dot(-q) / s) * btScalar(2.0);
|
||||
else
|
||||
return btAcos(dot(q) / s) * btScalar(2.0);
|
||||
}
|
||||
|
||||
/**@brief Return the angle of rotation represented by this quaternion */
|
||||
btScalar getAngle() const
|
||||
{
|
||||
|
|
@ -399,6 +418,19 @@ public:
|
|||
return s;
|
||||
}
|
||||
|
||||
/**@brief Return the angle of rotation represented by this quaternion along the shortest path*/
|
||||
btScalar getAngleShortestPath() const
|
||||
{
|
||||
btScalar s;
|
||||
if (dot(*this) < 0)
|
||||
s = btScalar(2.) * btAcos(m_floats[3]);
|
||||
else
|
||||
s = btScalar(2.) * btAcos(-m_floats[3]);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
/**@brief Return the axis of the rotation represented by this quaternion */
|
||||
btVector3 getAxis() const
|
||||
{
|
||||
|
|
@ -498,7 +530,7 @@ public:
|
|||
btAssert(magnitude > btScalar(0));
|
||||
|
||||
btScalar product = dot(q) / magnitude;
|
||||
if (btFabs(product) != btScalar(1))
|
||||
if (btFabs(product) < btScalar(1))
|
||||
{
|
||||
// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
|
||||
const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
|
||||
|
|
@ -835,7 +867,7 @@ quatRotate(const btQuaternion& rotation, const btVector3& v)
|
|||
{
|
||||
btQuaternion q = rotation * v;
|
||||
q *= rotation.inverse();
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ subject to the following restrictions:
|
|||
#include <float.h>
|
||||
|
||||
/* SVN $Revision$ on $Date$ from http://bullet.googlecode.com*/
|
||||
#define BT_BULLET_VERSION 281
|
||||
#define BT_BULLET_VERSION 282
|
||||
|
||||
inline int btGetVersion()
|
||||
{
|
||||
|
|
@ -68,6 +68,10 @@ inline int btGetVersion()
|
|||
#else
|
||||
|
||||
#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (BT_USE_DOUBLE_PRECISION))
|
||||
#if _MSC_VER>1400
|
||||
#define BT_USE_SIMD_VECTOR3
|
||||
#endif
|
||||
|
||||
#define BT_USE_SSE
|
||||
#ifdef BT_USE_SSE
|
||||
//BT_USE_SSE_IN_API is disabled under Windows by default, because
|
||||
|
|
@ -159,7 +163,8 @@ inline int btGetVersion()
|
|||
|
||||
#if (defined (__APPLE__) && (!defined (BT_USE_DOUBLE_PRECISION)))
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#define BT_USE_SSE
|
||||
#define BT_USE_SIMD_VECTOR3
|
||||
#define BT_USE_SSE
|
||||
//BT_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
|
||||
//if apps run into issues, we will disable the next line
|
||||
#define BT_USE_SSE_IN_API
|
||||
|
|
@ -175,10 +180,11 @@ inline int btGetVersion()
|
|||
#include <emmintrin.h>
|
||||
#endif
|
||||
#endif //BT_USE_SSE
|
||||
#elif defined( __armv7__ )
|
||||
#elif defined( __ARM_NEON__ )
|
||||
#ifdef __clang__
|
||||
#define BT_USE_NEON 1
|
||||
|
||||
#define BT_USE_SIMD_VECTOR3
|
||||
|
||||
#if defined BT_USE_NEON && defined (__clang__)
|
||||
#include <arm_neon.h>
|
||||
#endif//BT_USE_NEON
|
||||
|
|
@ -207,8 +213,7 @@ inline int btGetVersion()
|
|||
}
|
||||
#else//defined (__i386__) || defined (__x86_64__)
|
||||
#define btAssert assert
|
||||
#end//defined (__i386__) || defined (__x86_64__)
|
||||
#endif
|
||||
#endif//defined (__i386__) || defined (__x86_64__)
|
||||
#else//defined(DEBUG) || defined (_DEBUG)
|
||||
#define btAssert(x)
|
||||
#endif//defined(DEBUG) || defined (_DEBUG)
|
||||
|
|
@ -252,10 +257,12 @@ inline int btGetVersion()
|
|||
|
||||
///The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
|
||||
#if defined(BT_USE_DOUBLE_PRECISION)
|
||||
|
||||
typedef double btScalar;
|
||||
//this number could be bigger in double precision
|
||||
#define BT_LARGE_FLOAT 1e30
|
||||
#else
|
||||
|
||||
typedef float btScalar;
|
||||
//keep BT_LARGE_FLOAT*BT_LARGE_FLOAT < FLT_MAX
|
||||
#define BT_LARGE_FLOAT 1e18f
|
||||
|
|
@ -265,7 +272,8 @@ typedef float btScalar;
|
|||
typedef __m128 btSimdFloat4;
|
||||
#endif//BT_USE_SSE
|
||||
|
||||
#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
|
||||
#if defined (BT_USE_SSE)
|
||||
//#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
|
||||
#ifdef _WIN32
|
||||
|
||||
#ifndef BT_NAN
|
||||
|
|
@ -278,6 +286,8 @@ static int btInfinityMask = 0x7F800000;
|
|||
#define BT_INFINITY (*(float*)&btInfinityMask)
|
||||
#endif
|
||||
|
||||
//use this, in case there are clashes (such as xnamath.h)
|
||||
#ifndef BT_NO_SIMD_OPERATOR_OVERLOADS
|
||||
inline __m128 operator + (const __m128 A, const __m128 B)
|
||||
{
|
||||
return _mm_add_ps(A, B);
|
||||
|
|
@ -292,6 +302,7 @@ inline __m128 operator * (const __m128 A, const __m128 B)
|
|||
{
|
||||
return _mm_mul_ps(A, B);
|
||||
}
|
||||
#endif //BT_NO_SIMD_OPERATOR_OVERLOADS
|
||||
|
||||
#define btCastfTo128i(a) (_mm_castps_si128(a))
|
||||
#define btCastfTo128d(a) (_mm_castps_pd(a))
|
||||
|
|
@ -311,7 +322,24 @@ inline __m128 operator * (const __m128 A, const __m128 B)
|
|||
#define BT_INFINITY INFINITY
|
||||
#define BT_NAN NAN
|
||||
#endif//_WIN32
|
||||
#endif //BT_USE_SSE_IN_API
|
||||
#else
|
||||
|
||||
#ifdef BT_USE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
typedef float32x4_t btSimdFloat4;
|
||||
#define BT_INFINITY INFINITY
|
||||
#define BT_NAN NAN
|
||||
#define btAssign128(r0,r1,r2,r3) (float32x4_t){r0,r1,r2,r3}
|
||||
#else//BT_USE_NEON
|
||||
|
||||
#ifndef BT_INFINITY
|
||||
static int btInfinityMask = 0x7F800000;
|
||||
#define BT_INFINITY (*(float*)&btInfinityMask)
|
||||
#endif
|
||||
#endif//BT_USE_NEON
|
||||
|
||||
#endif //BT_USE_SSE
|
||||
|
||||
#ifdef BT_USE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
|
@ -403,15 +431,15 @@ SIMD_FORCE_INLINE btScalar btFmod(btScalar x,btScalar y) { return fmodf(x,y); }
|
|||
|
||||
#endif
|
||||
|
||||
#define SIMD_2_PI btScalar(6.283185307179586232)
|
||||
#define SIMD_PI (SIMD_2_PI * btScalar(0.5))
|
||||
#define SIMD_HALF_PI (SIMD_2_PI * btScalar(0.25))
|
||||
#define SIMD_PI btScalar(3.1415926535897932384626433832795029)
|
||||
// Parenthesized so the macro expands as a single operand: without the parentheses,
// btScalar(360.0) / SIMD_2_PI would evaluate as (360 / 2) * pi instead of 360 / (2 * pi).
#define SIMD_2_PI (btScalar(2.0) * SIMD_PI)
|
||||
#define SIMD_HALF_PI (SIMD_PI * btScalar(0.5))
|
||||
#define SIMD_RADS_PER_DEG (SIMD_2_PI / btScalar(360.0))
|
||||
#define SIMD_DEGS_PER_RAD (btScalar(360.0) / SIMD_2_PI)
|
||||
#define SIMDSQRT12 btScalar(0.7071067811865475244008443621048490)
|
||||
|
||||
#define btRecipSqrt(x) ((btScalar)(btScalar(1.0)/btSqrt(btScalar(x)))) /* reciprocal square root */
|
||||
|
||||
#define btRecip(x) (btScalar(1.0)/btScalar(x))
|
||||
|
||||
#ifdef BT_USE_DOUBLE_PRECISION
|
||||
#define SIMD_EPSILON DBL_EPSILON
|
||||
|
|
@ -602,6 +630,46 @@ SIMD_FORCE_INLINE double btUnswapEndianDouble(const unsigned char *src)
|
|||
return d;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
SIMD_FORCE_INLINE void btSetZero(T* a, int n)
|
||||
{
|
||||
T* acurr = a;
|
||||
size_t ncurr = n;
|
||||
while (ncurr > 0)
|
||||
{
|
||||
*(acurr++) = 0;
|
||||
--ncurr;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**@brief Dot product of two scalar arrays
 * @param a First array, read for n elements
 * @param b Second array, read for n elements
 * @param n Number of elements; zero or a negative value yields 0
 * @return Sum of a[i]*b[i], accumulated in increasing index order */
SIMD_FORCE_INLINE btScalar btLargeDot(const btScalar *a, const btScalar *b, int n)
{
	btScalar total = 0;
	int i = 0;

	// Main loop handles two elements per iteration, adding each product in index order.
	for (; i + 1 < n; i += 2)
	{
		const btScalar first = a[i] * b[i];
		const btScalar second = a[i + 1] * b[i + 1];
		total += first;
		total += second;
	}

	// At most one element is left over when n is odd.
	for (; i < n; ++i)
	{
		total += a[i] * b[i];
	}

	return total;
}
|
||||
|
||||
|
||||
// returns normalized value in range [-SIMD_PI, SIMD_PI]
|
||||
SIMD_FORCE_INLINE btScalar btNormalizeAngle(btScalar angleInRadians)
|
||||
{
|
||||
|
|
@ -620,6 +688,8 @@ SIMD_FORCE_INLINE btScalar btNormalizeAngle(btScalar angleInRadians)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
///rudimentary class to provide type info
|
||||
struct btTypedObject
|
||||
{
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -17,7 +17,6 @@ subject to the following restrictions:
|
|||
#define BT_SERIALIZER_H
|
||||
|
||||
#include "btScalar.h" // has definitions like SIMD_FORCE_INLINE
|
||||
#include "btStackAlloc.h"
|
||||
#include "btHashMap.h"
|
||||
|
||||
#if !defined( __CELLOS_LV2__) && !defined(__MWERKS__)
|
||||
|
|
@ -439,7 +438,7 @@ public:
|
|||
|
||||
buffer[9] = '2';
|
||||
buffer[10] = '8';
|
||||
buffer[11] = '1';
|
||||
buffer[11] = '2';
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,9 +19,17 @@
|
|||
#define BT_USE_SSE_IN_API
|
||||
#endif
|
||||
|
||||
|
||||
#include "btVector3.h"
|
||||
|
||||
#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
|
||||
|
||||
|
||||
#if defined BT_USE_SIMD_VECTOR3
|
||||
|
||||
#if DEBUG
|
||||
#include <string.h>//for memset
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <stdint.h>
|
||||
|
|
@ -43,7 +51,7 @@ long _maxdot_large( const float *vv, const float *vec, unsigned long count, floa
|
|||
long _maxdot_large( const float *vv, const float *vec, unsigned long count, float *dotResult )
|
||||
{
|
||||
const float4 *vertices = (const float4*) vv;
|
||||
static const unsigned char indexTable[16] = {-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
|
||||
static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
|
||||
float4 dotMax = btAssign128( -BT_INFINITY, -BT_INFINITY, -BT_INFINITY, -BT_INFINITY );
|
||||
float4 vvec = _mm_loadu_ps( vec );
|
||||
float4 vHi = btCastiTo128f(_mm_shuffle_epi32( btCastfTo128i( vvec), 0xaa )); /// zzzz
|
||||
|
|
@ -428,7 +436,7 @@ long _mindot_large( const float *vv, const float *vec, unsigned long count, floa
|
|||
long _mindot_large( const float *vv, const float *vec, unsigned long count, float *dotResult )
|
||||
{
|
||||
const float4 *vertices = (const float4*) vv;
|
||||
static const unsigned char indexTable[16] = {-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
|
||||
static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
|
||||
float4 dotmin = btAssign128( BT_INFINITY, BT_INFINITY, BT_INFINITY, BT_INFINITY );
|
||||
float4 vvec = _mm_loadu_ps( vec );
|
||||
float4 vHi = btCastiTo128f(_mm_shuffle_epi32( btCastfTo128i( vvec), 0xaa )); /// zzzz
|
||||
|
|
@ -815,7 +823,8 @@ long _mindot_large( const float *vv, const float *vec, unsigned long count, floa
|
|||
#elif defined BT_USE_NEON
|
||||
#define ARM_NEON_GCC_COMPATIBILITY 1
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h> //for sysctlbyname
|
||||
|
||||
static long _maxdot_large_v0( const float *vv, const float *vec, unsigned long count, float *dotResult );
|
||||
static long _maxdot_large_v1( const float *vv, const float *vec, unsigned long count, float *dotResult );
|
||||
|
|
@ -827,11 +836,34 @@ static long _mindot_large_sel( const float *vv, const float *vec, unsigned long
|
|||
long (*_maxdot_large)( const float *vv, const float *vec, unsigned long count, float *dotResult ) = _maxdot_large_sel;
|
||||
long (*_mindot_large)( const float *vv, const float *vec, unsigned long count, float *dotResult ) = _mindot_large_sel;
|
||||
|
||||
extern "C" {int _get_cpu_capabilities( void );}
|
||||
|
||||
static inline uint32_t btGetCpuCapabilities( void )
|
||||
{
|
||||
static uint32_t capabilities = 0;
|
||||
static bool testedCapabilities = false;
|
||||
|
||||
if( 0 == testedCapabilities)
|
||||
{
|
||||
uint32_t hasFeature = 0;
|
||||
size_t featureSize = sizeof( hasFeature );
|
||||
int err = sysctlbyname( "hw.optional.neon_hpfp", &hasFeature, &featureSize, NULL, 0 );
|
||||
|
||||
if( 0 == err && hasFeature)
|
||||
capabilities |= 0x2000;
|
||||
|
||||
testedCapabilities = true;
|
||||
}
|
||||
|
||||
return capabilities;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static long _maxdot_large_sel( const float *vv, const float *vec, unsigned long count, float *dotResult )
|
||||
{
|
||||
if( _get_cpu_capabilities() & 0x2000 )
|
||||
|
||||
if( btGetCpuCapabilities() & 0x2000 )
|
||||
_maxdot_large = _maxdot_large_v1;
|
||||
else
|
||||
_maxdot_large = _maxdot_large_v0;
|
||||
|
|
@ -841,7 +873,8 @@ static long _maxdot_large_sel( const float *vv, const float *vec, unsigned long
|
|||
|
||||
static long _mindot_large_sel( const float *vv, const float *vec, unsigned long count, float *dotResult )
|
||||
{
|
||||
if( _get_cpu_capabilities() & 0x2000 )
|
||||
|
||||
if( btGetCpuCapabilities() & 0x2000 )
|
||||
_mindot_large = _mindot_large_v1;
|
||||
else
|
||||
_mindot_large = _mindot_large_v0;
|
||||
|
|
@ -864,8 +897,8 @@ long _maxdot_large_v0( const float *vv, const float *vec, unsigned long count, f
|
|||
float32x2_t dotMaxHi = (float32x2_t) { -BT_INFINITY, -BT_INFINITY };
|
||||
uint32x2_t indexLo = (uint32x2_t) {0, 1};
|
||||
uint32x2_t indexHi = (uint32x2_t) {2, 3};
|
||||
uint32x2_t iLo = (uint32x2_t) {-1, -1};
|
||||
uint32x2_t iHi = (uint32x2_t) {-1, -1};
|
||||
uint32x2_t iLo = (uint32x2_t) {static_cast<uint32_t>(-1), static_cast<uint32_t>(-1)};
|
||||
uint32x2_t iHi = (uint32x2_t) {static_cast<uint32_t>(-1), static_cast<uint32_t>(-1)};
|
||||
const uint32x2_t four = (uint32x2_t) {4,4};
|
||||
|
||||
for( ; i+8 <= count; i+= 8 )
|
||||
|
|
@ -1051,7 +1084,7 @@ long _maxdot_large_v1( const float *vv, const float *vec, unsigned long count, f
|
|||
float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0);
|
||||
const uint32x4_t four = (uint32x4_t){ 4, 4, 4, 4 };
|
||||
uint32x4_t local_index = (uint32x4_t) {0, 1, 2, 3};
|
||||
uint32x4_t index = (uint32x4_t) { -1, -1, -1, -1 };
|
||||
uint32x4_t index = (uint32x4_t) { static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1) };
|
||||
float32x4_t maxDot = (float32x4_t) { -BT_INFINITY, -BT_INFINITY, -BT_INFINITY, -BT_INFINITY };
|
||||
|
||||
unsigned long i = 0;
|
||||
|
|
@ -1249,8 +1282,8 @@ long _mindot_large_v0( const float *vv, const float *vec, unsigned long count, f
|
|||
float32x2_t dotMinHi = (float32x2_t) { BT_INFINITY, BT_INFINITY };
|
||||
uint32x2_t indexLo = (uint32x2_t) {0, 1};
|
||||
uint32x2_t indexHi = (uint32x2_t) {2, 3};
|
||||
uint32x2_t iLo = (uint32x2_t) {-1, -1};
|
||||
uint32x2_t iHi = (uint32x2_t) {-1, -1};
|
||||
uint32x2_t iLo = (uint32x2_t) {static_cast<uint32_t>(-1), static_cast<uint32_t>(-1)};
|
||||
uint32x2_t iHi = (uint32x2_t) {static_cast<uint32_t>(-1), static_cast<uint32_t>(-1)};
|
||||
const uint32x2_t four = (uint32x2_t) {4,4};
|
||||
|
||||
for( ; i+8 <= count; i+= 8 )
|
||||
|
|
@ -1434,7 +1467,7 @@ long _mindot_large_v1( const float *vv, const float *vec, unsigned long count, f
|
|||
float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0);
|
||||
const uint32x4_t four = (uint32x4_t){ 4, 4, 4, 4 };
|
||||
uint32x4_t local_index = (uint32x4_t) {0, 1, 2, 3};
|
||||
uint32x4_t index = (uint32x4_t) { -1, -1, -1, -1 };
|
||||
uint32x4_t index = (uint32x4_t) { static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1) };
|
||||
float32x4_t minDot = (float32x4_t) { BT_INFINITY, BT_INFINITY, BT_INFINITY, BT_INFINITY };
|
||||
|
||||
unsigned long i = 0;
|
||||
|
|
|
|||
|
|
@ -53,19 +53,24 @@ subject to the following restrictions:
|
|||
#define btvxyzMaskf btvFFF0fMask
|
||||
#define btvAbsfMask btCastiTo128f(btvAbsMask)
|
||||
|
||||
//there is an issue with XCode 3.2 (LCx errors)
|
||||
#define btvMzeroMask (_mm_set_ps(-0.0f, -0.0f, -0.0f, -0.0f))
|
||||
#define v1110 (_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f))
|
||||
#define vHalf (_mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f))
|
||||
#define v1_5 (_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f))
|
||||
|
||||
|
||||
const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
|
||||
const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
|
||||
const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const __m128 ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
|
||||
//const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
|
||||
//const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
|
||||
//const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
//const __m128 ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef BT_USE_NEON
|
||||
|
||||
const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
|
||||
const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
|
||||
const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){static_cast<int32_t>(0xFFFFFFFF),
|
||||
static_cast<int32_t>(0xFFFFFFFF), static_cast<int32_t>(0xFFFFFFFF), 0x0};
|
||||
const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
|
||||
const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
|
||||
|
||||
|
|
@ -229,7 +234,7 @@ public:
|
|||
* @param v The other vector in the dot product */
|
||||
SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
|
||||
{
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
|
||||
__m128 z = _mm_movehl_ps(vd, vd);
|
||||
__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
|
||||
|
|
@ -260,6 +265,12 @@ public:
|
|||
return btSqrt(length2());
|
||||
}
|
||||
|
||||
/**@brief Return the norm (length) of the vector; this simply forwards to length() */
|
||||
SIMD_FORCE_INLINE btScalar norm() const
|
||||
{
|
||||
return length();
|
||||
}
|
||||
|
||||
/**@brief Return the distance squared between the ends of this and another vector
|
||||
* This is semantically treating the vector like a point */
|
||||
SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
|
||||
|
|
@ -285,6 +296,9 @@ public:
|
|||
* x^2 + y^2 + z^2 = 1 */
|
||||
SIMD_FORCE_INLINE btVector3& normalize()
|
||||
{
|
||||
|
||||
btAssert(length() != btScalar(0));
|
||||
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
// dot product first
|
||||
__m128 vd = _mm_mul_ps(mVec128, mVec128);
|
||||
|
|
@ -345,7 +359,8 @@ public:
|
|||
/**@brief Return a vector with the absolute values of each element */
|
||||
SIMD_FORCE_INLINE btVector3 absolute() const
|
||||
{
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return btVector3(vabsq_f32(mVec128));
|
||||
|
|
@ -400,7 +415,7 @@ public:
|
|||
|
||||
SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
|
||||
{
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
// cross:
|
||||
__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
|
||||
__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
|
||||
|
|
@ -632,7 +647,7 @@ public:
|
|||
|
||||
void getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
|
||||
{
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
|
||||
__m128 V = _mm_and_ps(mVec128, btvFFF0fMask);
|
||||
__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
|
||||
|
|
@ -702,7 +717,7 @@ public:
|
|||
/* create a vector as btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 )) */
|
||||
SIMD_FORCE_INLINE btVector3 dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
|
||||
{
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
|
||||
__m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
|
||||
__m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
|
||||
|
|
@ -717,7 +732,7 @@ public:
|
|||
return btVector3(r);
|
||||
|
||||
#elif defined(BT_USE_NEON)
|
||||
static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
|
||||
static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
|
||||
float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
|
||||
float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
|
||||
float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
|
||||
|
|
@ -768,7 +783,7 @@ operator*(const btVector3& v1, const btVector3& v2)
|
|||
SIMD_FORCE_INLINE btVector3
|
||||
operator-(const btVector3& v1, const btVector3& v2)
|
||||
{
|
||||
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
|
||||
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
|
||||
|
||||
// without _mm_and_ps this code causes slowdown in Concave moving
|
||||
__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
|
||||
|
|
@ -788,7 +803,7 @@ operator-(const btVector3& v1, const btVector3& v2)
|
|||
SIMD_FORCE_INLINE btVector3
|
||||
operator-(const btVector3& v)
|
||||
{
|
||||
#if (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
|
||||
__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
|
||||
return btVector3(_mm_and_ps(r, btvFFF0fMask));
|
||||
#elif defined(BT_USE_NEON)
|
||||
|
|
@ -842,7 +857,7 @@ operator/(const btVector3& v, const btScalar& s)
|
|||
SIMD_FORCE_INLINE btVector3
|
||||
operator/(const btVector3& v1, const btVector3& v2)
|
||||
{
|
||||
#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
|
||||
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
|
||||
__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
|
||||
vec = _mm_and_ps(vec, btvFFF0fMask);
|
||||
return btVector3(vec);
|
||||
|
|
@ -935,20 +950,16 @@ SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
|
|||
|
||||
SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
|
||||
{
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
btVector3 norm = *this;
|
||||
|
||||
return norm.normalize();
|
||||
#else
|
||||
return *this / length();
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
|
||||
{
|
||||
// wAxis must be a unit length vector
|
||||
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
|
||||
__m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
|
||||
btScalar ssin = btSin( _angle );
|
||||
|
|
@ -988,7 +999,7 @@ SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btS
|
|||
|
||||
SIMD_FORCE_INLINE long btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
|
||||
{
|
||||
#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
|
||||
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
|
||||
#if defined _WIN32 || defined (BT_USE_SSE)
|
||||
const long scalar_cutoff = 10;
|
||||
long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
|
||||
|
|
@ -996,10 +1007,8 @@ SIMD_FORCE_INLINE long btVector3::maxDot( const btVector3 *array, long arra
|
|||
const long scalar_cutoff = 4;
|
||||
extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
|
||||
#endif
|
||||
if( array_count < scalar_cutoff )
|
||||
#else
|
||||
|
||||
#endif//BT_USE_SSE || BT_USE_NEON
|
||||
if( array_count < scalar_cutoff )
|
||||
#endif
|
||||
{
|
||||
btScalar maxDot = -SIMD_INFINITY;
|
||||
int i = 0;
|
||||
|
|
@ -1018,14 +1027,14 @@ SIMD_FORCE_INLINE long btVector3::maxDot( const btVector3 *array, long arra
|
|||
dotOut = maxDot;
|
||||
return ptIndex;
|
||||
}
|
||||
#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
|
||||
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
|
||||
return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE long btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
|
||||
{
|
||||
#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
|
||||
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
|
||||
#if defined BT_USE_SSE
|
||||
const long scalar_cutoff = 10;
|
||||
long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
|
||||
|
|
@ -1037,7 +1046,7 @@ SIMD_FORCE_INLINE long btVector3::minDot( const btVector3 *array, long arra
|
|||
#endif
|
||||
|
||||
if( array_count < scalar_cutoff )
|
||||
#endif//BT_USE_SSE || BT_USE_NEON
|
||||
#endif
|
||||
{
|
||||
btScalar minDot = SIMD_INFINITY;
|
||||
int i = 0;
|
||||
|
|
@ -1058,9 +1067,9 @@ SIMD_FORCE_INLINE long btVector3::minDot( const btVector3 *array, long arra
|
|||
|
||||
return ptIndex;
|
||||
}
|
||||
#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
|
||||
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
|
||||
return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
|
||||
#endif
|
||||
#endif//BT_USE_SIMD_VECTOR3
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1098,7 +1107,7 @@ public:
|
|||
|
||||
SIMD_FORCE_INLINE btVector4 absolute4() const
|
||||
{
|
||||
#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return btVector4(vabsq_f32(mVec128));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue