Bullet 2.82 update

2026-07-12 23:24:41 +00:00 · 2014-06-10 22:40:30 +10:00 · 2014-06-10 22:40:30 +10:00 · 416c50690e
commit 416c50690e
parent d0a64026b0
146 changed files with 12202 additions and 1422 deletions
--- a/Engine/lib/bullet/src/LinearMath/CMakeLists.txt
+++ b/Engine/lib/bullet/src/LinearMath/CMakeLists.txt
@ -54,7 +54,10 @@ IF (INSTALL_LIBS)
 			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
 				INSTALL(TARGETS LinearMath DESTINATION .)
 			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-				INSTALL(TARGETS LinearMath DESTINATION lib${LIB_SUFFIX})
+				INSTALL(TARGETS LinearMath 
+					RUNTIME DESTINATION bin
+					LIBRARY DESTINATION lib${LIB_SUFFIX}
+					ARCHIVE DESTINATION lib${LIB_SUFFIX})
 				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
 DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h"  PATTERN
 ".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
--- a/Engine/lib/bullet/src/LinearMath/btConvexHullComputer.cpp
+++ b/Engine/lib/bullet/src/LinearMath/btConvexHullComputer.cpp
@ -1931,11 +1931,15 @@ void btConvexHullInternal::merge(IntermediateHull& h0, IntermediateHull& h1)
 	}
 }

-
-static bool pointCmp(const btConvexHullInternal::Point32& p, const btConvexHullInternal::Point32& q)
+class pointCmp
 {
-	return (p.y < q.y) || ((p.y == q.y) && ((p.x < q.x) || ((p.x == q.x) && (p.z < q.z))));
-}
+	public:
+
+    bool operator() ( const btConvexHullInternal::Point32& p, const btConvexHullInternal::Point32& q ) const
+		{
+			return (p.y < q.y) || ((p.y == q.y) && ((p.x < q.x) || ((p.x == q.x) && (p.z < q.z))));
+		}
+};

 void btConvexHullInternal::compute(const void* coords, bool doubleCoords, int stride, int count)
 {
@ -2026,7 +2030,7 @@ void btConvexHullInternal::compute(const void* coords, bool doubleCoords, int st
 			points[i].index = i;
 		}
 	}
-	points.quickSort(pointCmp);
+	points.quickSort(pointCmp());

 	vertexPool.reset();
 	vertexPool.setArraySize(count);
--- a/Engine/lib/bullet/src/LinearMath/btIDebugDraw.h
+++ b/Engine/lib/bullet/src/LinearMath/btIDebugDraw.h
@ -62,29 +62,17 @@ class	btIDebugDraw

 	virtual void	drawSphere(btScalar radius, const btTransform& transform, const btVector3& color)
 	{
-		btVector3 start = transform.getOrigin();
-
-		const btVector3 xoffs = transform.getBasis() * btVector3(radius,0,0);
-		const btVector3 yoffs = transform.getBasis() * btVector3(0,radius,0);
-		const btVector3 zoffs = transform.getBasis() * btVector3(0,0,radius);
-
-		// XY 
-		drawLine(start-xoffs, start+yoffs, color);
-		drawLine(start+yoffs, start+xoffs, color);
-		drawLine(start+xoffs, start-yoffs, color);
-		drawLine(start-yoffs, start-xoffs, color);
-
-		// XZ
-		drawLine(start-xoffs, start+zoffs, color);
-		drawLine(start+zoffs, start+xoffs, color);
-		drawLine(start+xoffs, start-zoffs, color);
-		drawLine(start-zoffs, start-xoffs, color);
-
-		// YZ
-		drawLine(start-yoffs, start+zoffs, color);
-		drawLine(start+zoffs, start+yoffs, color);
-		drawLine(start+yoffs, start-zoffs, color);
-		drawLine(start-zoffs, start-yoffs, color);
+		
+		btVector3 center = transform.getOrigin();
+		btVector3 up = transform.getBasis().getColumn(1);
+		btVector3 axis = transform.getBasis().getColumn(0);
+		btScalar minTh = -SIMD_HALF_PI;
+		btScalar maxTh = SIMD_HALF_PI;
+		btScalar minPs = -SIMD_HALF_PI;
+		btScalar maxPs = SIMD_HALF_PI;
+		btScalar stepDegrees = 30.f;
+		drawSpherePatch(center, up, axis, radius,minTh, maxTh, minPs, maxPs, color, stepDegrees ,false);
+		drawSpherePatch(center, up, -axis, radius,minTh, maxTh, minPs, maxPs, color, stepDegrees,false );
 	}
 	
 	virtual void	drawSphere (const btVector3& p, btScalar radius, const btVector3& color)
@ -179,7 +167,7 @@ class	btIDebugDraw
 		}
 	}
 	virtual void drawSpherePatch(const btVector3& center, const btVector3& up, const btVector3& axis, btScalar radius, 
-		btScalar minTh, btScalar maxTh, btScalar minPs, btScalar maxPs, const btVector3& color, btScalar stepDegrees = btScalar(10.f))
+		btScalar minTh, btScalar maxTh, btScalar minPs, btScalar maxPs, const btVector3& color, btScalar stepDegrees = btScalar(10.f),bool drawCenter = true)
 	{
 		btVector3 vA[74];
 		btVector3 vB[74];
@ -261,18 +249,22 @@ class	btIDebugDraw
 				{
 					drawLine(npole, pvB[j], color);
 				}
-				if(isClosed)
+				
+				if (drawCenter)
 				{
-					if(j == (n_vert-1))
+					if(isClosed)
 					{
-						drawLine(arcStart, pvB[j], color);
+						if(j == (n_vert-1))
+						{
+							drawLine(arcStart, pvB[j], color);
+						}
 					}
-				}
-				else
-				{
-					if(((!i) || (i == (n_hor-1))) && ((!j) || (j == (n_vert-1))))
+					else
 					{
-						drawLine(center, pvB[j], color);
+						if(((!i) || (i == (n_hor-1))) && ((!j) || (j == (n_vert-1))))
+						{
+							drawLine(center, pvB[j], color);
+						}
 					}
 				}
 			}
@ -314,6 +306,8 @@ class	btIDebugDraw

 	virtual void drawCapsule(btScalar radius, btScalar halfHeight, int upAxis, const btTransform& transform, const btVector3& color)
 	{
+		int stepDegrees = 30;
+
 		btVector3 capStart(0.f,0.f,0.f);
 		capStart[upAxis] = -halfHeight;

@ -325,34 +319,47 @@ class	btIDebugDraw

 			btTransform childTransform = transform;
 			childTransform.getOrigin() = transform * capStart;
-			drawSphere(radius, childTransform, color);
+			{
+				btVector3 center = childTransform.getOrigin();
+				btVector3 up = childTransform.getBasis().getColumn((upAxis+1)%3);
+				btVector3 axis = -childTransform.getBasis().getColumn(upAxis);
+				btScalar minTh = -SIMD_HALF_PI;
+				btScalar maxTh = SIMD_HALF_PI;
+				btScalar minPs = -SIMD_HALF_PI;
+				btScalar maxPs = SIMD_HALF_PI;
+				
+				drawSpherePatch(center, up, axis, radius,minTh, maxTh, minPs, maxPs, color, btScalar(stepDegrees) ,false);
+			}
+
+
+
 		}

 		{
 			btTransform childTransform = transform;
 			childTransform.getOrigin() = transform * capEnd;
-			drawSphere(radius, childTransform, color);
+			{
+				btVector3 center = childTransform.getOrigin();
+				btVector3 up = childTransform.getBasis().getColumn((upAxis+1)%3);
+				btVector3 axis = childTransform.getBasis().getColumn(upAxis);
+				btScalar minTh = -SIMD_HALF_PI;
+				btScalar maxTh = SIMD_HALF_PI;
+				btScalar minPs = -SIMD_HALF_PI;
+				btScalar maxPs = SIMD_HALF_PI;
+				drawSpherePatch(center, up, axis, radius,minTh, maxTh, minPs, maxPs, color, btScalar(stepDegrees) ,false);
+			}
 		}

 		// Draw some additional lines
 		btVector3 start = transform.getOrigin();

-		capStart[(upAxis+1)%3] = radius;
-		capEnd[(upAxis+1)%3] = radius;
-		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
-		capStart[(upAxis+1)%3] = -radius;
-		capEnd[(upAxis+1)%3] = -radius;
-		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
-
-		capStart[(upAxis+1)%3] = 0.f;
-		capEnd[(upAxis+1)%3] = 0.f;
-
-		capStart[(upAxis+2)%3] = radius;
-		capEnd[(upAxis+2)%3] = radius;
-		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
-		capStart[(upAxis+2)%3] = -radius;
-		capEnd[(upAxis+2)%3] = -radius;
-		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		for (int i=0;i<360;i+=stepDegrees)
+		{
+			capEnd[(upAxis+1)%3] = capStart[(upAxis+1)%3] = btSin(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
+			capEnd[(upAxis+2)%3] = capStart[(upAxis+2)%3]  = btCos(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
+			drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		}
+		
 	}

 	virtual void drawCylinder(btScalar radius, btScalar halfHeight, int upAxis, const btTransform& transform, const btVector3& color)
@ -360,11 +367,18 @@ class	btIDebugDraw
 		btVector3 start = transform.getOrigin();
 		btVector3	offsetHeight(0,0,0);
 		offsetHeight[upAxis] = halfHeight;
-		btVector3	offsetRadius(0,0,0);
-		offsetRadius[(upAxis+1)%3] = radius;
-		drawLine(start+transform.getBasis() * (offsetHeight+offsetRadius),start+transform.getBasis() * (-offsetHeight+offsetRadius),color);
-		drawLine(start+transform.getBasis() * (offsetHeight-offsetRadius),start+transform.getBasis() * (-offsetHeight-offsetRadius),color);
+		int stepDegrees=30;
+		btVector3 capStart(0.f,0.f,0.f);
+		capStart[upAxis] = -halfHeight;
+		btVector3 capEnd(0.f,0.f,0.f);
+		capEnd[upAxis] = halfHeight;

+		for (int i=0;i<360;i+=stepDegrees)
+		{
+			capEnd[(upAxis+1)%3] = capStart[(upAxis+1)%3] = btSin(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
+			capEnd[(upAxis+2)%3] = capStart[(upAxis+2)%3]  = btCos(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
+			drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		}
 		// Drawing top and bottom caps of the cylinder
 		btVector3 yaxis(0,0,0);
 		yaxis[upAxis] = btScalar(1.0);
@ -376,16 +390,28 @@ class	btIDebugDraw

 	virtual void drawCone(btScalar radius, btScalar height, int upAxis, const btTransform& transform, const btVector3& color)
 	{
-
+		int stepDegrees = 30;
 		btVector3 start = transform.getOrigin();

 		btVector3	offsetHeight(0,0,0);
-		offsetHeight[upAxis] = height * btScalar(0.5);
+		btScalar halfHeight = height * btScalar(0.5);
+		offsetHeight[upAxis] = halfHeight;
 		btVector3	offsetRadius(0,0,0);
 		offsetRadius[(upAxis+1)%3] = radius;
 		btVector3	offset2Radius(0,0,0);
 		offset2Radius[(upAxis+2)%3] = radius;

+
+		btVector3 capEnd(0.f,0.f,0.f);
+		capEnd[upAxis] = -halfHeight;
+
+		for (int i=0;i<360;i+=stepDegrees)
+		{
+			capEnd[(upAxis+1)%3] = btSin(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
+			capEnd[(upAxis+2)%3] = btCos(btScalar(i)*SIMD_RADS_PER_DEG)*radius;
+			drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * capEnd, color);
+		}
+
 		drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight+offsetRadius),color);
 		drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight-offsetRadius),color);
 		drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight+offset2Radius),color);
--- a/Engine/lib/bullet/src/LinearMath/btMatrix3x3.h
+++ b/Engine/lib/bullet/src/LinearMath/btMatrix3x3.h
@ -22,10 +22,15 @@ subject to the following restrictions:

 #ifdef BT_USE_SSE
 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
-const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
+//const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
+#define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
 #endif

-#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
+#if defined(BT_USE_SSE)
+#define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
+#define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
+#define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
+#elif defined(BT_USE_NEON)
 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
@ -207,7 +212,7 @@ public:
 		btFullAssert(d != btScalar(0.0));
 		btScalar s = btScalar(2.0) / d;
    
-    #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+    #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
        __m128	vs, Q = q.get128();
 		__m128i Qi = btCastfTo128i(Q);
        __m128	Y, Z;
@ -341,7 +346,7 @@ public:
 	* @param m The array to be filled */
 	void getOpenGLSubMatrix(btScalar *m) const 
 	{
-#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
        __m128 v0 = m_el[0].mVec128;
        __m128 v1 = m_el[1].mVec128;
        __m128 v2 = m_el[2].mVec128;    //  x2 y2 z2 w2
@ -362,7 +367,7 @@ public:
        vm[2] = v2;
 #elif defined(BT_USE_NEON)
        // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
-        static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
+        static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
        float32x4_t *vm = (float32x4_t *)m;
        float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );  // {x0 x1 z0 z1}, {y0 y1 w0 w1}
        float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );       // {x2  0 }, {y2 0}
@ -740,7 +745,7 @@ public:
 SIMD_FORCE_INLINE btMatrix3x3& 
 btMatrix3x3::operator*=(const btMatrix3x3& m)
 {
-#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
    __m128 rv00, rv01, rv02;
    __m128 rv10, rv11, rv12;
    __m128 rv20, rv21, rv22;
@ -953,7 +958,7 @@ btMatrix3x3::determinant() const
 SIMD_FORCE_INLINE btMatrix3x3 
 btMatrix3x3::absolute() const
 {
-#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
    return btMatrix3x3(
            _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
            _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
@ -974,7 +979,7 @@ btMatrix3x3::absolute() const
 SIMD_FORCE_INLINE btMatrix3x3 
 btMatrix3x3::transpose() const 
 {
-#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
    __m128 v0 = m_el[0].mVec128;
    __m128 v1 = m_el[1].mVec128;
    __m128 v2 = m_el[2].mVec128;    //  x2 y2 z2 w2
@ -993,7 +998,7 @@ btMatrix3x3::transpose() const
    return btMatrix3x3( v0, v1, v2 );
 #elif defined(BT_USE_NEON)
    // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
-    static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
+    static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
    float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );  // {x0 x1 z0 z1}, {y0 y1 w0 w1}
    float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );       // {x2  0 }, {y2 0}
    float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
@ -1031,7 +1036,7 @@ btMatrix3x3::inverse() const
 SIMD_FORCE_INLINE btMatrix3x3 
 btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
 {
-#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
    // zeros w
 //    static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
    __m128 row = m_el[0].mVec128;
@ -1053,7 +1058,7 @@ btMatrix3x3::transposeTimes(const btMatrix3x3& m) const

 #elif defined BT_USE_NEON
    // zeros w
-    static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
+    static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
    float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
    float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
    float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
@ -1151,7 +1156,7 @@ operator*(const btMatrix3x3& m, const btVector3& v)
 SIMD_FORCE_INLINE btVector3
 operator*(const btVector3& v, const btMatrix3x3& m)
 {
-#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

    const __m128 vv = v.mVec128;

@ -1191,7 +1196,7 @@ operator*(const btVector3& v, const btMatrix3x3& m)
 SIMD_FORCE_INLINE btMatrix3x3 
 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
 {
-#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

    __m128 m10 = m1[0].mVec128;  
    __m128 m11 = m1[1].mVec128;
--- a/Engine/lib/bullet/src/LinearMath/btMatrixX.h
+++ b/Engine/lib/bullet/src/LinearMath/btMatrixX.h
@ -0,0 +1,504 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///original version written by Erwin Coumans, October 2013
+
+#ifndef BT_MATRIX_X_H
+#define BT_MATRIX_X_H
+
+#include "LinearMath/btQuickprof.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+/// Ascending "less than" comparison functor for ints; passed to quickSort() when sorting index lists.
+class btIntSortPredicate
+{
+	public:
+		bool operator() ( const int& lhs, const int& rhs ) const
+		{
+			 return rhs > lhs;
+		}
+};
+
+
+template <typename T> 
+struct btMatrixX
+{
+	/// Matrix dimensions; set by the constructors and by resize().
+	int m_rows;
+	int m_cols;
+	/// Zero-initialized by the constructors; no method in this struct modifies it.
+	int m_operations;
+	/// Incremented on every resize() call.
+	int m_resizeOperations;
+	/// Incremented on every setElem() call.
+	int m_setElemOperations;
+
+	/// Flat element storage; element (row,col) lives at index row*m_cols+col.
+	btAlignedObjectArray<T>	m_storage;
+	/// m_rowNonZeroElements1[row] lists the column indices setElem() recorded for that row.
+	btAlignedObjectArray< btAlignedObjectArray<int> > m_rowNonZeroElements1;
+	/// m_colNonZeroElements[col] lists the row indices setElem() recorded for that column.
+	btAlignedObjectArray< btAlignedObjectArray<int> > m_colNonZeroElements;
+
+	/// Mutable pointer to the first stored element, or 0 when the storage is empty.
+	T* getBufferPointerWritable() 
+	{
+		if (m_storage.size() == 0)
+		{
+			return 0;
+		}
+		return &m_storage[0];
+	}
+
+	/// Read-only pointer to the first stored element, or 0 when the storage is empty.
+	const T* getBufferPointer() const
+	{
+		if (m_storage.size() == 0)
+		{
+			return 0;
+		}
+		return &m_storage[0];
+	}
+	/// Creates an empty 0x0 matrix with every bookkeeping counter set to zero.
+	btMatrixX()
+		: m_rows(0)
+		, m_cols(0)
+		, m_operations(0)
+		, m_resizeOperations(0)
+		, m_setElemOperations(0)
+	{
+	}
+	/// Creates a rows x cols matrix; the storage and the index lists are sized by resize().
+	btMatrixX(int rows,int cols)
+		: m_rows(rows)
+		, m_cols(cols)
+		, m_operations(0)
+		, m_resizeOperations(0)
+		, m_setElemOperations(0)
+	{
+		// resize() assigns m_rows/m_cols again and increments m_resizeOperations.
+		resize(rows,cols);
+	}
+	/// Sets the dimensions to rows x cols, resizes the flat storage to rows*cols elements
+	/// and resets the per-row / per-column non-zero index lists. Counts the call in
+	/// m_resizeOperations.
+	void resize(int rows, int cols)
+	{
+		++m_resizeOperations;
+		m_rows = rows;
+		m_cols = cols;
+		{
+			// braces keep the BT_PROFILE marker around the storage resize only
+			BT_PROFILE("m_storage.resize");
+			const int numElements = rows*cols;
+			m_storage.resize(numElements);
+		}
+		clearSparseInfo();
+	}
+	/// Returns the number of columns (m_cols).
+	int cols() const
+	{
+		return m_cols;
+	}
+	/// Returns the number of rows (m_rows).
+	int rows() const
+	{
+		return m_rows;
+	}
+	///we don't want this read/write operator(), because we cannot keep track of non-zero elements, use setElem instead
+	/*T& operator() (int row,int col)
+	{
+		return m_storage[col*m_rows+row];
+	}
+	*/
+
+	/// Adds val to element (row,col). A zero val is ignored. When the element currently
+	/// holds zero the write is routed through setElem() so the non-zero index lists are
+	/// updated; otherwise the stored value is incremented in place.
+	void addElem(int row,int col, T val)
+	{
+		if (!val)
+			return;
+
+		const int flatIndex = row*m_cols+col;
+		if (m_storage[flatIndex]==0.f)
+		{
+			setElem(row,col,val);
+			return;
+		}
+		m_storage[flatIndex] += val;
+	}
+	
+	/// For every (row,col) recorded in the per-row non-zero index lists, writes that
+	/// element's value to the mirrored position (col,row) through setElem().
+	/// NOTE(review): setElem() may append to these same index lists while they are being
+	/// walked; the name suggests only the lower triangle is populated on entry - confirm with callers.
+	void copyLowerToUpperTriangle()
+	{
+		// count is only consumed by the commented-out diagnostic printf below
+		int count=0;
+		for (int row=0;row<m_rowNonZeroElements1.size();row++)
+		{
+			for (int j=0;j<m_rowNonZeroElements1[row].size();j++)
+			{
+				int col = m_rowNonZeroElements1[row][j];
+				// mirror element (row,col) into (col,row)
+				setElem(col,row, (*this)(row,col));
+				count++;
+
+			}
+		}
+		//printf("copyLowerToUpperTriangle copied %d elements out of %dx%d=%d\n", count,rows(),cols(),cols()*rows());
+	}
+	/// Stores val at (row,col) and counts the call in m_setElemOperations.
+	/// A zero val is not written (the existing element is left untouched). If the element
+	/// held zero before the write, col / row are appended to the row / column non-zero
+	/// index lists.
+	void setElem(int row,int col, T val)
+	{
+		++m_setElemOperations;
+		if (!val)
+			return;
+
+		const int flatIndex = row*m_cols+col;
+		const bool wasZero = (m_storage[flatIndex]==0.f);
+		if (wasZero)
+		{
+			m_rowNonZeroElements1[row].push_back(col);
+			m_colNonZeroElements[col].push_back(row);
+		}
+		m_storage[flatIndex] = val;
+	}
+	/// Read-only access to element (row,col), stored at flat index row*m_cols+col.
+	const T& operator() (int row,int col) const
+	{
+		const int flatIndex = row*m_cols+col;
+		return m_storage[flatIndex];
+	}
+
+	/// Sizes the non-zero index tables to m_rows row lists and m_cols column lists,
+	/// then empties every list.
+	void clearSparseInfo()
+	{
+		BT_PROFILE("clearSparseInfo=0");
+		m_rowNonZeroElements1.resize(m_rows);
+		m_colNonZeroElements.resize(m_cols);
+		for (int row=0;row<m_rows;++row)
+		{
+			m_rowNonZeroElements1[row].resize(0);
+		}
+		for (int col=0;col<m_cols;++col)
+		{
+			m_colNonZeroElements[col].resize(0);
+		}
+	}
+
+	/// Sets every stored element to zero and empties the non-zero index lists.
+	/// An empty matrix (no storage) is handled without touching element 0.
+	void setZero()
+	{
+		{
+			BT_PROFILE("storage=0");
+			// getBufferPointerWritable() returns 0 for empty storage, so element 0 is
+			// never indexed when there is nothing to clear.
+			if (T* data = getBufferPointerWritable())
+			{
+				btSetZero(data,m_storage.size());
+			}
+		}
+		{
+			BT_PROFILE("clearSparseInfo=0");
+			clearSparseInfo();
+		}
+	}
+
+	/// Prints msg followed by the matrix contents: one line per row, each value
+	/// formatted with "%2.1f" and followed by a tab.
+	void	printMatrix(const char* msg)
+	{
+		printf("%s ---------------------\n",msg);
+		for (int r=0;r<rows();++r)
+		{
+			printf("\n");
+			for (int c=0;c<cols();++c)
+			{
+				const T& value = (*this)(r,c);
+				printf("%2.1f\t",value);
+			}
+		}
+		printf("\n---------------------\n");
+	}
+	/// Prints msg, then the total element count, the dimensions and the number (and
+	/// percentage) of non-zero elements as recorded in the column index lists.
+	/// The recorded count is cross-checked against a direct scan of the storage; a
+	/// mismatch trips btAssert and is reported on stdout.
+	void	printNumZeros(const char* msg)
+	{
+		printf("%s: ",msg);
+		int numZeros = 0;
+		for (int i=0;i<m_storage.size();i++)
+			if (m_storage[i]==0)
+				numZeros++;
+		int total = m_cols*m_rows;
+		int computedNonZero = total-numZeros;
+		int nonZero = 0;
+		for (int i=0;i<m_colNonZeroElements.size();i++)
+			nonZero += m_colNonZeroElements[i].size();
+		btAssert(computedNonZero==nonZero);
+		if(computedNonZero!=nonZero)
+		{
+			printf("Error: computedNonZero=%d, but nonZero=%d\n",computedNonZero,nonZero);
+		}
+		// "%%" emits a literal percent sign; a lone '%' followed by ')' is not a valid conversion.
+		printf("total %d, %d rows, %d cols, %d non-zeros (%f %%)\n", total, rows(),cols(), nonZero,100.f*(T)nonZero/T(total));
+	}
+	/*
+	void rowComputeNonZeroElements()
+	{
+		m_rowNonZeroElements1.resize(rows());
+		for (int i=0;i<rows();i++)
+		{
+			m_rowNonZeroElements1[i].resize(0);
+			for (int j=0;j<cols();j++)
+			{
+				if ((*this)(i,j)!=0.f)
+				{
+					m_rowNonZeroElements1[i].push_back(j);
+				}
+			}
+		}
+	}
+	*/
+	/// Returns the transpose as a new m_cols x m_rows matrix. The copy loop visits only
+	/// the entries listed in the column non-zero index lists (sparse-friendly).
+	btMatrixX transpose() const
+	{
+		btMatrixX result(m_cols,m_rows);
+		result.setZero();
+		for (int col=0;col<m_colNonZeroElements.size();++col)
+		{
+			const btAlignedObjectArray<int>& rowsInCol = m_colNonZeroElements[col];
+			for (int k=0;k<rowsInCol.size();++k)
+			{
+				const int row = rowsInCol[k];
+				const T value = (*this)(row,col);
+				result.setElem(col,row,value);
+			}
+		}
+		return result;
+	}
+
+	/// Sorts every row's list of non-zero column indices into ascending order.
+	void sortRowIndexArrays()
+	{
+		// Iterate over the rows (the outer array); the bound must not be the length of
+		// row i's own index list.
+		for (int i=0;i<m_rowNonZeroElements1.size();i++)
+		{
+			m_rowNonZeroElements1[i].quickSort(btIntSortPredicate());
+		}
+	}
+
+	/// Sorts every column's list of non-zero row indices into ascending order.
+	void sortColIndexArrays()
+	{
+		// Iterate over the columns (the outer array); the bound must not be the length of
+		// column i's own index list.
+		for (int i=0;i<m_colNonZeroElements.size();i++)
+		{
+			m_colNonZeroElements[i].quickSort(btIntSortPredicate());
+		}
+	}
+
+	/// Matrix product (*this) * other, optimized for sparse matrices: each dot product
+	/// walks only the columns recorded as non-zero in the corresponding row of this matrix.
+	/// Requires cols() == other.rows() (checked with btAssert). The result is
+	/// rows() x other.cols(); only non-zero dot products are stored, through setElem(),
+	/// which also maintains the result's non-zero index lists.
+	btMatrixX operator*(const btMatrixX& other)
+	{
+		btAssert(cols() == other.rows());
+
+		btMatrixX res(rows(),other.cols());
+		res.setZero();
+		for (int j=0; j < res.cols(); ++j)
+		{
+			for (int i=0; i < res.rows(); ++i)
+			{
+				T dotProd=0;
+				for (int q=0;q<m_rowNonZeroElements1[i].size();q++)
+				{
+					int v=m_rowNonZeroElements1[i][q];
+					T w = (*this)(i,v);
+					// skip terms whose partner in column j of 'other' is zero
+					if (other(v,j)!=0.f)
+					{
+						dotProd+=w*other(v,j);
+					}
+				}
+				if (dotProd)
+					res.setElem(i,j,dotProd);
+			}
+		}
+		return res;
+	}
+
+	/// Accumulating block product: B and C are read as consecutive rows of 8 scalars each.
+	/// For i in [0,numRows) and j in [0,numRowsOther) the dot product of B's i-th row and
+	/// C's j-th row is added to element (row+i,col+j) via addElem().
+	/// Entries 3 and 7 of every 8-scalar row are skipped, i.e. assumed to be zero.
+	void multiplyAdd2_p8r (const btScalar *B, const btScalar *C,  int numRows,  int numRowsOther ,int row, int col)
+	{
+		for ( int i = 0;i<numRows;i++)
+		{
+			const btScalar *bRow = B + 8*i;
+			for ( int j = 0;j<numRowsOther;j++)
+			{
+				const btScalar *cRow = C + 8*j;
+				btScalar sum = bRow[0]*cRow[0];
+				sum += bRow[1]*cRow[1];
+				sum += bRow[2]*cRow[2];
+				sum += bRow[4]*cRow[4];
+				sum += bRow[5]*cRow[5];
+				sum += bRow[6]*cRow[6];
+				addElem(row+i,col+j,sum);
+			}
+		}
+	}
+
+	/// Overwriting block product: B and C are read as consecutive rows of 8 scalars each.
+	/// For i in [0,numRows) and j in [0,numRowsOther) the dot product of B's i-th row and
+	/// C's j-th row is written to element (row+i,col+j) via setElem().
+	/// Entries 3 and 7 of every 8-scalar row are skipped, i.e. assumed to be zero.
+	void multiply2_p8r (const btScalar *B, const btScalar *C,  int numRows,  int numRowsOther, int row, int col)
+	{
+		btAssert (numRows>0 && numRowsOther>0 && B && C);
+		for ( int i = 0;i<numRows;i++)
+		{
+			const btScalar *bRow = B + 8*i;
+			for ( int j = 0;j<numRowsOther;j++)
+			{
+				const btScalar *cRow = C + 8*j;
+				btScalar sum = bRow[0]*cRow[0];
+				sum += bRow[1]*cRow[1];
+				sum += bRow[2]*cRow[2];
+				sum += bRow[4]*cRow[4];
+				sum += bRow[5]*cRow[5];
+				sum += bRow[6]*cRow[6];
+				setElem(row+i,col+j,sum);
+			}
+		}
+	}
+
+};
+
+/// Dynamically sized column vector backed by a btAlignedObjectArray.
+template <typename T> 
+struct btVectorX
+{
+	btAlignedObjectArray<T>	m_storage;
+
+	/// Creates an empty vector.
+	btVectorX()
+	{
+	}
+	/// Creates a vector holding numRows elements.
+	btVectorX(int numRows)
+	{
+		m_storage.resize(numRows);
+	}
+
+	/// Changes the number of elements to rows.
+	void resize(int rows)
+	{
+		m_storage.resize(rows);
+	}
+	/// A vector always has exactly one column.
+	int cols() const
+	{
+		return 1;
+	}
+	/// Number of elements.
+	int rows() const
+	{
+		return m_storage.size();
+	}
+	/// Same as rows().
+	int size() const
+	{
+		return rows();
+	}
+	/// Sets every element to zero; an empty vector is left untouched.
+	void	setZero()
+	{
+		// getBufferPointerWritable() returns 0 for an empty vector, so element 0 is
+		// never indexed when there is nothing to clear.
+		if (T* data = getBufferPointerWritable())
+		{
+			btSetZero(data,m_storage.size());
+		}
+	}
+	/// Read-only element access.
+	const T& operator[] (int index) const
+	{
+		return m_storage[index];
+	}
+
+	/// Mutable element access.
+	T& operator[] (int index)
+	{
+		return m_storage[index];
+	}
+
+	/// Mutable pointer to the first element, or 0 when the vector is empty.
+	T* getBufferPointerWritable() 
+	{
+		return m_storage.size() ? &m_storage[0] : 0;
+	}
+
+	/// Read-only pointer to the first element, or 0 when the vector is empty.
+	const T* getBufferPointer() const
+	{
+		return m_storage.size() ? &m_storage[0] : 0;
+	}
+
+};
+/*
+template <typename T> 
+void setElem(btMatrixX<T>& mat, int row, int col, T val)
+{
+	mat.setElem(row,col,val);
+}
+*/
+
+
+// Single-precision instantiations.
+typedef btMatrixX<float> btMatrixXf;
+typedef btVectorX<float> btVectorXf;
+
+// Double-precision instantiations.
+typedef btMatrixX<double> btMatrixXd;
+typedef btVectorX<double> btVectorXd;
+
+
+
+/// Free-function form of btMatrixXd::setElem(): forwards (row,col,val) to mat.setElem().
+inline void setElem(btMatrixXd& mat, int row, int col, double val)
+{
+	mat.setElem(row,col,val);
+}
+
+/// Free-function form of btMatrixXf::setElem(): forwards (row,col,val) to mat.setElem().
+inline void setElem(btMatrixXf& mat, int row, int col, float val)
+{
+	mat.setElem(row,col,val);
+}
+
+// btVectorXu / btMatrixXu name the double variants when BT_USE_DOUBLE_PRECISION is
+// defined and the float variants otherwise.
+#ifdef BT_USE_DOUBLE_PRECISION
+	#define btVectorXu btVectorXd
+	#define btMatrixXu btMatrixXd
+#else
+	#define btVectorXu btVectorXf
+	#define btMatrixXu btMatrixXf
+#endif //BT_USE_DOUBLE_PRECISION
+
+
+
+#endif//BT_MATRIX_X_H
--- a/Engine/lib/bullet/src/LinearMath/btPolarDecomposition.cpp
+++ b/Engine/lib/bullet/src/LinearMath/btPolarDecomposition.cpp
@ -60,10 +60,10 @@ unsigned int btPolarDecomposition::decompose(const btMatrix3x3& a, btMatrix3x3&
      break;

    const btScalar gamma = btPow(h_norm / u_norm, 0.25f);
-    const btScalar inv_gamma = 1.0 / gamma;
+    const btScalar inv_gamma = btScalar(1.0) / gamma;

    // Determine the delta to 'u'
-    const btMatrix3x3 delta = (u * (gamma - 2.0) + h.transpose() * inv_gamma) * 0.5;
+    const btMatrix3x3 delta = (u * (gamma - btScalar(2.0)) + h.transpose() * inv_gamma) * btScalar(0.5);

    // Update the matrices
    u += delta;
--- a/Engine/lib/bullet/src/LinearMath/btQuaternion.h
+++ b/Engine/lib/bullet/src/LinearMath/btQuaternion.h
@ -27,11 +27,17 @@ subject to the following restrictions:

 #ifdef BT_USE_SSE

-const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
+//const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
+#define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))

 #endif

-#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
+#if defined(BT_USE_SSE) 
+
+#define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
+#define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
+
+#elif defined(BT_USE_NEON)

 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
@ -285,7 +291,7 @@ public:
   * @param q The other quaternion */
 	btScalar dot(const btQuaternion& q) const
 	{
-#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 		__m128	vd;
 		
 		vd = _mm_mul_ps(mVec128, q.mVec128);
@ -384,7 +390,7 @@ public:
 	{
 		return *this / length();
 	} 
-  /**@brief Return the angle between this quaternion and the other 
+	/**@brief Return the ***half*** angle between this quaternion and the other
   * @param q The other quaternion */
 	btScalar angle(const btQuaternion& q) const 
 	{
@ -392,6 +398,19 @@ public:
 		btAssert(s != btScalar(0.0));
 		return btAcos(dot(q) / s);
 	}
+	
+	/**@brief Return the angle between this quaternion and the other along the shortest path
+	* @param q The other quaternion */
+	btScalar angleShortestPath(const btQuaternion& q) const 
+	{
+		const btScalar s = btSqrt(length2() * q.length2());
+		btAssert(s != btScalar(0.0));
+		// Long angle case (see http://en.wikipedia.org/wiki/Slerp): when the dot product
+		// is negative, measure against -q instead of q.
+		const btScalar cosHalfAngle = (dot(q) < 0) ? dot(-q) / s : dot(q) / s;
+		return btAcos(cosHalfAngle) * btScalar(2.0);
+	}
+
  /**@brief Return the angle of rotation represented by this quaternion */
 	btScalar getAngle() const 
 	{
@ -399,6 +418,19 @@ public:
 		return s;
 	}

+	/**@brief Return the angle of rotation represented by this quaternion along the shortest path*/
+	btScalar getAngleShortestPath() const 
+	{
+		// Branch on the sign of w. dot(*this) is a squared length and is never negative,
+		// so it cannot be used to decide between q and -q.
+		// Using |w| means a normalized quaternion yields an angle in [0, pi].
+		btScalar s;
+		if (m_floats[3] >= 0)
+			s = btScalar(2.) * btAcos(m_floats[3]);
+		else
+			s = btScalar(2.) * btAcos(-m_floats[3]);
+
+		return s;
+	}
+
+
 	/**@brief Return the axis of the rotation represented by this quaternion */
 	btVector3 getAxis() const
 	{
@ -498,7 +530,7 @@ public:
 	  btAssert(magnitude > btScalar(0));

    btScalar product = dot(q) / magnitude;
-    if (btFabs(product) != btScalar(1))
+    if (btFabs(product) < btScalar(1))
 		{
      // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
      const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
@ -835,7 +867,7 @@ quatRotate(const btQuaternion& rotation, const btVector3& v)
 {
 	btQuaternion q = rotation * v;
 	q *= rotation.inverse();
-#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 	return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
 #elif defined(BT_USE_NEON)
    return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
--- a/Engine/lib/bullet/src/LinearMath/btScalar.h
+++ b/Engine/lib/bullet/src/LinearMath/btScalar.h
@ -28,7 +28,7 @@ subject to the following restrictions:
 #include <float.h>

 /* SVN $Revision$ on $Date$ from http://bullet.googlecode.com*/
-#define BT_BULLET_VERSION 281
+#define BT_BULLET_VERSION 282

 inline int	btGetVersion()
 {
@ -68,6 +68,10 @@ inline int	btGetVersion()
 		#else

 #if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (BT_USE_DOUBLE_PRECISION))
+			#if _MSC_VER>1400
+				#define BT_USE_SIMD_VECTOR3
+			#endif
+
 			#define BT_USE_SSE
 			#ifdef BT_USE_SSE
 			//BT_USE_SSE_IN_API is disabled under Windows by default, because 
@ -159,7 +163,8 @@ inline int	btGetVersion()

 #if (defined (__APPLE__) && (!defined (BT_USE_DOUBLE_PRECISION)))
    #if defined (__i386__) || defined (__x86_64__)
-        #define BT_USE_SSE
+		#define BT_USE_SIMD_VECTOR3
+		#define BT_USE_SSE
 		//BT_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
 		//if apps run into issues, we will disable the next line
 		#define BT_USE_SSE_IN_API
@ -175,10 +180,11 @@ inline int	btGetVersion()
                #include <emmintrin.h>
            #endif
        #endif //BT_USE_SSE
-    #elif defined( __armv7__ )
+    #elif defined( __ARM_NEON__ )
        #ifdef __clang__
            #define BT_USE_NEON 1
-
+			#define BT_USE_SIMD_VECTOR3
+		
            #if defined BT_USE_NEON && defined (__clang__)
                #include <arm_neon.h>
            #endif//BT_USE_NEON
@ -207,8 +213,7 @@ inline int	btGetVersion()
 	}
 	#else//defined (__i386__) || defined (__x86_64__)
 		#define btAssert assert
-	#end//defined (__i386__) || defined (__x86_64__)
-	#endif
+	#endif//defined (__i386__) || defined (__x86_64__)
 	#else//defined(DEBUG) || defined (_DEBUG)
 		#define btAssert(x)
 	#endif//defined(DEBUG) || defined (_DEBUG)
@ -252,10 +257,12 @@ inline int	btGetVersion()

 ///The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
 #if defined(BT_USE_DOUBLE_PRECISION)
+
 typedef double btScalar;
 //this number could be bigger in double precision
 #define BT_LARGE_FLOAT 1e30
 #else
+
 typedef float btScalar;
 //keep BT_LARGE_FLOAT*BT_LARGE_FLOAT < FLT_MAX
 #define BT_LARGE_FLOAT 1e18f
@ -265,7 +272,8 @@ typedef float btScalar;
 typedef __m128 btSimdFloat4;
 #endif//BT_USE_SSE

-#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
+#if defined (BT_USE_SSE)
+//#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
 #ifdef _WIN32

 #ifndef BT_NAN
@ -278,6 +286,8 @@ static  int btInfinityMask = 0x7F800000;
 #define BT_INFINITY (*(float*)&btInfinityMask)
 #endif

+//use this, in case there are clashes (such as xnamath.h)
+#ifndef BT_NO_SIMD_OPERATOR_OVERLOADS
 inline __m128 operator + (const __m128 A, const __m128 B)
 {
    return _mm_add_ps(A, B);
@ -292,6 +302,7 @@ inline __m128 operator * (const __m128 A, const __m128 B)
 {
    return _mm_mul_ps(A, B);
 }
+#endif //BT_NO_SIMD_OPERATOR_OVERLOADS

 #define btCastfTo128i(a) (_mm_castps_si128(a))
 #define btCastfTo128d(a) (_mm_castps_pd(a))
@ -311,7 +322,24 @@ inline __m128 operator * (const __m128 A, const __m128 B)
 #define BT_INFINITY INFINITY
 #define BT_NAN NAN
 #endif//_WIN32
-#endif //BT_USE_SSE_IN_API
+#else
+
+#ifdef BT_USE_NEON
+	#include <arm_neon.h>
+
+	typedef float32x4_t btSimdFloat4;
+	#define BT_INFINITY INFINITY
+	#define BT_NAN NAN
+	#define btAssign128(r0,r1,r2,r3) (float32x4_t){r0,r1,r2,r3}
+#else//BT_USE_NEON
+
+	#ifndef BT_INFINITY
+	static  int btInfinityMask = 0x7F800000;
+	#define BT_INFINITY (*(float*)&btInfinityMask)
+	#endif
+#endif//BT_USE_NEON
+
+#endif //BT_USE_SSE

 #ifdef BT_USE_NEON
 #include <arm_neon.h>
@ -403,15 +431,15 @@ SIMD_FORCE_INLINE btScalar btFmod(btScalar x,btScalar y) { return fmodf(x,y); }
 	
 #endif

-#define SIMD_2_PI         btScalar(6.283185307179586232)
-#define SIMD_PI           (SIMD_2_PI * btScalar(0.5))
-#define SIMD_HALF_PI      (SIMD_2_PI * btScalar(0.25))
+#define SIMD_PI           btScalar(3.1415926535897932384626433832795029)
+#define SIMD_2_PI         (btScalar(2.0) * SIMD_PI)	/* parenthesized: SIMD_DEGS_PER_RAD divides by this macro */
+#define SIMD_HALF_PI      (SIMD_PI * btScalar(0.5))
 #define SIMD_RADS_PER_DEG (SIMD_2_PI / btScalar(360.0))
 #define SIMD_DEGS_PER_RAD  (btScalar(360.0) / SIMD_2_PI)
 #define SIMDSQRT12 btScalar(0.7071067811865475244008443621048490)

 #define btRecipSqrt(x) ((btScalar)(btScalar(1.0)/btSqrt(btScalar(x))))		/* reciprocal square root */
-
+#define btRecip(x) (btScalar(1.0)/btScalar(x))

 #ifdef BT_USE_DOUBLE_PRECISION
 #define SIMD_EPSILON      DBL_EPSILON
@ -602,6 +630,46 @@ SIMD_FORCE_INLINE double btUnswapEndianDouble(const unsigned char *src)
 	return d;
 }

+///Assign zero to the first n elements of a; a non-positive n leaves a untouched.
+template<typename T>
+SIMD_FORCE_INLINE void btSetZero(T* a, int n)
+{
+  //Count with a signed int: converting a negative n to size_t would wrap to a
+  //huge value and run the loop far past the end of the buffer.
+  for (int i = 0; i < n; ++i)
+  {
+    a[i] = 0;
+  }
+}
+
+///Dot product of the first n entries of a and b, accumulated two entries per iteration; returns 0 when n <= 0.
+SIMD_FORCE_INLINE btScalar btLargeDot(const btScalar *a, const btScalar *b, int n)
+{  
+  btScalar p0,q0,m0,p1,q1,m1,sum;
+  sum = 0;
+  n -= 2; //bias the count so that n >= 0 means at least two entries remain
+  while (n >= 0) {
+    p0 = a[0]; q0 = b[0];
+    m0 = p0 * q0;
+    p1 = a[1]; q1 = b[1];
+    m1 = p1 * q1;
+    sum += m0;
+    sum += m1;
+    a += 2;
+    b += 2;
+    n -= 2;
+  }
+  n += 2; //undo the bias: for a non-negative count this is now 0 or 1 leftover entries
+  while (n > 0) { //picks up the final entry when the count is odd
+    sum += (*a) * (*b);
+    a++;
+    b++;
+    n--;
+  }
+  return sum;
+}
+
+
 // returns normalized value in range [-SIMD_PI, SIMD_PI]
 SIMD_FORCE_INLINE btScalar btNormalizeAngle(btScalar angleInRadians) 
 {
@ -620,6 +688,8 @@ SIMD_FORCE_INLINE btScalar btNormalizeAngle(btScalar angleInRadians)
 	}
 }

+
+
 ///rudimentary class to provide type info
 struct btTypedObject
 {
--- a/Engine/lib/bullet/src/LinearMath/btSerializer.cpp
+++ b/Engine/lib/bullet/src/LinearMath/btSerializer.cpp
--- a/Engine/lib/bullet/src/LinearMath/btSerializer.h
+++ b/Engine/lib/bullet/src/LinearMath/btSerializer.h
@ -17,7 +17,6 @@ subject to the following restrictions:
 #define BT_SERIALIZER_H

 #include "btScalar.h" // has definitions like SIMD_FORCE_INLINE
-#include "btStackAlloc.h"
 #include "btHashMap.h"

 #if !defined( __CELLOS_LV2__) && !defined(__MWERKS__)
@ -439,7 +438,7 @@ public:

 			buffer[9] = '2';
 			buffer[10] = '8';
-			buffer[11] = '1';
+			buffer[11] = '2';

 		}

--- a/Engine/lib/bullet/src/LinearMath/btVector3.cpp
+++ b/Engine/lib/bullet/src/LinearMath/btVector3.cpp
@ -19,9 +19,17 @@
 #define BT_USE_SSE_IN_API
 #endif

+
 #include "btVector3.h"

-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+
+
+#if defined BT_USE_SIMD_VECTOR3
+
+#if DEBUG
+#include <string.h>//for memset
+#endif
+

 #ifdef __APPLE__
 #include <stdint.h>
@ -43,7 +51,7 @@ long _maxdot_large( const float *vv, const float *vec, unsigned long count, floa
 long _maxdot_large( const float *vv, const float *vec, unsigned long count, float *dotResult )
 {
    const float4 *vertices = (const float4*) vv;
-    static const unsigned char indexTable[16] = {-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
+    static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
    float4 dotMax = btAssign128( -BT_INFINITY,  -BT_INFINITY,  -BT_INFINITY,  -BT_INFINITY );
    float4 vvec = _mm_loadu_ps( vec );
    float4 vHi = btCastiTo128f(_mm_shuffle_epi32( btCastfTo128i( vvec), 0xaa ));          /// zzzz
@ -428,7 +436,7 @@ long _mindot_large( const float *vv, const float *vec, unsigned long count, floa
 long _mindot_large( const float *vv, const float *vec, unsigned long count, float *dotResult )
 {
    const float4 *vertices = (const float4*) vv;
-    static const unsigned char indexTable[16] = {-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
+    static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
    float4 dotmin = btAssign128( BT_INFINITY,  BT_INFINITY,  BT_INFINITY,  BT_INFINITY );
    float4 vvec = _mm_loadu_ps( vec );
    float4 vHi = btCastiTo128f(_mm_shuffle_epi32( btCastfTo128i( vvec), 0xaa ));          /// zzzz
@ -815,7 +823,8 @@ long _mindot_large( const float *vv, const float *vec, unsigned long count, floa
 #elif defined BT_USE_NEON
 #define ARM_NEON_GCC_COMPATIBILITY  1
 #include <arm_neon.h>
-
+#include <sys/types.h>
+#include <sys/sysctl.h> //for sysctlbyname

 static long _maxdot_large_v0( const float *vv, const float *vec, unsigned long count, float *dotResult );
 static long _maxdot_large_v1( const float *vv, const float *vec, unsigned long count, float *dotResult );
@ -827,11 +836,34 @@ static long _mindot_large_sel( const float *vv, const float *vec, unsigned long
 long (*_maxdot_large)( const float *vv, const float *vec, unsigned long count, float *dotResult ) = _maxdot_large_sel;
 long (*_mindot_large)( const float *vv, const float *vec, unsigned long count, float *dotResult ) = _mindot_large_sel;

-extern "C" {int  _get_cpu_capabilities( void );}
+
+//Returns the cached CPU feature bits: 0x2000 when sysctl reports "hw.optional.neon_hpfp", otherwise 0.
+static inline uint32_t btGetCpuCapabilities( void )
+{
+    static uint32_t cachedBits = 0;
+    static bool alreadyProbed = false;
+
+    //every call after the first one returns the stored answer
+    if( alreadyProbed )
+        return cachedBits;
+
+    uint32_t neonHpfp = 0;
+    size_t neonHpfpSize = sizeof( neonHpfp );
+    const int status = sysctlbyname( "hw.optional.neon_hpfp", &neonHpfp, &neonHpfpSize, NULL, 0 );
+    if( status == 0 && neonHpfp != 0 )
+        cachedBits |= 0x2000;
+
+    alreadyProbed = true;
+    return cachedBits;
+}
+
+
+

 static long _maxdot_large_sel( const float *vv, const float *vec, unsigned long count, float *dotResult )
 {
-    if( _get_cpu_capabilities() & 0x2000 )
+
+    if( btGetCpuCapabilities() & 0x2000 )
        _maxdot_large = _maxdot_large_v1;
    else
        _maxdot_large = _maxdot_large_v0;
@ -841,7 +873,8 @@ static long _maxdot_large_sel( const float *vv, const float *vec, unsigned long

 static long _mindot_large_sel( const float *vv, const float *vec, unsigned long count, float *dotResult )
 {
-    if( _get_cpu_capabilities() & 0x2000 )
+
+    if( btGetCpuCapabilities() & 0x2000 )
        _mindot_large = _mindot_large_v1;
    else
        _mindot_large = _mindot_large_v0;
@ -864,8 +897,8 @@ long _maxdot_large_v0( const float *vv, const float *vec, unsigned long count, f
    float32x2_t dotMaxHi = (float32x2_t) { -BT_INFINITY, -BT_INFINITY };
    uint32x2_t indexLo = (uint32x2_t) {0, 1};
    uint32x2_t indexHi = (uint32x2_t) {2, 3};
-    uint32x2_t iLo = (uint32x2_t) {-1, -1};
-    uint32x2_t iHi = (uint32x2_t) {-1, -1};
+    uint32x2_t iLo = (uint32x2_t) {static_cast<uint32_t>(-1), static_cast<uint32_t>(-1)};
+    uint32x2_t iHi = (uint32x2_t) {static_cast<uint32_t>(-1), static_cast<uint32_t>(-1)};
    const uint32x2_t four = (uint32x2_t) {4,4};

    for( ; i+8 <= count; i+= 8 )
@ -1051,7 +1084,7 @@ long _maxdot_large_v1( const float *vv, const float *vec, unsigned long count, f
    float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0);
    const uint32x4_t four = (uint32x4_t){ 4, 4, 4, 4 };
    uint32x4_t local_index = (uint32x4_t) {0, 1, 2, 3};
-    uint32x4_t index = (uint32x4_t) { -1, -1, -1, -1 };
+    uint32x4_t index = (uint32x4_t) { static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1) };
    float32x4_t maxDot = (float32x4_t) { -BT_INFINITY, -BT_INFINITY, -BT_INFINITY, -BT_INFINITY };
    
    unsigned long i = 0;
@ -1249,8 +1282,8 @@ long _mindot_large_v0( const float *vv, const float *vec, unsigned long count, f
    float32x2_t dotMinHi = (float32x2_t) { BT_INFINITY, BT_INFINITY };
    uint32x2_t indexLo = (uint32x2_t) {0, 1};
    uint32x2_t indexHi = (uint32x2_t) {2, 3};
-    uint32x2_t iLo = (uint32x2_t) {-1, -1};
-    uint32x2_t iHi = (uint32x2_t) {-1, -1};
+    uint32x2_t iLo = (uint32x2_t) {static_cast<uint32_t>(-1), static_cast<uint32_t>(-1)};
+    uint32x2_t iHi = (uint32x2_t) {static_cast<uint32_t>(-1), static_cast<uint32_t>(-1)};
    const uint32x2_t four = (uint32x2_t) {4,4};
    
    for( ; i+8 <= count; i+= 8 )
@ -1434,7 +1467,7 @@ long _mindot_large_v1( const float *vv, const float *vec, unsigned long count, f
    float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0);
    const uint32x4_t four = (uint32x4_t){ 4, 4, 4, 4 };
    uint32x4_t local_index = (uint32x4_t) {0, 1, 2, 3};
-    uint32x4_t index = (uint32x4_t) { -1, -1, -1, -1 };
+    uint32x4_t index = (uint32x4_t) { static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1) };
    float32x4_t minDot = (float32x4_t) { BT_INFINITY, BT_INFINITY, BT_INFINITY, BT_INFINITY };
    
    unsigned long i = 0;
--- a/Engine/lib/bullet/src/LinearMath/btVector3.h
+++ b/Engine/lib/bullet/src/LinearMath/btVector3.h
@ -53,19 +53,24 @@ subject to the following restrictions:
 #define btvxyzMaskf btvFFF0fMask
 #define btvAbsfMask btCastiTo128f(btvAbsMask)

+//there is an issue with XCode 3.2 (LCx errors)
+#define btvMzeroMask (_mm_set_ps(-0.0f, -0.0f, -0.0f, -0.0f))
+#define v1110		 (_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f))
+#define vHalf		 (_mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f))
+#define v1_5		 (_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f))

-
-const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
-const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
-const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
-const __m128 ATTRIBUTE_ALIGNED16(v1_5)  = {1.5f, 1.5f, 1.5f, 1.5f};
+//const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
+//const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
+//const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
+//const __m128 ATTRIBUTE_ALIGNED16(v1_5)  = {1.5f, 1.5f, 1.5f, 1.5f};

 #endif

 #ifdef BT_USE_NEON

 const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
-const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
+const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){static_cast<int32_t>(0xFFFFFFFF),
+	static_cast<int32_t>(0xFFFFFFFF), static_cast<int32_t>(0xFFFFFFFF), 0x0};
 const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
 const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};

@ -229,7 +234,7 @@ public:
   * @param v The other vector in the dot product */
 	SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)		
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
 		__m128 z = _mm_movehl_ps(vd, vd);
 		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
@ -260,6 +265,12 @@ public:
 		return btSqrt(length2());
 	}

+	/**@brief Return the norm of the vector; this simply forwards to length() */
+	SIMD_FORCE_INLINE btScalar norm() const
+	{
+		return length();
+	}
+
  /**@brief Return the distance squared between the ends of this and another vector
   * This is symantically treating the vector like a point */
 	SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
@ -285,6 +296,9 @@ public:
   * x^2 + y^2 + z^2 = 1 */
 	SIMD_FORCE_INLINE btVector3& normalize() 
 	{
+		
+		btAssert(length() != btScalar(0));
+
 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)		
        // dot product first
 		__m128 vd = _mm_mul_ps(mVec128, mVec128);
@ -345,7 +359,8 @@ public:
  /**@brief Return a vector will the absolute values of each element */
 	SIMD_FORCE_INLINE btVector3 absolute() const 
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
+
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
 		return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
 #elif defined(BT_USE_NEON)
 		return btVector3(vabsq_f32(mVec128));
@ -400,7 +415,7 @@ public:

 	SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 		// cross:
 		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
 		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
@ -632,7 +647,7 @@ public:

 	void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 
 		__m128 V  = _mm_and_ps(mVec128, btvFFF0fMask);
 		__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
@ -702,7 +717,7 @@ public:
    /* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 ))  */
    SIMD_FORCE_INLINE btVector3  dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
    {
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)

        __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
        __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
@ -717,7 +732,7 @@ public:
        return btVector3(r);
        
 #elif defined(BT_USE_NEON)
-        static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
+        static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
        float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
        float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
        float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
@ -768,7 +783,7 @@ operator*(const btVector3& v1, const btVector3& v2)
 SIMD_FORCE_INLINE btVector3 
 operator-(const btVector3& v1, const btVector3& v2)
 {
-#if (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))

 	//	without _mm_and_ps this code causes slowdown in Concave moving
 	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
@ -788,7 +803,7 @@ operator-(const btVector3& v1, const btVector3& v2)
 SIMD_FORCE_INLINE btVector3 
 operator-(const btVector3& v)
 {
-#if (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
 	__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
 	return btVector3(_mm_and_ps(r, btvFFF0fMask)); 
 #elif defined(BT_USE_NEON)
@ -842,7 +857,7 @@ operator/(const btVector3& v, const btScalar& s)
 SIMD_FORCE_INLINE btVector3
 operator/(const btVector3& v1, const btVector3& v2)
 {
-#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
 	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
 	vec = _mm_and_ps(vec, btvFFF0fMask);
 	return btVector3(vec); 
@ -935,20 +950,16 @@ SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const

 SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
 {
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 	btVector3 norm = *this;

 	return norm.normalize();
-#else
-	return *this / length();
-#endif
 } 

 SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
 {
 	// wAxis must be a unit lenght vector

-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)

    __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
 	btScalar ssin = btSin( _angle );
@ -988,7 +999,7 @@ SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btS

 SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
 {
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
    #if defined _WIN32 || defined (BT_USE_SSE)
        const long scalar_cutoff = 10;
        long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
@ -996,10 +1007,8 @@ SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long arra
        const long scalar_cutoff = 4;
        extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
    #endif
-    if( array_count < scalar_cutoff )
-#else
-	
-#endif//BT_USE_SSE || BT_USE_NEON
+    if( array_count < scalar_cutoff )	
+#endif
    {
        btScalar maxDot = -SIMD_INFINITY;
        int i = 0;
@ -1018,14 +1027,14 @@ SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long arra
        dotOut = maxDot;
        return ptIndex;
    }
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
    return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
 #endif
 }

 SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
 {
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
    #if defined BT_USE_SSE
        const long scalar_cutoff = 10;
        long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
@ -1037,7 +1046,7 @@ SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long arra
    #endif
    
    if( array_count < scalar_cutoff )
-#endif//BT_USE_SSE || BT_USE_NEON
+#endif
    {
        btScalar  minDot = SIMD_INFINITY;
        int i = 0;
@ -1058,9 +1067,9 @@ SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long arra
        
        return ptIndex;
    }
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
    return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
-#endif
+#endif//BT_USE_SIMD_VECTOR3
 }


@ -1098,7 +1107,7 @@ public:

 	SIMD_FORCE_INLINE btVector4 absolute4() const 
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
+#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
 		return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
 #elif defined(BT_USE_NEON)
 		return btVector4(vabsq_f32(mVec128));